From 544230390ec02c1a2674b52c627129c0fb3deeb3 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 1 Dec 2021 08:51:30 +0100
Subject: [PATCH 001/253] New Bayesian Optimization methods

---
 kernel_tuner/interface.py                     |   5 +-
 kernel_tuner/strategies/bayes_opt.py          |  62 +-
 kernel_tuner/strategies/bayes_opt_GPyTorch.py | 918 ++++++++++++++++++
 .../strategies/bayes_opt_alt_BOTorch.py       |  83 ++
 kernel_tuner/strategies/bayes_opt_old.py      | 837 ++++++++++++++++
 5 files changed, 1868 insertions(+), 37 deletions(-)
 create mode 100644 kernel_tuner/strategies/bayes_opt_GPyTorch.py
 create mode 100644 kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
 create mode 100644 kernel_tuner/strategies/bayes_opt_old.py

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 1c0448214..e2da180c5 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -45,7 +45,7 @@
 except ImportError:
     torch = util.TorchPlaceHolder()
 
-from kernel_tuner.strategies import brute_force, random_sample, diff_evo, minimize, basinhopping, genetic_algorithm, mls, pso, simulated_annealing, firefly_algorithm, bayes_opt
+from kernel_tuner.strategies import brute_force, random_sample, diff_evo, minimize, basinhopping, genetic_algorithm, mls, pso, simulated_annealing, firefly_algorithm, bayes_opt, bayes_opt_old, bayes_opt_GPyTorch, bayes_opt_alt_BOTorch
 
 strategy_map = {
     "brute_force": brute_force,
@@ -59,6 +59,9 @@
     "simulated_annealing": simulated_annealing,
     "firefly_algorithm": firefly_algorithm,
     "bayes_opt": bayes_opt,
+    "bayes_opt_old": bayes_opt_old,
+    "bayes_opt_GPyTorch": bayes_opt_GPyTorch,
+    "bayes_opt_BOTorch": bayes_opt_alt_BOTorch,
 }
 
 
diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
index ccdd2638b..56ec6e720 100644
--- a/kernel_tuner/strategies/bayes_opt.py
+++ b/kernel_tuner/strategies/bayes_opt.py
@@ -376,12 +376,12 @@ def predict(self, x) -> Tuple[float, float]:
         """ Returns a mean and standard deviation predicted by the surrogate model for the parameter configuration """
         return self.__model.predict([x], return_std=True)
 
-    def predict_list(self, lst: list) -> Tuple[list, list, list]:
+    def predict_list(self, lst: list) -> Tuple[np.ndarray, np.ndarray]:
         """ Returns a list of means and standard deviations predicted by the surrogate model for the parameter configurations, and separate lists of means and standard deviations """
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             mu, std = self.__model.predict(lst, return_std=True)
-            return list(zip(mu, std)), mu, std
+            return mu, std
 
     def fit_observations_to_model(self):
         """ Update the model based on the current list of observations """
@@ -459,7 +459,7 @@ def initial_sample(self):
             if self.is_valid(observation):
                 collected_samples += 1
         self.fit_observations_to_model()
-        _, _, std = self.predict_list(self.unvisited_cache)
+        _, std = self.predict_list(self.unvisited_cache)
         self.initial_sample_mean = np.mean(self.__valid_observations)
         # Alternatively:
         # self.initial_sample_std = np.std(self.__valid_observations)
@@ -490,8 +490,8 @@ def __optimize(self, max_fevals):
         while self.fevals < max_fevals:
             if self.__visited_num >= self.searchspace_size:
                 raise ValueError(self.error_message_searchspace_fully_observed)
-            predictions, _, std = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(std)
+            predictions = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(predictions[1])
             list_of_acquisition_values = self.__af(predictions, hyperparam)
             # afterwards select the best AF value
             best_af = self.argopt(list_of_acquisition_values)
@@ -522,8 +522,8 @@ def __optimize_multi(self, max_fevals):
             time_start = time.perf_counter_ns()
             # the first acquisition function is never skipped, so that should be the best for the endgame (EI)
             aqfs = self.multi_afs
-            predictions, _, std = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(std)
+            predictions = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(predictions[1])
             if self.__visited_num >= self.searchspace_size:
                 raise ValueError(self.error_message_searchspace_fully_observed)
             time_predictions = time.perf_counter_ns()
@@ -635,8 +635,8 @@ def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
                 raise ValueError(self.error_message_searchspace_fully_observed)
             observations_median = np.median(self.__valid_observations)
             if increase_precision is False:
-                predictions, _, std = self.predict_list(self.unvisited_cache)
-                hyperparam = self.contextual_variance(std)
+                predictions = self.predict_list(self.unvisited_cache)
+                hyperparam = self.contextual_variance(predictions[1])
             for af_index, af in enumerate(aqfs):
                 if af_index in skip_af_index:
                     continue
@@ -647,7 +647,8 @@ def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
                     hyperparam = self.contextual_variance(std)
                 list_of_acquisition_values = af(predictions, hyperparam)
                 best_af = self.argopt(list_of_acquisition_values)
-                del predictions[best_af]    # to avoid going out of bounds
+                np.delete(predictions[0], best_af)    # to avoid going out of bounds
+                np.delete(predictions[1], best_af)
                 candidate_params = self.unvisited_cache[best_af]
                 candidate_index = self.find_param_config_index(candidate_params)
                 observation = self.evaluate_objective_function(candidate_params)
@@ -719,8 +720,8 @@ def __optimize_multi_fast(self, max_fevals):
         while self.fevals < max_fevals:
             aqfs = self.multi_afs
             # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
-            predictions, _, std = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(std)
+            predictions = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(predictions[1])
             if self.__visited_num >= self.searchspace_size:
                 raise ValueError(self.error_message_searchspace_fully_observed)
             for af in aqfs:
@@ -728,7 +729,8 @@ def __optimize_multi_fast(self, max_fevals):
                     break
                 list_of_acquisition_values = af(predictions, hyperparam)
                 best_af = self.argopt(list_of_acquisition_values)
-                del predictions[best_af]    # to avoid going out of bounds
+                del predictions[0][best_af]    # to avoid going out of bounds
+                del predictions[1][best_af]
                 candidate_params = self.unvisited_cache[best_af]
                 candidate_index = self.find_param_config_index(candidate_params)
                 observation = self.evaluate_objective_function(candidate_params)
@@ -746,65 +748,53 @@ def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> li
         """ Acquisition function Probability of Improvement (PI) """
 
         # prefetch required data
-        if predictions is None:
-            predictions, _, _ = self.predict_list(self.unvisited_cache)
+        x_mu, x_std = predictions
         if hyperparam is None:
             hyperparam = self.af_params['explorationfactor']
         fplus = self.current_optimum - hyperparam
 
         # precompute difference of improvement
-        list_diff_improvement = list(-((fplus - x_mu) / (x_std + 1E-9)) for (x_mu, x_std) in predictions)
+        list_diff_improvement = -((fplus - x_mu) / (x_std + 1E-9))
 
         # compute probability of improvement with CDF in bulk
         list_prob_improvement = norm.cdf(list_diff_improvement)
-
         return list_prob_improvement
 
     def af_expected_improvement(self, predictions=None, hyperparam=None) -> list:
         """ Acquisition function Expected Improvement (EI) """
 
         # prefetch required data
-        if predictions is None:
-            predictions, _, _ = self.predict_list(self.unvisited_cache)
+        x_mu, x_std = predictions
         if hyperparam is None:
             hyperparam = self.af_params['explorationfactor']
         fplus = self.current_optimum - hyperparam
 
         # precompute difference of improvement, CDF and PDF in bulk
-        list_diff_improvement = list((fplus - x_mu) / (x_std + 1E-9) for (x_mu, x_std) in predictions)
+        list_diff_improvement = (fplus - x_mu) / (x_std + 1E-9)
         list_cdf = norm.cdf(list_diff_improvement)
         list_pdf = norm.pdf(list_diff_improvement)
 
-        # specify AF calculation
-        def exp_improvement(index) -> float:
-            x_mu, x_std = predictions[index]
-            ei = (fplus - x_mu) * list_cdf[index] + x_std * list_pdf[index]
-            return -ei
-
-        # calculate AF
-        list_exp_improvement = list(map(exp_improvement, range(len(predictions))))
+        # compute expected improvement in bulk
+        list_exp_improvement = -((fplus - x_mu) * list_cdf + x_std * list_pdf)
         return list_exp_improvement
 
     def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
         """ Acquisition function Lower Confidence Bound (LCB) """
 
-        # prefetch required data
-        if predictions is None:
-            predictions, _, _ = self.predict_list(self.unvisited_cache)
+        x_mu, x_std = predictions
         if hyperparam is None:
             hyperparam = self.af_params['explorationfactor']
         beta = hyperparam
 
         # compute LCB in bulk
-        list_lower_confidence_bound = list(x_mu - beta * x_std for (x_mu, x_std) in predictions)
+        list_lower_confidence_bound = (x_mu - beta * x_std)
         return list_lower_confidence_bound
 
     def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None) -> list:
         """ Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010 """
 
         # prefetch required data
-        if predictions is None:
-            predictions, _, _ = self.predict_list(self.unvisited_cache)
+        x_mu, x_std = predictions
         if hyperparam is None:
             hyperparam = self.af_params['explorationfactor']
 
@@ -816,7 +806,7 @@ def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None)
         beta = np.sqrt(zeta * (2 * np.log((t**(d / 2. + 2)) * (np.pi**2) / (3. * delta))))
 
         # compute UCB in bulk
-        list_lower_confidence_bound = list(x_mu - beta * x_std for (x_mu, x_std) in predictions)
+        list_lower_confidence_bound = (x_mu - beta * x_std)
         return list_lower_confidence_bound
 
     def visualize_after_opt(self):
@@ -824,7 +814,7 @@ def visualize_after_opt(self):
         print(self.__model.kernel_.get_params())
         print(self.__model.log_marginal_likelihood())
         import matplotlib.pyplot as plt
-        _, mu, std = self.predict_list(self.searchspace)
+        mu, std = self.predict_list(self.searchspace)
         brute_force_observations = list()
         for param_config in self.searchspace:
             obs = minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.results)
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch.py b/kernel_tuner/strategies/bayes_opt_GPyTorch.py
new file mode 100644
index 000000000..31b987ca6
--- /dev/null
+++ b/kernel_tuner/strategies/bayes_opt_GPyTorch.py
@@ -0,0 +1,918 @@
+""" Bayesian Optimization implementation from the thesis by Willemsen """
+from copy import deepcopy
+from random import randint, shuffle
+import itertools
+import warnings
+import time
+from typing import Tuple
+
+import numpy as np
+from scipy.stats import norm
+
+# BO imports
+try:
+    import torch
+    import gpytorch
+    from sklearn.gaussian_process.kernels import ConstantKernel, RBF, Matern
+    from sklearn.exceptions import ConvergenceWarning
+    from skopt.sampler import Lhs
+    bayes_opt_present = True
+except ImportError:
+    bayes_opt_present = False
+
+from kernel_tuner.strategies import minimize
+from kernel_tuner import util
+
+supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast"]
+
+
+def generate_normalized_param_dicts(tune_params: dict, eps: float) -> Tuple[dict, dict]:
+    """ Generates normalization and denormalization dictionaries """
+    original_to_normalized = dict()
+    normalized_to_original = dict()
+    for param_name in tune_params.keys():
+        original_to_normalized_dict = dict()
+        normalized_to_original_dict = dict()
+        for value_index, value in enumerate(tune_params[param_name]):
+            normalized_value = eps * value_index + 0.5 * eps
+            normalized_to_original_dict[normalized_value] = value
+            original_to_normalized_dict[value] = normalized_value
+        original_to_normalized[param_name] = original_to_normalized_dict
+        normalized_to_original[param_name] = normalized_to_original_dict
+    return original_to_normalized, normalized_to_original
+
+
+def normalize_parameter_space(param_space: list, tune_params: dict, normalized: dict) -> list:
+    """ Normalize the parameter space given a normalization dictionary """
+    keys = list(tune_params.keys())
+    param_space_normalized = list(tuple(normalized[keys[i]][v] for i, v in enumerate(params)) for params in param_space)
+    return param_space_normalized
+
+
+def prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict):
+    """ Pruning of the parameter space to remove dimensions that have a constant parameter """
+    pruned_tune_params_mask = list()
+    removed_tune_params = list()
+    param_names = list(tune_params.keys())
+    for index, key in enumerate(tune_params.keys()):
+        pruned_tune_params_mask.append(len(tune_params[key]) > 1)
+        if len(tune_params[key]) > 1:
+            removed_tune_params.append(None)
+        else:
+            value = tune_params[key][0]
+            normalized = normalize_dict[param_names[index]][value]
+            removed_tune_params.append(normalized)
+    if 'verbose' in tuning_options and tuning_options.verbose is True and len(tune_params.keys()) != sum(pruned_tune_params_mask):
+        print(f"Number of parameters (dimensions): {len(tune_params.keys())}, after pruning: {sum(pruned_tune_params_mask)}")
+    parameter_space = list(tuple(itertools.compress(param_config, pruned_tune_params_mask)) for param_config in parameter_space)
+    return parameter_space, removed_tune_params
+
+
+def tune(runner, kernel_options, device_options, tuning_options):
+    """ Find the best performing kernel configuration in the parameter space
+
+    :params runner: A runner from kernel_tuner.runners
+    :type runner: kernel_tuner.runner
+
+    :param kernel_options: A dictionary with all options for the kernel.
+    :type kernel_options: kernel_tuner.interface.Options
+
+    :param device_options: A dictionary with all options for the device
+        on which the kernel should be tuned.
+    :type device_options: kernel_tuner.interface.Options
+
+    :param tuning_options: A dictionary with all options regarding the tuning
+        process. Allows setting hyperparameters via the strategy_options key.
+    :type tuning_options: kernel_tuner.interface.Options
+
+    :returns: A list of dictionaries for executed kernel configurations and their
+        execution times. And a dictionary that contains a information
+        about the hardware/software environment on which the tuning took place.
+    :rtype: list(dict()), dict()
+
+    """
+
+    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
+    prune_parameterspace = tuning_options.strategy_options.get("pruneparameterspace", True)
+    if not bayes_opt_present:
+        raise ImportError("Error: optional dependencies for Bayesian Optimization not installed, please install scikit-learn and scikit-optimize")
+
+    # epsilon for scaling should be the evenly spaced distance between the largest set of parameter options in an interval [0,1]
+    tune_params = tuning_options.tune_params
+    tuning_options["scaling"] = True
+    _, _, eps = minimize.get_bounds_x0_eps(tuning_options)
+
+    # compute cartesian product of all tunable parameters
+    parameter_space = itertools.product(*tune_params.values())
+
+    # check for search space restrictions
+    if tuning_options.restrictions is not None:
+        tuning_options.verbose = False
+    parameter_space = filter(lambda p: util.config_valid(p, tuning_options, runner.dev.max_threads), parameter_space)
+    parameter_space = list(parameter_space)
+    if len(parameter_space) < 1:
+        raise ValueError("Empty parameterspace after restrictionscheck. Restrictionscheck is possibly too strict.")
+    if len(parameter_space) == 1:
+        raise ValueError(f"Only one configuration after restrictionscheck. Restrictionscheck is possibly too strict. Configuration: {parameter_space[0]}")
+
+    # normalize search space to [0,1]
+    normalize_dict, denormalize_dict = generate_normalized_param_dicts(tune_params, eps)
+    parameter_space = normalize_parameter_space(parameter_space, tune_params, normalize_dict)
+
+    # prune the parameter space to remove dimensions that have a constant parameter
+    if prune_parameterspace:
+        parameter_space, removed_tune_params = prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict)
+    else:
+        parameter_space = list(parameter_space)
+        removed_tune_params = [None] * len(tune_params.keys())
+
+    # initialize and optimize
+    bo = BayesianOptimization(parameter_space, removed_tune_params, kernel_options, tuning_options, normalize_dict, denormalize_dict, runner)
+    results = bo.optimize(max_fevals)
+
+    return results, runner.dev.get_environment()
+
+
+class ExactGPModel(gpytorch.models.ExactGP):
+    """ Very simple exact Gaussian Process model """
+
+    def __init__(self, train_x, train_y, likelihood):
+        super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
+        self.mean_module = gpytorch.means.ZeroMean()    # TODO maybe try ConstantMean or LinearMean
+        self.covar_module = gpytorch.kernels.MaternKernel(nu=1.5)    # TODO maybe try ScaleKernel(MaternKernel)
+
+    def forward(self, x):
+        mean_x = self.mean_module(x)
+        covar_x = self.covar_module(x)
+        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
+
+
+class BayesianOptimization():
+
+    def __init__(self, searchspace: list, removed_tune_params: list, kernel_options: dict, tuning_options: dict, normalize_dict: dict, denormalize_dict: dict,
+                 runner, opt_direction='min'):
+        time_start = time.perf_counter_ns()
+
+        # supported hyperparameter values
+        self.supported_cov_kernels = ["constantrbf", "rbf", "matern32", "matern52"]
+        self.supported_methods = supported_methods
+        self.supported_sampling_methods = ["random", "lhs"]
+        self.supported_sampling_criterion = ["correlation", "ratio", "maximin", None]
+
+        def get_hyperparam(name: str, default, supported_values=list()):
+            value = tuning_options.strategy_options.get(name, default)
+            if len(supported_values) > 0 and value not in supported_values:
+                raise ValueError(f"'{name}' is set to {value}, but must be one of {supported_values}")
+            return value
+
+        # get hyperparameters
+        cov_kernel_name = get_hyperparam("covariancekernel", "matern32", self.supported_cov_kernels)
+        cov_kernel_lengthscale = get_hyperparam("covariancelengthscale", 1.5)
+        acquisition_function = get_hyperparam("method", "multi-advanced", self.supported_methods)
+        acq = acquisition_function
+        acq_params = get_hyperparam("methodparams", {})
+        multi_af_names = get_hyperparam("multi_af_names", ['ei', 'poi', 'lcb'])
+        self.multi_afs_discount_factor = get_hyperparam("multi_af_discount_factor", 0.65 if acq == 'multi' else 0.95)
+        self.multi_afs_required_improvement_factor = get_hyperparam("multi_afs_required_improvement_factor", 0.15 if acq == 'multi-advanced-precise' else 0.1)
+        self.training_iter = get_hyperparam("training_iter", 10)
+        self.num_initial_samples = get_hyperparam("popsize", 20)
+        self.sampling_method = get_hyperparam("samplingmethod", "lhs", self.supported_sampling_methods)
+        self.sampling_crit = get_hyperparam("samplingcriterion", 'maximin', self.supported_sampling_criterion)
+        self.sampling_iter = get_hyperparam("samplingiterations", 1000)
+
+        # set acquisition function hyperparameter defaults where missing
+        if 'explorationfactor' not in acq_params:
+            acq_params['explorationfactor'] = 'CV'
+        if 'zeta' not in acq_params:
+            acq_params['zeta'] = 1
+        if 'skip_duplicate_after' not in acq_params:
+            acq_params['skip_duplicate_after'] = 5
+
+        # set arguments
+        self.kernel_options = kernel_options
+        self.tuning_options = tuning_options
+        self.tune_params = tuning_options.tune_params
+        self.param_names = list(self.tune_params.keys())
+        self.normalized_dict = normalize_dict
+        self.denormalized_dict = denormalize_dict
+        self.runner = runner
+        self.max_threads = runner.dev.max_threads
+        self.log_timings = False
+
+        # set optimization constants
+        self.invalid_value = 1e20
+        self.opt_direction = opt_direction
+        if opt_direction == 'min':
+            self.worst_value = np.PINF
+            self.argopt = np.argmin
+        elif opt_direction == 'max':
+            self.worst_value = np.NINF
+            self.argopt = np.argmax
+        else:
+            raise ValueError("Invalid optimization direction '{}'".format(opt_direction))
+
+        # set the acquisition function and surrogate model
+        self.optimize = self.__optimize
+        self.af_name = acquisition_function
+        self.af_params = acq_params
+        self.multi_afs = list(self.get_af_by_name(af_name) for af_name in multi_af_names)
+        self.set_acquisition_function(acquisition_function)
+        # self.set_surrogate_model(cov_kernel_name, cov_kernel_lengthscale)
+
+        # set remaining values
+        self.results = []
+        self.__searchspace = searchspace
+        self.removed_tune_params = removed_tune_params
+        self.searchspace_size = len(self.searchspace)
+        self.hyperparams = {
+            'loss': np.nan,
+            'lengthscale': np.nan,
+            'noise': np.nan,
+        }
+        self.num_dimensions = len(self.dimensions())
+        self.__current_optimum = self.worst_value
+        self.cv_norm_maximum = None
+        self.fevals = 0
+        self.__visited_num = 0
+        self.__visited_valid_num = 0
+        self.__visited_searchspace_indices = [False] * self.searchspace_size
+        self.__observations = [np.NaN] * self.searchspace_size
+        self.__valid_observation_indices = [False] * self.searchspace_size
+        self.__valid_params = list()
+        self.__valid_observations = list()
+        self.unvisited_cache = self.unvisited()
+        time_setup = time.perf_counter_ns()
+        self.error_message_searchspace_fully_observed = "The search space has been fully observed"
+
+        # take initial sample
+        self.initial_sample()
+        time_initial_sample = time.perf_counter_ns()
+
+        # print the timings
+        if self.log_timings:
+            time_taken_setup = round(time_setup - time_start, 3) / 1000
+            time_taken_initial_sample = round(time_initial_sample - time_setup, 3) / 1000
+            time_taken_total = round(time_initial_sample - time_start, 3) / 1000
+            print(f"Initialization | total time: {time_taken_total} | Setup: {time_taken_setup} | Initial sample: {time_taken_initial_sample}", flush=True)
+
+    @property
+    def searchspace(self):
+        return self.__searchspace
+
+    @property
+    def observations(self):
+        return self.__observations
+
+    @property
+    def current_optimum(self):
+        return self.__current_optimum
+
+    @current_optimum.setter
+    def current_optimum(self, value: float):
+        self.__current_optimum = value
+
+    def is_better_than(self, a: float, b: float) -> bool:
+        """ Determines which one is better depending on optimization direction """
+        return a < b if self.opt_direction == 'min' else a > b
+
+    def is_not_visited(self, index: int) -> bool:
+        """ Returns whether a searchspace index has not been visited """
+        return not self.__visited_searchspace_indices[index]
+
+    def is_valid(self, observation: float) -> bool:
+        """ Returns whether an observation is valid """
+        return not (observation == None or observation == self.invalid_value or observation == np.NaN)
+
+    def get_af_by_name(self, name: str):
+        """ Get the basic acquisition functions by their name """
+        basic_af_names = ['ei', 'poi', 'lcb']
+        if name == 'ei':
+            return self.af_expected_improvement
+        elif name == 'poi':
+            return self.af_probability_of_improvement
+        elif name == 'lcb':
+            return self.af_lower_confidence_bound
+        raise ValueError(f"{name} not in {basic_af_names}")
+
+    def set_acquisition_function(self, acquisition_function: str):
+        """ Set the acquisition function """
+        if acquisition_function == 'poi':
+            self.__af = self.af_probability_of_improvement
+        elif acquisition_function == 'ei':
+            self.__af = self.af_expected_improvement
+        elif acquisition_function == 'lcb':
+            self.__af = self.af_lower_confidence_bound
+        elif acquisition_function == 'lcb-srinivas':
+            self.__af = self.af_lower_confidence_bound_srinivas
+        elif acquisition_function == 'random':
+            self.__af = self.af_random
+        elif acquisition_function == 'multi':
+            self.optimize = self.__optimize_multi
+        elif acquisition_function == 'multi-advanced':
+            self.optimize = self.__optimize_multi_advanced
+        elif acquisition_function == 'multi-fast':
+            self.optimize = self.__optimize_multi_fast
+        else:
+            raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
+
+    def set_surrogate_model(self, cov_kernel_name: str, cov_kernel_lengthscale: float):
+        """ Set the surrogate model with a covariance function and lengthscale """
+        # TODO remove or adapt this
+        if cov_kernel_name == "constantrbf":
+            kernel = ConstantKernel(1.0, constant_value_bounds="fixed") * RBF(cov_kernel_lengthscale, length_scale_bounds="fixed")
+        elif cov_kernel_name == "rbf":
+            kernel = RBF(length_scale=cov_kernel_lengthscale, length_scale_bounds="fixed")
+        elif cov_kernel_name == "matern32":
+            kernel = Matern(length_scale=cov_kernel_lengthscale, nu=1.5, length_scale_bounds="fixed")
+        elif cov_kernel_name == "matern52":
+            kernel = Matern(length_scale=cov_kernel_lengthscale, nu=2.5, length_scale_bounds="fixed")
+        else:
+            raise ValueError(f"Acquisition function must be one of {self.supported_cov_kernels}, is {cov_kernel_name}")
+        likelihood = gpytorch.likelihoods.GaussianLikelihood()
+        self.__model = ExactGPModel(train_x, train_y, likelihood)
+        # self.__model = GaussianProcessRegressor(kernel=kernel, alpha=1e-10, normalize_y=True)    # maybe change alpha to a higher value such as 1e-5?
+
+    def valid_params_observations(self) -> Tuple[list, list]:
+        """ Returns a list of valid observations and their parameter configurations """
+        # if you do this every iteration, better keep it as cache and update in update_after_evaluation
+        params = list()
+        observations = list()
+        for index, valid in enumerate(self.__valid_observation_indices):
+            if valid is True:
+                params.append(self.searchspace[index])
+                observations.append(self.observations[index])
+        return params, observations
+
+    def unvisited(self) -> list:
+        """ Returns a list of unvisited parameter configurations - attention: cached version exists! """
+        params = list(self.searchspace[index] for index, visited in enumerate(self.__visited_searchspace_indices) if visited is False)
+        return params
+
+    def find_param_config_index(self, param_config: tuple) -> int:
+        """ Find a parameter config index in the search space if it exists """
+        return self.searchspace.index(param_config)
+
+    def find_param_config_unvisited_index(self, param_config: tuple) -> int:
+        """ Find a parameter config index in the unvisited cache if it exists """
+        return self.unvisited_cache.index(param_config)
+
+    def normalize_param_config(self, param_config: tuple) -> tuple:
+        """ Normalizes a parameter configuration """
+        normalized = tuple(self.normalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
+        return normalized
+
+    def denormalize_param_config(self, param_config: tuple) -> tuple:
+        """ Denormalizes a parameter configuration """
+        denormalized = tuple(self.denormalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
+        return denormalized
+
+    def unprune_param_config(self, param_config: tuple) -> tuple:
+        """ In case of pruned dimensions, adds the removed dimensions back in the param config """
+        unpruned = list()
+        pruned_count = 0
+        for removed in self.removed_tune_params:
+            if removed is not None:
+                unpruned.append(removed)
+            else:
+                unpruned.append(param_config[pruned_count])
+                pruned_count += 1
+        return tuple(unpruned)
+
+    def update_after_evaluation(self, observation: float, index: int, param_config: tuple):
+        """ Adjust the visited and valid index records accordingly """
+        validity = self.is_valid(observation)
+        self.__visited_num += 1
+        self.__observations[index] = observation
+        self.__visited_searchspace_indices[index] = True
+        del self.unvisited_cache[self.find_param_config_unvisited_index(param_config)]
+        self.__valid_observation_indices[index] = validity
+        if validity is True:
+            self.__visited_valid_num += 1
+            self.__valid_params.append(param_config)
+            self.__valid_observations.append(observation)
+            if self.is_better_than(observation, self.current_optimum):
+                self.current_optimum = observation
+
+    def predict(self, x) -> Tuple[float, float]:
+        """ Returns a mean and standard deviation predicted by the surrogate model for the parameter configuration """
+        return self.__model.predict([x], return_std=True)
+
+    def predict_list(self, lst: list) -> Tuple[np.ndarray, np.ndarray]:
+        """ Returns a list of means and standard deviations predicted by the surrogate model for the parameter configurations, and separate lists of means and standard deviations """
+        with torch.no_grad(), gpytorch.settings.fast_pred_var():
+            test_x = torch.Tensor(lst)
+            observed_pred = self.__likelihood(self.__model(test_x))
+            mu = observed_pred.mean
+            std = observed_pred.variance
+            return mu.numpy(), std.numpy()
+
+    def evaluate_objective_function(self, param_config: tuple) -> float:
+        """ Evaluates the objective function """
+        param_config = self.unprune_param_config(param_config)
+        denormalized_param_config = self.denormalize_param_config(param_config)
+        if not util.config_valid(denormalized_param_config, self.tuning_options, self.max_threads):
+            return self.invalid_value
+        val = minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.results)
+        self.fevals += 1
+        self.add_model_hyperparams_to_result(denormalized_param_config)
+        return val
+
+    def add_model_hyperparams_to_result(self, param_config: tuple):
+        """ Add the model parameters (loss and noise) to the results dict at the last result """
+        # assert that the results index corresponds to the last index
+        assert self.find_config_index_in_results(param_config) == len(self.results) - 1
+
+        for key, value in self.hyperparams.items():
+            # print(f"{key}: {value}")
+            self.results[-1][key] = value
+
+    def find_config_index_in_results(self, param_config: tuple):
+        """ Find the index of a parameter configuration in the results. Beware that this can be very slow! """
+        found_indices = list()
+        for results_index, result_dict in enumerate(self.results):
+            keys = list(result_dict.keys())
+            found = True
+            for index, value in enumerate(param_config):
+                if result_dict[keys[index]] != value:
+                    found = False
+            if found is True:
+                found_indices.append(results_index)
+        assert len(found_indices) == 1
+        return found_indices[0]
+
+    def dimensions(self) -> list:
+        """ List of parameter values per parameter """
+        return self.tune_params.values()
+
+    def draw_random_sample(self) -> Tuple[list, int]:
+        """ Draw a random sample from the unvisited parameter configurations """
+        if len(self.unvisited_cache) < 1:
+            raise ValueError("Searchspace exhausted during random sample draw as no valid configurations were found")
+        index = randint(0, len(self.unvisited_cache) - 1)    # NOSONAR
+        param_config = self.unvisited_cache[index]
+        actual_index = self.find_param_config_index(param_config)
+        return param_config, actual_index
+
+    def draw_latin_hypercube_samples(self, num_samples: int) -> list:
+        """ Draws an LHS-distributed sample from the search space """
+        if self.searchspace_size < num_samples:
+            raise ValueError("Can't sample more than the size of the search space")
+        if self.sampling_crit is None:
+            lhs = Lhs(lhs_type="centered", criterion=None)
+        else:
+            lhs = Lhs(lhs_type="classic", criterion=self.sampling_crit, iterations=self.sampling_iter)
+        param_configs = lhs.generate(self.dimensions(), num_samples)
+        indices = list()
+        normalized_param_configs = list()
+        for i in range(len(param_configs) - 1):
+            try:
+                param_config = self.normalize_param_config(param_configs[i])
+                index = self.find_param_config_index(param_config)
+                indices.append(index)
+                normalized_param_configs.append(param_config)
+            except ValueError:
+                """ Due to search space restrictions, the search space may not be an exact cartesian product of the tunable parameter values.
+                It is thus possible for LHS to generate a parameter combination that is not in the actual searchspace, which must be skipped. """
+                continue
+        return list(zip(normalized_param_configs, indices))
+
+    def train_model_hyperparams(self):
+        """ Train the model and likelihood hyperparameters """
+        # set to training modes
+        self.__model.train()
+        self.__likelihood.train()
+
+        # Use the adam optimizer
+        optimizer = torch.optim.Adam(self.__model.parameters(), lr=0.1)    # Includes GaussianLikelihood parameters
+
+        # "Loss" for GPs - the marginal log likelihood
+        mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.__likelihood, self.__model)
+
+        loss = 0
+        lengthscale = 0
+        noise = 0
+        for i in range(self.training_iter):
+            # Zero gradients from previous iteration
+            optimizer.zero_grad()
+            # Output from model
+            output = self.__model(self.__tparams)
+            # Calc loss and backprop gradients
+            loss = -mll(output, self.__tobservations)
+            loss.backward()
+            # print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' %
+            #       (i + 1, self.training_iter, loss.item(), self.__model.covar_module.base_kernel.lengthscale.item(), self.__model.likelihood.noise.item()))
+            optimizer.step()
+
+        # set to prediction mode
+        self.__model.eval()
+        self.__likelihood.eval()
+
+        # set the hyperparameters globally for reference
+        self.hyperparams = {
+            'loss': loss.item(),
+            'lengthscale': self.__model.covar_module.lengthscale.item(),
+            'noise': self.__model.likelihood.noise.item(),
+        }
+        # print(f"Loss: {self.hyperparams['loss']}, lengthscale: {self.hyperparams['lengthscale']}, noise: {self.hyperparams['noise']}")
+
+    def initial_sample(self):
+        """ Draws an initial sample using random sampling """
+        if self.num_initial_samples <= 0:
+            raise ValueError("At least one initial sample is required")
+        if self.sampling_method == 'lhs':
+            samples = self.draw_latin_hypercube_samples(self.num_initial_samples)
+        elif self.sampling_method == 'random':
+            samples = list()
+        else:
+            raise ValueError("Sampling method must be one of {}, is {}".format(self.supported_sampling_methods, self.sampling_method))
+        # collect the samples
+        collected_samples = 0
+        for params, index in samples:
+            observation = self.evaluate_objective_function(params)
+            self.update_after_evaluation(observation, index, params)
+            if self.is_valid(observation):
+                collected_samples += 1
+        # collect the remainder of the samples
+        while collected_samples < self.num_initial_samples:
+            params, index = self.draw_random_sample()
+            observation = self.evaluate_objective_function(params)
+            self.update_after_evaluation(observation, index, params)
+            # check for validity to avoid having no actual initial samples
+            if self.is_valid(observation):
+                collected_samples += 1
+        # instantiate the model with the initial sample
+        self.__likelihood = gpytorch.likelihoods.GaussianLikelihood()
+        self.__tparams = torch.Tensor(self.__valid_params)
+        self.__tobservations = torch.Tensor(self.__valid_observations)
+        self.__model = ExactGPModel(self.__tparams, self.__tobservations, self.__likelihood)
+        self.train_model_hyperparams()
+
+        # extract the predictions
+        _, std = self.predict_list(self.unvisited_cache)
+        self.initial_sample_mean = np.mean(self.__valid_observations)
+        # Alternatively:
+        # self.initial_sample_std = np.std(self.__valid_observations)
+        # self.initial_sample_mean = np.mean(predictions)
+        self.initial_std = np.mean(std)
+        self.cv_norm_maximum = self.initial_std
+
+    def contextual_variance(self, std: list):
+        """ Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018) """
+        if not self.af_params['explorationfactor'] == 'CV':
+            return None
+        if self.opt_direction == 'min':
+            if self.current_optimum == self.worst_value:
+                return 0.01
+            if self.current_optimum <= 0:
+                # doesn't work well for minimization beyond 0, should that even be a thing?
+                return abs(np.mean(std) / self.current_optimum)
+            improvement_over_initial_sample = self.initial_sample_mean / self.current_optimum
+            cv = np.mean(std) / improvement_over_initial_sample
+            # normalize if available
+            if self.cv_norm_maximum:
+                cv = cv / self.cv_norm_maximum
+            return cv
+        return np.mean(std) / self.current_optimum
+
+    def __optimize(self, max_fevals):
+        """ Find the next best candidate configuration(s), evaluate those and update the model accordingly """
+        while self.fevals < max_fevals:
+            if self.__visited_num >= self.searchspace_size:
+                raise ValueError(self.error_message_searchspace_fully_observed)
+            predictions = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(predictions[1])
+            list_of_acquisition_values = self.__af(predictions, hyperparam)
+            # afterwards select the best AF value
+            best_af = self.argopt(list_of_acquisition_values)
+            candidate_params = self.unvisited_cache[best_af]
+            candidate_index = self.find_param_config_index(candidate_params)
+            observation = self.evaluate_objective_function(candidate_params)
+            self.update_after_evaluation(observation, candidate_index, candidate_params)
+            self.train_model_hyperparams()
+        return self.results
+
+    def __optimize_multi(self, max_fevals):
+        """ Optimize with a portfolio of multiple acquisition functions. Predictions are always only taken once. Skips AFs if they suggest X/max_evals duplicates in a row, prefers AF with best discounted average. """
+        if self.opt_direction != 'min':
+            raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
+        # calculate how many times an AF can suggest a duplicate candidate before the AF is skipped
+        # skip_duplicates_fraction = self.af_params['skip_duplicates_fraction']
+        # skip_if_duplicate_n_times = int(min(max(round(skip_duplicates_fraction * max_fevals), 3), max_fevals))
+        skip_if_duplicate_n_times = self.af_params['skip_duplicate_after']
+        discount_factor = self.multi_afs_discount_factor
+        # setup the registration of duplicates and runtimes
+        duplicate_count_template = [0 for _ in range(skip_if_duplicate_n_times)]
+        duplicate_candidate_af_count = list(deepcopy(duplicate_count_template) for _ in range(3))
+        skip_af_index = list()
+        af_runtimes = [0, 0, 0]
+        af_observations = [list(), list(), list()]
+        initial_sample_mean = np.mean(self.__valid_observations)
+        while self.fevals < max_fevals:
+            time_start = time.perf_counter_ns()
+            # the first acquisition function is never skipped, so that should be the best for the endgame (EI)
+            aqfs = self.multi_afs
+            predictions = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(predictions[1])
+            if self.__visited_num >= self.searchspace_size:
+                raise ValueError(self.error_message_searchspace_fully_observed)
+            time_predictions = time.perf_counter_ns()
+            actual_candidate_params = list()
+            actual_candidate_indices = list()
+            actual_candidate_af_indices = list()
+            duplicate_candidate_af_indices = list()
+            duplicate_candidate_original_af_indices = list()
+            for af_index, af in enumerate(aqfs):
+                if af_index in skip_af_index:
+                    continue
+                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
+                    break
+                timer_start = time.perf_counter()
+                list_of_acquisition_values = af(predictions, hyperparam)
+                best_af = self.argopt(list_of_acquisition_values)
+                time_taken = time.perf_counter() - timer_start
+                af_runtimes[af_index] += time_taken
+                is_duplicate = best_af in actual_candidate_indices
+                if not is_duplicate:
+                    candidate_params = self.unvisited_cache[best_af]
+                    actual_candidate_params.append(candidate_params)
+                    actual_candidate_indices.append(best_af)
+                    actual_candidate_af_indices.append(af_index)
+                # register whether the AF suggested a duplicate candidate
+                duplicate_candidate_af_count[af_index].pop(0)
+                duplicate_candidate_af_count[af_index].append(1 if is_duplicate else 0)
+                if is_duplicate:
+                    # find the index of the AF that first registered the duplicate
+                    original_duplicate_af_index = actual_candidate_af_indices[actual_candidate_indices.index(best_af)]
+                    # register that AF as duplicate as well
+                    duplicate_candidate_af_count[original_duplicate_af_index][-1] = 1
+                    duplicate_candidate_af_indices.append(af_index)
+                    duplicate_candidate_original_af_indices.append(original_duplicate_af_index)
+            time_afs = time.perf_counter_ns()
+            # evaluate the non-duplicate candidates
+            for index, af_index in enumerate(actual_candidate_af_indices):
+                candidate_params = actual_candidate_params[index]
+                candidate_index = self.find_param_config_index(candidate_params)
+                observation = self.evaluate_objective_function(candidate_params)
+                self.update_after_evaluation(observation, candidate_index, candidate_params)
+                if observation != self.invalid_value:
+                    # we use the registered observations for maximization of the discounted reward
+                    reg_observation = observation if self.opt_direction == 'min' else -1 * observation
+                    af_observations[actual_candidate_af_indices[index]].append(reg_observation)
+                else:
+                    reg_invalid_observation = initial_sample_mean if self.opt_direction == 'min' else -1 * initial_sample_mean
+                    af_observations[actual_candidate_af_indices[index]].append(reg_invalid_observation)
+            for index, af_index in enumerate(duplicate_candidate_af_indices):
+                original_observation = af_observations[duplicate_candidate_original_af_indices[index]][-1]
+                af_observations[af_index].append(original_observation)
+            self.train_model_hyperparams()
+            time_eval = time.perf_counter_ns()
+            # assert that all observation lists of non-skipped acquisition functions are of the same length
+            non_skipped_af_indices = list(af_index for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index)
+            assert all(len(af_observations[non_skipped_af_indices[0]]) == len(af_observations[af_index]) for af_index in non_skipped_af_indices)
+            # find the AFs elligble for being skipped
+            candidates_for_skip = list()
+            for af_index, count in enumerate(duplicate_candidate_af_count):
+                if sum(count) >= skip_if_duplicate_n_times and af_index not in skip_af_index:
+                    candidates_for_skip.append(af_index)
+            # do not skip the AF with the lowest runtime
+            if len(candidates_for_skip) > 1:
+                candidates_for_skip_discounted = list(
+                    sum(list(obs * discount_factor**(len(observations) - 1 - i) for i, obs in enumerate(observations)))
+                    for af_index, observations in enumerate(af_observations) if af_index in candidates_for_skip)
+                af_not_to_skip = candidates_for_skip[np.argmin(candidates_for_skip_discounted)]
+                for af_index in candidates_for_skip:
+                    if af_index == af_not_to_skip:
+                        # do not skip the AF with the lowest runtime and give it a clean slate
+                        duplicate_candidate_af_count[af_index] = deepcopy(duplicate_count_template)
+                        continue
+                    skip_af_index.append(af_index)
+                    if len(skip_af_index) >= len(aqfs):
+                        raise ValueError("There are no acquisition functions left! This should not happen...")
+            time_af_selection = time.perf_counter_ns()
+
+            # printing timings
+            if self.log_timings:
+                time_taken_predictions = round(time_predictions - time_start, 3) / 1000
+                time_taken_afs = round(time_afs - time_predictions, 3) / 1000
+                time_taken_eval = round(time_eval - time_afs, 3) / 1000
+                time_taken_af_selection = round(time_af_selection - time_eval, 3) / 1000
+                time_taken_total = round(time_af_selection - time_start, 3) / 1000
+                print(
+                    f"({self.fevals}/{max_fevals}) Total time: {time_taken_total} | Predictions: {time_taken_predictions} | AFs: {time_taken_afs} | Eval: {time_taken_eval} | AF selection: {time_taken_af_selection}",
+                    flush=True)
+        return self.results
+
+    def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
+        """ Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once, unless increase_precision is true. Skips AFs if they are consistently worse than the mean of discounted observations, promotes AFs if they are consistently better than this mean. """
+        if self.opt_direction != 'min':
+            raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
+        aqfs = self.multi_afs
+        discount_factor = self.multi_afs_discount_factor
+        required_improvement_factor = self.multi_afs_required_improvement_factor
+        required_improvement_worse = 1 + required_improvement_factor
+        required_improvement_better = 1 - required_improvement_factor
+        min_required_count = self.af_params['skip_duplicate_after']
+        skip_af_index = list()
+        single_af = len(aqfs) <= len(skip_af_index) + 1
+        af_observations = [list(), list(), list()]
+        af_performs_worse_count = [0, 0, 0]
+        af_performs_better_count = [0, 0, 0]
+        while self.fevals < max_fevals:
+            if single_af:
+                return self.__optimize(max_fevals)
+            if self.__visited_num >= self.searchspace_size:
+                raise ValueError(self.error_message_searchspace_fully_observed)
+            observations_median = np.median(self.__valid_observations)
+            if increase_precision is False:
+                predictions = self.predict_list(self.unvisited_cache)
+                hyperparam = self.contextual_variance(predictions[1])
+            for af_index, af in enumerate(aqfs):
+                if af_index in skip_af_index:
+                    continue
+                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
+                    break
+                if increase_precision is True:
+                    predictions = self.predict_list(self.unvisited_cache)
+                    hyperparam = self.contextual_variance(predictions[1])
+                list_of_acquisition_values = af(predictions, hyperparam)
+                best_af = self.argopt(list_of_acquisition_values)
+                # to avoid going out of bounds on the next iteration, remove the best_af
+                predictions = (np.delete(predictions[0], best_af), np.delete(predictions[1], best_af))
+                candidate_params = self.unvisited_cache[best_af]
+                candidate_index = self.find_param_config_index(candidate_params)
+                observation = self.evaluate_objective_function(candidate_params)
+                self.update_after_evaluation(observation, candidate_index, candidate_params)
+                if increase_precision is True:
+                    self.train_model_hyperparams()
+                # we use the registered observations for maximization of the discounted reward
+                if observation != self.invalid_value:
+                    reg_observation = observation if self.opt_direction == 'min' else -1 * observation
+                    af_observations[af_index].append(reg_observation)
+                else:
+                    # if the observation is invalid, use the median of all valid observations to avoid skewing the discounted observations
+                    reg_invalid_observation = observations_median if self.opt_direction == 'min' else -1 * observations_median
+                    af_observations[af_index].append(reg_invalid_observation)
+            if increase_precision is False:
+                self.train_model_hyperparams()
+
+            # calculate the mean of discounted observations over the remaining acquisition functions
+            discounted_obs = list(
+                sum(list(obs * discount_factor**(len(observations) - 1 - i) for i, obs in enumerate(observations))) for observations in af_observations)
+            disc_obs_mean = np.mean(list(discounted_obs[af_index] for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index))
+
+            # register which AFs perform more than 10% better than average and which more than 10% worse than average
+            for af_index, discounted_observation in enumerate(discounted_obs):
+                if discounted_observation > disc_obs_mean * required_improvement_worse:
+                    af_performs_worse_count[af_index] += 1
+                elif discounted_observation < disc_obs_mean * required_improvement_better:
+                    af_performs_better_count[af_index] += 1
+
+            # find the worst AF, discounted observations is leading for a draw
+            worst_count = max(list(count for af_index, count in enumerate(af_performs_worse_count) if af_index not in skip_af_index))
+            af_index_worst = -1
+            if worst_count >= min_required_count:
+                for af_index, count in enumerate(af_performs_worse_count):
+                    if af_index not in skip_af_index and count == worst_count and (af_index_worst == -1
+                                                                                   or discounted_obs[af_index] > discounted_obs[af_index_worst]):
+                        af_index_worst = af_index
+
+            # skip the worst AF
+            if af_index_worst > -1:
+                skip_af_index.append(af_index_worst)
+                # reset the counts to even the playing field for the remaining AFs
+                af_performs_worse_count = [0, 0, 0]
+                af_performs_better_count = [0, 0, 0]
+                # if there is only one AF left, register as single AF
+                if len(aqfs) <= len(skip_af_index) + 1:
+                    single_af = True
+                    af_indices_left = list(af_index for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index)
+                    assert len(af_indices_left) == 1
+                    self.__af = aqfs[af_indices_left[0]]
+            else:
+                # find the best AF, discounted observations is leading for a draw
+                best_count = max(list(count for af_index, count in enumerate(af_performs_better_count) if af_index not in skip_af_index))
+                af_index_best = -1
+                if best_count >= min_required_count:
+                    for af_index, count in enumerate(af_performs_better_count):
+                        if af_index not in skip_af_index and count == best_count and (af_index_best == -1
+                                                                                      or discounted_obs[af_index] < discounted_obs[af_index_best]):
+                            af_index_best = af_index
+                # make the best AF single
+                if af_index_best > -1:
+                    single_af = True
+                    self.__af = aqfs[af_index_best]
+
+        return self.results
+
+    def __optimize_multi_fast(self, max_fevals):
+        """ Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once. """
+        while self.fevals < max_fevals:
+            aqfs = self.multi_afs
+            # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
+            predictions = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(predictions[1])
+            if self.__visited_num >= self.searchspace_size:
+                raise ValueError(self.error_message_searchspace_fully_observed)
+            for af in aqfs:
+                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
+                    break
+                list_of_acquisition_values = af(predictions, hyperparam)
+                best_af = self.argopt(list_of_acquisition_values)
+                del predictions[0][best_af]    # to avoid going out of bounds
+                del predictions[1][best_af]
+                candidate_params = self.unvisited_cache[best_af]
+                candidate_index = self.find_param_config_index(candidate_params)
+                observation = self.evaluate_objective_function(candidate_params)
+                self.update_after_evaluation(observation, candidate_index, candidate_params)
+            self.train_model_hyperparams()
+        return self.results
+
+    def af_random(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function returning a randomly shuffled list for comparison """
+        list_random = range(len(self.unvisited_cache))
+        shuffle(list_random)
+        return list_random
+
+    def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function Probability of Improvement (PI) """
+
+        # prefetch required data
+        x_mu, x_std = predictions
+        if hyperparam is None:
+            hyperparam = self.af_params['explorationfactor']
+        fplus = self.current_optimum - hyperparam
+
+        # precompute difference of improvement
+        list_diff_improvement = -((fplus - x_mu) / (x_std + 1E-9))
+
+        # compute probability of improvement with CDF in bulk
+        list_prob_improvement = norm.cdf(list_diff_improvement)
+        return list_prob_improvement
+
+    def af_expected_improvement(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function Expected Improvement (EI) """
+
+        # prefetch required data
+        x_mu, x_std = predictions
+        if hyperparam is None:
+            hyperparam = self.af_params['explorationfactor']
+        fplus = self.current_optimum - hyperparam
+
+        # precompute difference of improvement, CDF and PDF in bulk
+        list_diff_improvement = (fplus - x_mu) / (x_std + 1E-9)
+        list_cdf = norm.cdf(list_diff_improvement)
+        list_pdf = norm.pdf(list_diff_improvement)
+
+        # compute expected improvement in bulk
+        list_exp_improvement = -((fplus - x_mu) * list_cdf + x_std * list_pdf)
+        return list_exp_improvement
+
+    def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function Lower Confidence Bound (LCB) """
+
+        x_mu, x_std = predictions
+        if hyperparam is None:
+            hyperparam = self.af_params['explorationfactor']
+        beta = hyperparam
+
+        # compute LCB in bulk
+        list_lower_confidence_bound = (x_mu - beta * x_std)
+        return list_lower_confidence_bound
+
+    def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010 """
+
+        # prefetch required data
+        x_mu, x_std = predictions
+        if hyperparam is None:
+            hyperparam = self.af_params['explorationfactor']
+
+        # precompute beta parameter
+        zeta = self.af_params['zeta']
+        t = self.fevals
+        d = self.num_dimensions
+        delta = hyperparam
+        beta = np.sqrt(zeta * (2 * np.log((t**(d / 2. + 2)) * (np.pi**2) / (3. * delta))))
+
+        # compute UCB in bulk
+        list_lower_confidence_bound = (x_mu - beta * x_std)
+        return list_lower_confidence_bound
+
+    def visualize_after_opt(self):
+        """ Visualize the model after the optimization """
+        print(self.__model.kernel_.get_params())
+        print(self.__model.log_marginal_likelihood())
+        import matplotlib.pyplot as plt
+        mu, std = self.predict_list(self.searchspace)
+        brute_force_observations = list()
+        for param_config in self.searchspace:
+            obs = minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.results)
+            if obs == self.invalid_value:
+                obs = None
+            brute_force_observations.append(obs)
+        x_axis = range(len(mu))
+        plt.fill_between(x_axis, mu - std, mu + std, alpha=0.2, antialiased=True)
+        plt.plot(x_axis, mu, label="predictions", linestyle=' ', marker='.')
+        plt.plot(x_axis, brute_force_observations, label="actual", linestyle=' ', marker='.')
+        plt.legend()
+        plt.show()
diff --git a/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
new file mode 100644
index 000000000..891db5236
--- /dev/null
+++ b/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
@@ -0,0 +1,83 @@
+""" BOTorch package from https://github.com/pytorch/botorch """
+from __future__ import print_function
+
+from collections import OrderedDict
+import numpy as np
+
+try:
+    import torch
+    from botorch.models import SingleTaskGP
+    from botorch.fit import fit_gpytorch_model
+    from botorch.utils import standardize
+    from gpytorch.mlls import ExactMarginalLogLikelihood
+    from botorch.acquisition import UpperConfidenceBound
+    from botorch.optim import optimize_acqf
+except Exception:
+    BayesianOptimization = None
+    bayes_opt_present = False
+
+from kernel_tuner.strategies import minimize
+
+supported_methods = ["poi", "ei", "ucb"]
+
+
+def tune(runner, kernel_options, device_options, tuning_options):
+    """ Find the best performing kernel configuration in the parameter space
+
+    :params runner: A runner from kernel_tuner.runners
+    :type runner: kernel_tuner.runner
+
+    :param kernel_options: A dictionary with all options for the kernel.
+    :type kernel_options: kernel_tuner.interface.Options
+
+    :param device_options: A dictionary with all options for the device
+        on which the kernel should be tuned.
+    :type device_options: kernel_tuner.interface.Options
+
+    :param tuning_options: A dictionary with all options regarding the tuning
+        process.
+    :type tuning_options: kernel_tuner.interface.Options
+
+    :returns: A list of dictionaries for executed kernel configurations and their
+        execution times. And a dictionary that contains a information
+        about the hardware/software environment on which the tuning took place.
+    :rtype: list(dict()), dict()
+
+    """
+
+    if not bayes_opt_present:
+        raise ImportError("Error: optional dependency Bayesian Optimization not installed")
+    init_points = tuning_options.strategy_options.get("popsize", 20)
+    n_iter = tuning_options.strategy_options.get("max_fevals", 100)
+
+    # defaults as used by Bayesian Optimization Python package
+    acq = tuning_options.strategy_options.get("method", "ucb")
+    kappa = tuning_options.strategy_options.get("kappa", 2.576)
+    xi = tuning_options.strategy_options.get("xi", 0.0)
+
+    tuning_options["scaling"] = True
+
+    results = []
+
+    # function to pass to the optimizer
+    def func(**kwargs):
+        args = [kwargs[key] for key in tuning_options.tune_params.keys()]
+        return -1.0 * minimize._cost_func(args, kernel_options, tuning_options, runner, results)
+
+    bounds, _, _ = minimize.get_bounds_x0_eps(tuning_options)
+    pbounds = OrderedDict(zip(tuning_options.tune_params.keys(), bounds))
+
+    verbose = 0
+    if tuning_options.verbose:
+        verbose = 2
+
+    # print(np.isnan(init_points).any())
+
+    optimizer = BayesianOptimization(f=func, pbounds=pbounds, verbose=verbose)
+
+    optimizer.maximize(init_points=init_points, n_iter=n_iter, acq=acq, kappa=kappa, xi=xi)
+
+    if tuning_options.verbose:
+        print(optimizer.max)
+
+    return results, runner.dev.get_environment()
diff --git a/kernel_tuner/strategies/bayes_opt_old.py b/kernel_tuner/strategies/bayes_opt_old.py
new file mode 100644
index 000000000..6107fad0b
--- /dev/null
+++ b/kernel_tuner/strategies/bayes_opt_old.py
@@ -0,0 +1,837 @@
+""" Bayesian Optimization implementation from the thesis by Willemsen """
+from copy import deepcopy
+from random import randint, shuffle
+import itertools
+import warnings
+import time
+
+import numpy as np
+
+# BO imports
+try:
+    from typing import Tuple
+    from scipy.stats import norm
+    from sklearn.gaussian_process import GaussianProcessRegressor
+    from sklearn.gaussian_process.kernels import ConstantKernel, RBF, Matern
+    from sklearn.exceptions import ConvergenceWarning
+    from skopt.sampler import Lhs
+    bayes_opt_present = True
+except ImportError:
+    bayes_opt_present = False
+
+from kernel_tuner.strategies import minimize
+from kernel_tuner import util
+
+supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast"]
+
+
+def generate_normalized_param_dicts(tune_params: dict, eps: float) -> Tuple[dict, dict]:
+    """ Generates normalization and denormalization dictionaries """
+    original_to_normalized = dict()
+    normalized_to_original = dict()
+    for param_name in tune_params.keys():
+        original_to_normalized_dict = dict()
+        normalized_to_original_dict = dict()
+        for value_index, value in enumerate(tune_params[param_name]):
+            normalized_value = eps * value_index + 0.5 * eps
+            normalized_to_original_dict[normalized_value] = value
+            original_to_normalized_dict[value] = normalized_value
+        original_to_normalized[param_name] = original_to_normalized_dict
+        normalized_to_original[param_name] = normalized_to_original_dict
+    return original_to_normalized, normalized_to_original
+
+
+def normalize_parameter_space(param_space: list, tune_params: dict, normalized: dict) -> list:
+    """ Normalize the parameter space given a normalization dictionary """
+    keys = list(tune_params.keys())
+    param_space_normalized = list(tuple(normalized[keys[i]][v] for i, v in enumerate(params)) for params in param_space)
+    return param_space_normalized
+
+
+def prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict):
+    """ Pruning of the parameter space to remove dimensions that have a constant parameter """
+    pruned_tune_params_mask = list()
+    removed_tune_params = list()
+    param_names = list(tune_params.keys())
+    for index, key in enumerate(tune_params.keys()):
+        pruned_tune_params_mask.append(len(tune_params[key]) > 1)
+        if len(tune_params[key]) > 1:
+            removed_tune_params.append(None)
+        else:
+            value = tune_params[key][0]
+            normalized = normalize_dict[param_names[index]][value]
+            removed_tune_params.append(normalized)
+    if 'verbose' in tuning_options and tuning_options.verbose is True and len(tune_params.keys()) != sum(pruned_tune_params_mask):
+        print(f"Number of parameters (dimensions): {len(tune_params.keys())}, after pruning: {sum(pruned_tune_params_mask)}")
+    parameter_space = list(tuple(itertools.compress(param_config, pruned_tune_params_mask)) for param_config in parameter_space)
+    return parameter_space, removed_tune_params
+
+
+def tune(runner, kernel_options, device_options, tuning_options):
+    """ Find the best performing kernel configuration in the parameter space
+
+    :params runner: A runner from kernel_tuner.runners
+    :type runner: kernel_tuner.runner
+
+    :param kernel_options: A dictionary with all options for the kernel.
+    :type kernel_options: kernel_tuner.interface.Options
+
+    :param device_options: A dictionary with all options for the device
+        on which the kernel should be tuned.
+    :type device_options: kernel_tuner.interface.Options
+
+    :param tuning_options: A dictionary with all options regarding the tuning
+        process. Allows setting hyperparameters via the strategy_options key.
+    :type tuning_options: kernel_tuner.interface.Options
+
+    :returns: A list of dictionaries for executed kernel configurations and their
+        execution times. And a dictionary that contains a information
+        about the hardware/software environment on which the tuning took place.
+    :rtype: list(dict()), dict()
+
+    """
+
+    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
+    prune_parameterspace = tuning_options.strategy_options.get("pruneparameterspace", True)
+    if not bayes_opt_present:
+        raise ImportError("Error: optional dependencies for Bayesian Optimization not installed")
+
+    # epsilon for scaling should be the evenly spaced distance between the largest set of parameter options in an interval [0,1]
+    tune_params = tuning_options.tune_params
+    tuning_options["scaling"] = True
+    _, _, eps = minimize.get_bounds_x0_eps(tuning_options)
+
+    # compute cartesian product of all tunable parameters
+    parameter_space = itertools.product(*tune_params.values())
+
+    # check for search space restrictions
+    if tuning_options.restrictions is not None:
+        tuning_options.verbose = False
+    parameter_space = filter(lambda p: util.config_valid(p, tuning_options, runner.dev.max_threads), parameter_space)
+    parameter_space = list(parameter_space)
+    if len(parameter_space) < 1:
+        raise ValueError("Empty parameterspace after restrictionscheck. Restrictionscheck is possibly too strict.")
+    if len(parameter_space) == 1:
+        raise ValueError(f"Only one configuration after restrictionscheck. Restrictionscheck is possibly too strict. Configuration: {parameter_space[0]}")
+
+    # normalize search space to [0,1]
+    normalize_dict, denormalize_dict = generate_normalized_param_dicts(tune_params, eps)
+    parameter_space = normalize_parameter_space(parameter_space, tune_params, normalize_dict)
+
+    # prune the parameter space to remove dimensions that have a constant parameter
+    if prune_parameterspace:
+        parameter_space, removed_tune_params = prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict)
+    else:
+        parameter_space = list(parameter_space)
+        removed_tune_params = [None] * len(tune_params.keys())
+
+    # initialize and optimize
+    bo = BayesianOptimization(parameter_space, removed_tune_params, kernel_options, tuning_options, normalize_dict, denormalize_dict, runner)
+    results = bo.optimize(max_fevals)
+
+    return results, runner.dev.get_environment()
+
+
+class BayesianOptimization():
+
+    def __init__(self, searchspace: list, removed_tune_params: list, kernel_options: dict, tuning_options: dict, normalize_dict: dict, denormalize_dict: dict,
+                 runner, opt_direction='min'):
+        time_start = time.perf_counter_ns()
+
+        # supported hyperparameter values
+        self.supported_cov_kernels = ["constantrbf", "rbf", "matern32", "matern52"]
+        self.supported_methods = supported_methods
+        self.supported_sampling_methods = ["random", "lhs"]
+        self.supported_sampling_criterion = ["correlation", "ratio", "maximin", None]
+
+        def get_hyperparam(name: str, default, supported_values=list()):
+            value = tuning_options.strategy_options.get(name, default)
+            if len(supported_values) > 0 and value not in supported_values:
+                raise ValueError(f"'{name}' is set to {value}, but must be one of {supported_values}")
+            return value
+
+        # get hyperparameters
+        cov_kernel_name = get_hyperparam("covariancekernel", "matern32", self.supported_cov_kernels)
+        cov_kernel_lengthscale = get_hyperparam("covariancelengthscale", 1.5)
+        acquisition_function = get_hyperparam("method", "multi-advanced", self.supported_methods)
+        acq = acquisition_function
+        acq_params = get_hyperparam("methodparams", {})
+        multi_af_names = get_hyperparam("multi_af_names", ['ei', 'poi', 'lcb'])
+        self.multi_afs_discount_factor = get_hyperparam("multi_af_discount_factor", 0.65 if acq == 'multi' else 0.95)
+        self.multi_afs_required_improvement_factor = get_hyperparam("multi_afs_required_improvement_factor", 0.15 if acq == 'multi-advanced-precise' else 0.1)
+        self.num_initial_samples = get_hyperparam("popsize", 20)
+        self.sampling_method = get_hyperparam("samplingmethod", "lhs", self.supported_sampling_methods)
+        self.sampling_crit = get_hyperparam("samplingcriterion", 'maximin', self.supported_sampling_criterion)
+        self.sampling_iter = get_hyperparam("samplingiterations", 1000)
+
+        # set acquisition function hyperparameter defaults where missing
+        if 'explorationfactor' not in acq_params:
+            acq_params['explorationfactor'] = 'CV'
+        if 'zeta' not in acq_params:
+            acq_params['zeta'] = 1
+        if 'skip_duplicate_after' not in acq_params:
+            acq_params['skip_duplicate_after'] = 5
+
+        # set arguments
+        self.kernel_options = kernel_options
+        self.tuning_options = tuning_options
+        self.tune_params = tuning_options.tune_params
+        self.param_names = list(self.tune_params.keys())
+        self.normalized_dict = normalize_dict
+        self.denormalized_dict = denormalize_dict
+        self.runner = runner
+        self.max_threads = runner.dev.max_threads
+        self.log_timings = False
+
+        # set optimization constants
+        self.invalid_value = 1e20
+        self.opt_direction = opt_direction
+        if opt_direction == 'min':
+            self.worst_value = np.PINF
+            self.argopt = np.argmin
+        elif opt_direction == 'max':
+            self.worst_value = np.NINF
+            self.argopt = np.argmax
+        else:
+            raise ValueError("Invalid optimization direction '{}'".format(opt_direction))
+
+        # set the acquisition function and surrogate model
+        self.optimize = self.__optimize
+        self.af_name = acquisition_function
+        self.af_params = acq_params
+        self.multi_afs = list(self.get_af_by_name(af_name) for af_name in multi_af_names)
+        self.set_acquisition_function(acquisition_function)
+        self.set_surrogate_model(cov_kernel_name, cov_kernel_lengthscale)
+
+        # set remaining values
+        self.results = []
+        self.__searchspace = searchspace
+        self.removed_tune_params = removed_tune_params
+        self.searchspace_size = len(self.searchspace)
+        self.num_dimensions = len(self.dimensions())
+        self.__current_optimum = self.worst_value
+        self.cv_norm_maximum = None
+        self.fevals = 0
+        self.__visited_num = 0
+        self.__visited_valid_num = 0
+        self.__visited_searchspace_indices = [False] * self.searchspace_size
+        self.__observations = [np.NaN] * self.searchspace_size
+        self.__valid_observation_indices = [False] * self.searchspace_size
+        self.__valid_params = list()
+        self.__valid_observations = list()
+        self.unvisited_cache = self.unvisited()
+        time_setup = time.perf_counter_ns()
+        self.error_message_searchspace_fully_observed = "The search space has been fully observed"
+
+        # take initial sample
+        if self.num_initial_samples > 0:
+            self.initial_sample()
+            time_initial_sample = time.perf_counter_ns()
+
+        # print the timings
+        if self.log_timings:
+            time_taken_setup = round(time_setup - time_start, 3) / 1000
+            time_taken_initial_sample = round(time_initial_sample - time_setup, 3) / 1000
+            time_taken_total = round(time_initial_sample - time_start, 3) / 1000
+            print(f"Initialization | total time: {time_taken_total} | Setup: {time_taken_setup} | Initial sample: {time_taken_initial_sample}", flush=True)
+
+    @property
+    def searchspace(self):
+        return self.__searchspace
+
+    @property
+    def observations(self):
+        return self.__observations
+
+    @property
+    def current_optimum(self):
+        return self.__current_optimum
+
+    @current_optimum.setter
+    def current_optimum(self, value: float):
+        self.__current_optimum = value
+
+    def is_better_than(self, a: float, b: float) -> bool:
+        """ Determines which one is better depending on optimization direction """
+        return a < b if self.opt_direction == 'min' else a > b
+
+    def is_not_visited(self, index: int) -> bool:
+        """ Returns whether a searchspace index has not been visited """
+        return not self.__visited_searchspace_indices[index]
+
+    def is_valid(self, observation: float) -> bool:
+        """ Returns whether an observation is valid """
+        return not (observation == None or observation == self.invalid_value or observation == np.NaN)
+
+    def get_af_by_name(self, name: str):
+        """ Get the basic acquisition functions by their name """
+        basic_af_names = ['ei', 'poi', 'lcb']
+        if name == 'ei':
+            return self.af_expected_improvement
+        elif name == 'poi':
+            return self.af_probability_of_improvement
+        elif name == 'lcb':
+            return self.af_lower_confidence_bound
+        raise ValueError(f"{name} not in {basic_af_names}")
+
+    def set_acquisition_function(self, acquisition_function: str):
+        """ Set the acquisition function """
+        if acquisition_function == 'poi':
+            self.__af = self.af_probability_of_improvement
+        elif acquisition_function == 'ei':
+            self.__af = self.af_expected_improvement
+        elif acquisition_function == 'lcb':
+            self.__af = self.af_lower_confidence_bound
+        elif acquisition_function == 'lcb-srinivas':
+            self.__af = self.af_lower_confidence_bound_srinivas
+        elif acquisition_function == 'random':
+            self.__af = self.af_random
+        elif acquisition_function == 'multi':
+            self.optimize = self.__optimize_multi
+        elif acquisition_function == 'multi-advanced':
+            self.optimize = self.__optimize_multi_advanced
+        elif acquisition_function == 'multi-fast':
+            self.optimize = self.__optimize_multi_fast
+        else:
+            raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
+
+    def set_surrogate_model(self, cov_kernel_name: str, cov_kernel_lengthscale: float):
+        """ Set the surrogate model with a covariance function and lengthscale """
+        if cov_kernel_name == "constantrbf":
+            kernel = ConstantKernel(1.0, constant_value_bounds="fixed") * RBF(cov_kernel_lengthscale, length_scale_bounds="fixed")
+        elif cov_kernel_name == "rbf":
+            kernel = RBF(length_scale=cov_kernel_lengthscale, length_scale_bounds="fixed")
+        elif cov_kernel_name == "matern32":
+            kernel = Matern(length_scale=cov_kernel_lengthscale, nu=1.5, length_scale_bounds="fixed")
+        elif cov_kernel_name == "matern52":
+            kernel = Matern(length_scale=cov_kernel_lengthscale, nu=2.5, length_scale_bounds="fixed")
+        else:
+            raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_cov_kernels, cov_kernel_name))
+        self.__model = GaussianProcessRegressor(kernel=kernel, alpha=1e-10, normalize_y=True)    # maybe change alpha to a higher value such as 1e-5?
+
+    def valid_params_observations(self) -> Tuple[list, list]:
+        """ Returns a list of valid observations and their parameter configurations """
+        # if you do this every iteration, better keep it as cache and update in update_after_evaluation
+        params = list()
+        observations = list()
+        for index, valid in enumerate(self.__valid_observation_indices):
+            if valid is True:
+                params.append(self.searchspace[index])
+                observations.append(self.observations[index])
+        return params, observations
+
+    def unvisited(self) -> list:
+        """ Returns a list of unvisited parameter configurations - attention: cached version exists! """
+        params = list(self.searchspace[index] for index, visited in enumerate(self.__visited_searchspace_indices) if visited is False)
+        return params
+
+    def find_param_config_index(self, param_config: tuple) -> int:
+        """ Find a parameter config index in the search space if it exists """
+        return self.searchspace.index(param_config)
+
+    def find_param_config_unvisited_index(self, param_config: tuple) -> int:
+        """ Find a parameter config index in the unvisited cache if it exists """
+        return self.unvisited_cache.index(param_config)
+
+    def normalize_param_config(self, param_config: tuple) -> tuple:
+        """ Normalizes a parameter configuration """
+        normalized = tuple(self.normalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
+        return normalized
+
+    def denormalize_param_config(self, param_config: tuple) -> tuple:
+        """ Denormalizes a parameter configuration """
+        denormalized = tuple(self.denormalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
+        return denormalized
+
+    def unprune_param_config(self, param_config: tuple) -> tuple:
+        """ In case of pruned dimensions, adds the removed dimensions back in the param config """
+        unpruned = list()
+        pruned_count = 0
+        for removed in self.removed_tune_params:
+            if removed is not None:
+                unpruned.append(removed)
+            else:
+                unpruned.append(param_config[pruned_count])
+                pruned_count += 1
+        return tuple(unpruned)
+
+    def update_after_evaluation(self, observation: float, index: int, param_config: tuple):
+        """ Adjust the visited and valid index records accordingly """
+        validity = self.is_valid(observation)
+        self.__visited_num += 1
+        self.__observations[index] = observation
+        self.__visited_searchspace_indices[index] = True
+        del self.unvisited_cache[self.find_param_config_unvisited_index(param_config)]
+        self.__valid_observation_indices[index] = validity
+        if validity is True:
+            self.__visited_valid_num += 1
+            self.__valid_params.append(param_config)
+            self.__valid_observations.append(observation)
+            if self.is_better_than(observation, self.current_optimum):
+                self.current_optimum = observation
+
+    def predict(self, x) -> Tuple[float, float]:
+        """ Returns a mean and standard deviation predicted by the surrogate model for the parameter configuration """
+        return self.__model.predict([x], return_std=True)
+
+    def predict_list(self, lst: list) -> Tuple[list, list, list]:
+        """ Returns a list of means and standard deviations predicted by the surrogate model for the parameter configurations, and separate lists of means and standard deviations """
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            mu, std = self.__model.predict(lst, return_std=True)
+            return list(zip(mu, std)), mu, std
+
+    def fit_observations_to_model(self):
+        """ Update the model based on the current list of observations """
+        self.__model.fit(self.__valid_params, self.__valid_observations)
+
+    def evaluate_objective_function(self, param_config: tuple) -> float:
+        """ Evaluates the objective function """
+        param_config = self.unprune_param_config(param_config)
+        denormalized_param_config = self.denormalize_param_config(param_config)
+        if not util.config_valid(denormalized_param_config, self.tuning_options, self.max_threads):
+            return self.invalid_value
+        val = minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.results)
+        self.fevals += 1
+        return val
+
+    def dimensions(self) -> list:
+        """ List of parameter values per parameter """
+        return self.tune_params.values()
+
+    def draw_random_sample(self) -> Tuple[list, int]:
+        """ Draw a random sample from the unvisited parameter configurations """
+        if len(self.unvisited_cache) < 1:
+            raise ValueError("Searchspace exhausted during random sample draw as no valid configurations were found")
+        index = randint(0, len(self.unvisited_cache) - 1)    # NOSONAR
+        param_config = self.unvisited_cache[index]
+        actual_index = self.find_param_config_index(param_config)
+        return param_config, actual_index
+
+    def draw_latin_hypercube_samples(self, num_samples: int) -> list:
+        """ Draws an LHS-distributed sample from the search space """
+        if self.searchspace_size < num_samples:
+            raise ValueError("Can't sample more than the size of the search space")
+        if self.sampling_crit is None:
+            lhs = Lhs(lhs_type="centered", criterion=None)
+        else:
+            lhs = Lhs(lhs_type="classic", criterion=self.sampling_crit, iterations=self.sampling_iter)
+        param_configs = lhs.generate(self.dimensions(), num_samples)
+        indices = list()
+        normalized_param_configs = list()
+        for i in range(len(param_configs) - 1):
+            try:
+                param_config = self.normalize_param_config(param_configs[i])
+                index = self.find_param_config_index(param_config)
+                indices.append(index)
+                normalized_param_configs.append(param_config)
+            except ValueError:
+                """ Due to search space restrictions, the search space may not be an exact cartesian product of the tunable parameter values.
+                It is thus possible for LHS to generate a parameter combination that is not in the actual searchspace, which must be skipped. """
+                continue
+        return list(zip(normalized_param_configs, indices))
+
+    def initial_sample(self):
+        """ Draws an initial sample using random sampling """
+        if self.num_initial_samples <= 0:
+            raise ValueError("At least one initial sample is required")
+        if self.sampling_method == 'lhs':
+            samples = self.draw_latin_hypercube_samples(self.num_initial_samples)
+        elif self.sampling_method == 'random':
+            samples = list()
+        else:
+            raise ValueError("Sampling method must be one of {}, is {}".format(self.supported_sampling_methods, self.sampling_method))
+        # collect the samples
+        collected_samples = 0
+        for params, index in samples:
+            observation = self.evaluate_objective_function(params)
+            self.update_after_evaluation(observation, index, params)
+            if self.is_valid(observation):
+                collected_samples += 1
+        # collect the remainder of the samples
+        while collected_samples < self.num_initial_samples:
+            params, index = self.draw_random_sample()
+            observation = self.evaluate_objective_function(params)
+            self.update_after_evaluation(observation, index, params)
+            # check for validity to avoid having no actual initial samples
+            if self.is_valid(observation):
+                collected_samples += 1
+        self.fit_observations_to_model()
+        _, _, std = self.predict_list(self.unvisited_cache)
+        self.initial_sample_mean = np.mean(self.__valid_observations)
+        # Alternatively:
+        # self.initial_sample_std = np.std(self.__valid_observations)
+        # self.initial_sample_mean = np.mean(predictions)
+        self.initial_std = np.mean(std)
+        self.cv_norm_maximum = self.initial_std
+
+    def contextual_variance(self, std: list):
+        """ Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018) """
+        if not self.af_params['explorationfactor'] == 'CV':
+            return None
+        if self.opt_direction == 'min':
+            if self.current_optimum == self.worst_value:
+                return 0.01
+            if self.current_optimum <= 0:
+                # doesn't work well for minimization beyond 0, should that even be a thing?
+                return abs(np.mean(std) / self.current_optimum)
+            improvement_over_initial_sample = self.initial_sample_mean / self.current_optimum
+            cv = np.mean(std) / improvement_over_initial_sample
+            # normalize if available
+            if self.cv_norm_maximum:
+                cv = cv / self.cv_norm_maximum
+            return cv
+        return np.mean(std) / self.current_optimum
+
+    def __optimize(self, max_fevals):
+        """ Find the next best candidate configuration(s), evaluate those and update the model accordingly """
+        while self.fevals < max_fevals:
+            if self.__visited_num >= self.searchspace_size:
+                raise ValueError(self.error_message_searchspace_fully_observed)
+            predictions, _, std = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(std)
+            list_of_acquisition_values = self.__af(predictions, hyperparam)
+            # afterwards select the best AF value
+            best_af = self.argopt(list_of_acquisition_values)
+            candidate_params = self.unvisited_cache[best_af]
+            candidate_index = self.find_param_config_index(candidate_params)
+            observation = self.evaluate_objective_function(candidate_params)
+            self.update_after_evaluation(observation, candidate_index, candidate_params)
+            self.fit_observations_to_model()
+        return self.results
+
+    def __optimize_multi(self, max_fevals):
+        """ Optimize with a portfolio of multiple acquisition functions. Predictions are always only taken once. Skips AFs if they suggest X/max_evals duplicates in a row, prefers AF with best discounted average. """
+        if self.opt_direction != 'min':
+            raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
+        # calculate how many times an AF can suggest a duplicate candidate before the AF is skipped
+        # skip_duplicates_fraction = self.af_params['skip_duplicates_fraction']
+        # skip_if_duplicate_n_times = int(min(max(round(skip_duplicates_fraction * max_fevals), 3), max_fevals))
+        skip_if_duplicate_n_times = self.af_params['skip_duplicate_after']
+        discount_factor = self.multi_afs_discount_factor
+        # setup the registration of duplicates and runtimes
+        duplicate_count_template = [0 for _ in range(skip_if_duplicate_n_times)]
+        duplicate_candidate_af_count = list(deepcopy(duplicate_count_template) for _ in range(3))
+        skip_af_index = list()
+        af_runtimes = [0, 0, 0]
+        af_observations = [list(), list(), list()]
+        initial_sample_mean = np.mean(self.__valid_observations)
+        while self.fevals < max_fevals:
+            time_start = time.perf_counter_ns()
+            # the first acquisition function is never skipped, so that should be the best for the endgame (EI)
+            aqfs = self.multi_afs
+            predictions, _, std = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(std)
+            if self.__visited_num >= self.searchspace_size:
+                raise ValueError(self.error_message_searchspace_fully_observed)
+            time_predictions = time.perf_counter_ns()
+            actual_candidate_params = list()
+            actual_candidate_indices = list()
+            actual_candidate_af_indices = list()
+            duplicate_candidate_af_indices = list()
+            duplicate_candidate_original_af_indices = list()
+            for af_index, af in enumerate(aqfs):
+                if af_index in skip_af_index:
+                    continue
+                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
+                    break
+                timer_start = time.perf_counter()
+                list_of_acquisition_values = af(predictions, hyperparam)
+                best_af = self.argopt(list_of_acquisition_values)
+                time_taken = time.perf_counter() - timer_start
+                af_runtimes[af_index] += time_taken
+                is_duplicate = best_af in actual_candidate_indices
+                if not is_duplicate:
+                    candidate_params = self.unvisited_cache[best_af]
+                    actual_candidate_params.append(candidate_params)
+                    actual_candidate_indices.append(best_af)
+                    actual_candidate_af_indices.append(af_index)
+                # register whether the AF suggested a duplicate candidate
+                duplicate_candidate_af_count[af_index].pop(0)
+                duplicate_candidate_af_count[af_index].append(1 if is_duplicate else 0)
+                if is_duplicate:
+                    # find the index of the AF that first registered the duplicate
+                    original_duplicate_af_index = actual_candidate_af_indices[actual_candidate_indices.index(best_af)]
+                    # register that AF as duplicate as well
+                    duplicate_candidate_af_count[original_duplicate_af_index][-1] = 1
+                    duplicate_candidate_af_indices.append(af_index)
+                    duplicate_candidate_original_af_indices.append(original_duplicate_af_index)
+            time_afs = time.perf_counter_ns()
+            # evaluate the non-duplicate candidates
+            for index, af_index in enumerate(actual_candidate_af_indices):
+                candidate_params = actual_candidate_params[index]
+                candidate_index = self.find_param_config_index(candidate_params)
+                observation = self.evaluate_objective_function(candidate_params)
+                self.update_after_evaluation(observation, candidate_index, candidate_params)
+                if observation != self.invalid_value:
+                    # we use the registered observations for maximization of the discounted reward
+                    reg_observation = observation if self.opt_direction == 'min' else -1 * observation
+                    af_observations[actual_candidate_af_indices[index]].append(reg_observation)
+                else:
+                    reg_invalid_observation = initial_sample_mean if self.opt_direction == 'min' else -1 * initial_sample_mean
+                    af_observations[actual_candidate_af_indices[index]].append(reg_invalid_observation)
+            for index, af_index in enumerate(duplicate_candidate_af_indices):
+                original_observation = af_observations[duplicate_candidate_original_af_indices[index]][-1]
+                af_observations[af_index].append(original_observation)
+            self.fit_observations_to_model()
+            time_eval = time.perf_counter_ns()
+            # assert that all observation lists of non-skipped acquisition functions are of the same length
+            non_skipped_af_indices = list(af_index for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index)
+            assert all(len(af_observations[non_skipped_af_indices[0]]) == len(af_observations[af_index]) for af_index in non_skipped_af_indices)
+            # find the AFs elligble for being skipped
+            candidates_for_skip = list()
+            for af_index, count in enumerate(duplicate_candidate_af_count):
+                if sum(count) >= skip_if_duplicate_n_times and af_index not in skip_af_index:
+                    candidates_for_skip.append(af_index)
+            # do not skip the AF with the lowest runtime
+            if len(candidates_for_skip) > 1:
+                candidates_for_skip_discounted = list(
+                    sum(list(obs * discount_factor**(len(observations) - 1 - i) for i, obs in enumerate(observations)))
+                    for af_index, observations in enumerate(af_observations) if af_index in candidates_for_skip)
+                af_not_to_skip = candidates_for_skip[np.argmin(candidates_for_skip_discounted)]
+                for af_index in candidates_for_skip:
+                    if af_index == af_not_to_skip:
+                        # do not skip the AF with the lowest runtime and give it a clean slate
+                        duplicate_candidate_af_count[af_index] = deepcopy(duplicate_count_template)
+                        continue
+                    skip_af_index.append(af_index)
+                    if len(skip_af_index) >= len(aqfs):
+                        raise ValueError("There are no acquisition functions left! This should not happen...")
+            time_af_selection = time.perf_counter_ns()
+
+            # printing timings
+            if self.log_timings:
+                time_taken_predictions = round(time_predictions - time_start, 3) / 1000
+                time_taken_afs = round(time_afs - time_predictions, 3) / 1000
+                time_taken_eval = round(time_eval - time_afs, 3) / 1000
+                time_taken_af_selection = round(time_af_selection - time_eval, 3) / 1000
+                time_taken_total = round(time_af_selection - time_start, 3) / 1000
+                print(
+                    f"({self.fevals}/{max_fevals}) Total time: {time_taken_total} | Predictions: {time_taken_predictions} | AFs: {time_taken_afs} | Eval: {time_taken_eval} | AF selection: {time_taken_af_selection}",
+                    flush=True)
+        return self.results
+
+    def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
+        """ Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once, unless increase_precision is true. Skips AFs if they are consistently worse than the mean of discounted observations, promotes AFs if they are consistently better than this mean. """
+        if self.opt_direction != 'min':
+            raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
+        aqfs = self.multi_afs
+        discount_factor = self.multi_afs_discount_factor
+        required_improvement_factor = self.multi_afs_required_improvement_factor
+        required_improvement_worse = 1 + required_improvement_factor
+        required_improvement_better = 1 - required_improvement_factor
+        min_required_count = self.af_params['skip_duplicate_after']
+        skip_af_index = list()
+        single_af = len(aqfs) <= len(skip_af_index) + 1
+        af_observations = [list(), list(), list()]
+        af_performs_worse_count = [0, 0, 0]
+        af_performs_better_count = [0, 0, 0]
+        while self.fevals < max_fevals:
+            if single_af:
+                return self.__optimize(max_fevals)
+            if self.__visited_num >= self.searchspace_size:
+                raise ValueError(self.error_message_searchspace_fully_observed)
+            observations_median = np.median(self.__valid_observations)
+            if increase_precision is False:
+                predictions, _, std = self.predict_list(self.unvisited_cache)
+                hyperparam = self.contextual_variance(std)
+            for af_index, af in enumerate(aqfs):
+                if af_index in skip_af_index:
+                    continue
+                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
+                    break
+                if increase_precision is True:
+                    predictions, _, std = self.predict_list(self.unvisited_cache)
+                    hyperparam = self.contextual_variance(std)
+                list_of_acquisition_values = af(predictions, hyperparam)
+                best_af = self.argopt(list_of_acquisition_values)
+                del predictions[best_af]    # to avoid going out of bounds
+                candidate_params = self.unvisited_cache[best_af]
+                candidate_index = self.find_param_config_index(candidate_params)
+                observation = self.evaluate_objective_function(candidate_params)
+                self.update_after_evaluation(observation, candidate_index, candidate_params)
+                if increase_precision is True:
+                    self.fit_observations_to_model()
+                # we use the registered observations for maximization of the discounted reward
+                if observation != self.invalid_value:
+                    reg_observation = observation if self.opt_direction == 'min' else -1 * observation
+                    af_observations[af_index].append(reg_observation)
+                else:
+                    # if the observation is invalid, use the median of all valid observations to avoid skewing the discounted observations
+                    reg_invalid_observation = observations_median if self.opt_direction == 'min' else -1 * observations_median
+                    af_observations[af_index].append(reg_invalid_observation)
+            if increase_precision is False:
+                self.fit_observations_to_model()
+
+            # calculate the mean of discounted observations over the remaining acquisition functions
+            discounted_obs = list(
+                sum(list(obs * discount_factor**(len(observations) - 1 - i) for i, obs in enumerate(observations))) for observations in af_observations)
+            disc_obs_mean = np.mean(list(discounted_obs[af_index] for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index))
+
+            # register which AFs perform more than 10% better than average and which more than 10% worse than average
+            for af_index, discounted_observation in enumerate(discounted_obs):
+                if discounted_observation > disc_obs_mean * required_improvement_worse:
+                    af_performs_worse_count[af_index] += 1
+                elif discounted_observation < disc_obs_mean * required_improvement_better:
+                    af_performs_better_count[af_index] += 1
+
+            # find the worst AF, discounted observations is leading for a draw
+            worst_count = max(list(count for af_index, count in enumerate(af_performs_worse_count) if af_index not in skip_af_index))
+            af_index_worst = -1
+            if worst_count >= min_required_count:
+                for af_index, count in enumerate(af_performs_worse_count):
+                    if af_index not in skip_af_index and count == worst_count and (af_index_worst == -1
+                                                                                   or discounted_obs[af_index] > discounted_obs[af_index_worst]):
+                        af_index_worst = af_index
+
+            # skip the worst AF
+            if af_index_worst > -1:
+                skip_af_index.append(af_index_worst)
+                # reset the counts to even the playing field for the remaining AFs
+                af_performs_worse_count = [0, 0, 0]
+                af_performs_better_count = [0, 0, 0]
+                # if there is only one AF left, register as single AF
+                if len(aqfs) <= len(skip_af_index) + 1:
+                    single_af = True
+                    af_indices_left = list(af_index for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index)
+                    assert len(af_indices_left) == 1
+                    self.__af = aqfs[af_indices_left[0]]
+            else:
+                # find the best AF, discounted observations is leading for a draw
+                best_count = max(list(count for af_index, count in enumerate(af_performs_better_count) if af_index not in skip_af_index))
+                af_index_best = -1
+                if best_count >= min_required_count:
+                    for af_index, count in enumerate(af_performs_better_count):
+                        if af_index not in skip_af_index and count == best_count and (af_index_best == -1
+                                                                                      or discounted_obs[af_index] < discounted_obs[af_index_best]):
+                            af_index_best = af_index
+                # make the best AF single
+                if af_index_best > -1:
+                    single_af = True
+                    self.__af = aqfs[af_index_best]
+
+        return self.results
+
+    def __optimize_multi_fast(self, max_fevals):
+        """ Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once. """
+        while self.fevals < max_fevals:
+            aqfs = self.multi_afs
+            # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
+            predictions, _, std = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(std)
+            if self.__visited_num >= self.searchspace_size:
+                raise ValueError(self.error_message_searchspace_fully_observed)
+            for af in aqfs:
+                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
+                    break
+                list_of_acquisition_values = af(predictions, hyperparam)
+                best_af = self.argopt(list_of_acquisition_values)
+                del predictions[best_af]    # to avoid going out of bounds
+                candidate_params = self.unvisited_cache[best_af]
+                candidate_index = self.find_param_config_index(candidate_params)
+                observation = self.evaluate_objective_function(candidate_params)
+                self.update_after_evaluation(observation, candidate_index, candidate_params)
+            self.fit_observations_to_model()
+        return self.results
+
+    def af_random(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function returning a randomly shuffled list for comparison """
+        list_random = range(len(self.unvisited_cache))
+        shuffle(list_random)
+        return list_random
+
+    def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function Probability of Improvement (PI) """
+
+        # prefetch required data
+        if predictions is None:
+            predictions, _, _ = self.predict_list(self.unvisited_cache)
+        if hyperparam is None:
+            hyperparam = self.af_params['explorationfactor']
+        fplus = self.current_optimum - hyperparam
+
+        # precompute difference of improvement
+        list_diff_improvement = list(-((fplus - x_mu) / (x_std + 1E-9)) for (x_mu, x_std) in predictions)
+
+        # compute probability of improvement with CDF in bulk
+        list_prob_improvement = norm.cdf(list_diff_improvement)
+
+        return list_prob_improvement
+
+    def af_expected_improvement(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function Expected Improvement (EI) """
+
+        # prefetch required data
+        if predictions is None:
+            predictions, _, _ = self.predict_list(self.unvisited_cache)
+        if hyperparam is None:
+            hyperparam = self.af_params['explorationfactor']
+        fplus = self.current_optimum - hyperparam
+
+        # precompute difference of improvement, CDF and PDF in bulk
+        list_diff_improvement = list((fplus - x_mu) / (x_std + 1E-9) for (x_mu, x_std) in predictions)
+        list_cdf = norm.cdf(list_diff_improvement)
+        list_pdf = norm.pdf(list_diff_improvement)
+
+        # specify AF calculation
+        def exp_improvement(index) -> float:
+            x_mu, x_std = predictions[index]
+            ei = (fplus - x_mu) * list_cdf[index] + x_std * list_pdf[index]
+            return -ei
+
+        # calculate AF
+        list_exp_improvement = list(map(exp_improvement, range(len(predictions))))
+        return list_exp_improvement
+
+    def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function Lower Confidence Bound (LCB) """
+
+        # prefetch required data
+        if predictions is None:
+            predictions, _, _ = self.predict_list(self.unvisited_cache)
+        if hyperparam is None:
+            hyperparam = self.af_params['explorationfactor']
+        beta = hyperparam
+
+        # compute LCB in bulk
+        list_lower_confidence_bound = list(x_mu - beta * x_std for (x_mu, x_std) in predictions)
+        return list_lower_confidence_bound
+
+    def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010 """
+
+        # prefetch required data
+        if predictions is None:
+            predictions, _, _ = self.predict_list(self.unvisited_cache)
+        if hyperparam is None:
+            hyperparam = self.af_params['explorationfactor']
+
+        # precompute beta parameter
+        zeta = self.af_params['zeta']
+        t = self.fevals
+        d = self.num_dimensions
+        delta = hyperparam
+        beta = np.sqrt(zeta * (2 * np.log((t**(d / 2. + 2)) * (np.pi**2) / (3. * delta))))
+
+        # compute UCB in bulk
+        list_lower_confidence_bound = list(x_mu - beta * x_std for (x_mu, x_std) in predictions)
+        return list_lower_confidence_bound
+
+    def visualize_after_opt(self):
+        """ Visualize the model after the optimization """
+        print(self.__model.kernel_.get_params())
+        print(self.__model.log_marginal_likelihood())
+        import matplotlib.pyplot as plt
+        _, mu, std = self.predict_list(self.searchspace)
+        brute_force_observations = list()
+        for param_config in self.searchspace:
+            obs = minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.results)
+            if obs == self.invalid_value:
+                obs = None
+            brute_force_observations.append(obs)
+        x_axis = range(len(mu))
+        plt.fill_between(x_axis, mu - std, mu + std, alpha=0.2, antialiased=True)
+        plt.plot(x_axis, mu, label="predictions", linestyle=' ', marker='.')
+        plt.plot(x_axis, brute_force_observations, label="actual", linestyle=' ', marker='.')
+        plt.legend()
+        plt.show()

From 3273dd3e26e908d082a0259c6b88752437d6d3ca Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 12 Jan 2022 11:32:52 +0100
Subject: [PATCH 002/253] Completely new Bayesian Optimizaation implementation

---
 .gitignore                                    |   1 +
 kernel_tuner/core.py                          |  39 +-
 kernel_tuner/cupy.py                          |  32 +-
 kernel_tuner/interface.py                     |   3 +-
 kernel_tuner/python.py                        | 147 ++++
 kernel_tuner/strategies/bayes_opt_GPyTorch.py |  14 +-
 .../strategies/bayes_opt_GPyTorch_lean.py     | 746 ++++++++++++++++++
 kernel_tuner/util.py                          |  15 +-
 8 files changed, 956 insertions(+), 41 deletions(-)
 create mode 100644 kernel_tuner/python.py
 create mode 100644 kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py

diff --git a/.gitignore b/.gitignore
index 0bf256bf8..90d7e1c89 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,4 @@ examples/cuda/output
 deploy_key
 *.mod
 temp_*.*
+.DS_Store
diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py
index 68fee0b39..fac470b8b 100644
--- a/kernel_tuner/core.py
+++ b/kernel_tuner/core.py
@@ -16,17 +16,15 @@
 from kernel_tuner.cuda import CudaFunctions
 from kernel_tuner.opencl import OpenCLFunctions
 from kernel_tuner.c import CFunctions
+from kernel_tuner.python import PythonFunctions
 from kernel_tuner.nvml import NVMLObserver
 import kernel_tuner.util as util
 
-
 try:
     import torch
 except ImportError:
     torch = util.TorchPlaceHolder()
 
-
-
 _KernelInstance = namedtuple("_KernelInstance", ["name", "kernel_source", "kernel_string", "temp_files", "threads", "grid", "params", "arguments"])
 
 
@@ -173,7 +171,8 @@ def get_suffix(self, index=0):
         _suffixes = {
             'CUDA': '.cu',
             'OpenCL': '.cl',
-            'C': '.c'
+            'C': '.c',
+            'Python': '.py'
         }
         try:
             return _suffixes[self.lang]
@@ -237,6 +236,8 @@ def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=No
             dev = OpenCLFunctions(device, platform, compiler_options=compiler_options, iterations=iterations, observers=observers)
         elif lang == "C":
             dev = CFunctions(compiler=compiler, compiler_options=compiler_options, iterations=iterations)
+        elif lang == "Python":
+            dev = PythonFunctions(iterations=iterations)
         else:
             raise ValueError("Sorry, support for languages other than CUDA, OpenCL, or C is not implemented yet")
 
@@ -507,23 +508,23 @@ def _default_verify_function(instance, answer, result_host, atol, verbose):
         if answer[i] is not None:    #skip None elements in the answer list
             if isinstance(answer[i], (np.ndarray, cp.ndarray)) and isinstance(arg, (np.ndarray, cp.ndarray)):
                 if answer[i].dtype != arg.dtype:
-                    raise TypeError(f"Element {i} of the expected results list is not of the same dtype as the kernel output: " +
-                                    str(answer[i].dtype) + " != " + str(arg.dtype) + ".")
+                    raise TypeError(f"Element {i} of the expected results list is not of the same dtype as the kernel output: " + str(answer[i].dtype) +
+                                    " != " + str(arg.dtype) + ".")
                 if answer[i].size != arg.size:
-                    raise TypeError(f"Element {i} of the expected results list has a size different from " + "the kernel argument: " +
-                                    str(answer[i].size) + " != " + str(arg.size) + ".")
+                    raise TypeError(f"Element {i} of the expected results list has a size different from " + "the kernel argument: " + str(answer[i].size) +
+                                    " != " + str(arg.size) + ".")
             elif isinstance(answer[i], torch.Tensor) and isinstance(arg, torch.Tensor):
                 if answer[i].dtype != arg.dtype:
-                    raise TypeError(f"Element {i} of the expected results list is not of the same dtype as the kernel output: " +
-                                    str(answer[i].dtype) + " != " + str(arg.dtype) + ".")
+                    raise TypeError(f"Element {i} of the expected results list is not of the same dtype as the kernel output: " + str(answer[i].dtype) +
+                                    " != " + str(arg.dtype) + ".")
                 if answer[i].size() != arg.size():
-                    raise TypeError(f"Element {i} of the expected results list has a size different from " + "the kernel argument: " +
-                                    str(answer[i].size) + " != " + str(arg.size) + ".")
+                    raise TypeError(f"Element {i} of the expected results list has a size different from " + "the kernel argument: " + str(answer[i].size) +
+                                    " != " + str(arg.size) + ".")
 
             elif isinstance(answer[i], np.number) and isinstance(arg, np.number):
                 if answer[i].dtype != arg.dtype:
-                    raise TypeError(f"Element {i} of the expected results list is not the same as the kernel output: " + str(answer[i].dtype) +
-                                    " != " + str(arg.dtype) + ".")
+                    raise TypeError(f"Element {i} of the expected results list is not the same as the kernel output: " + str(answer[i].dtype) + " != " +
+                                    str(arg.dtype) + ".")
             else:
                 #either answer[i] and argument have different types or answer[i] is not a numpy type
                 if not isinstance(answer[i], (np.ndarray, cp.ndarray, torch.Tensor)) or not isinstance(answer[i], np.number):
@@ -572,7 +573,6 @@ def _flatten(a):
     return correct
 
 
-
 #these functions facilitate compiling templated kernels with PyCuda
 def split_argument_list(argument_list):
     """split all arguments in a list into types and names"""
@@ -587,20 +587,24 @@ def split_argument_list(argument_list):
         name_list.append(match.group(2).strip())
     return type_list, name_list
 
+
 def apply_template_typenames(type_list, templated_typenames):
     """replace the typename tokens in type_list with their templated typenames"""
+
     def replace_typename_token(matchobj):
         """function for a whitespace preserving token regex replace"""
         #replace only the match, leaving the whitespace around it as is
         return matchobj.group(1) + templated_typenames[matchobj.group(2)] + matchobj.group(3)
+
     for i, arg_type in enumerate(type_list):
-        for k,v in templated_typenames.items():
+        for k, v in templated_typenames.items():
             #if the templated typename occurs as a token in the string, meaning that it is enclosed in
             #beginning of string or whitespace, and end of string, whitespace or star
             regex = r"(^|\s+)(" + k + r")($|\s+|\*)"
             sub = re.sub(regex, replace_typename_token, arg_type, re.S)
             type_list[i] = sub
 
+
 def get_templated_typenames(template_parameters, template_arguments):
     """based on the template parameters and arguments, create dict with templated typenames"""
     templated_typenames = {}
@@ -610,6 +614,7 @@ def get_templated_typenames(template_parameters, template_arguments):
             templated_typenames[typename] = template_arguments[i]
     return templated_typenames
 
+
 def wrap_templated_kernel(kernel_string, kernel_name):
     """rewrite kernel_string to insert wrapper function for templated kernel"""
     #parse kernel_name to find template_arguments and real kernel name
@@ -626,7 +631,7 @@ def wrap_templated_kernel(kernel_string, kernel_name):
 
     template_parameters = match.group(1).split(',')
     argument_list = match.group(2).split(',')
-    argument_list = [s.strip() for s in argument_list] #remove extra whitespace around 'type name' strings
+    argument_list = [s.strip() for s in argument_list]    #remove extra whitespace around 'type name' strings
 
     type_list, name_list = split_argument_list(argument_list)
 
diff --git a/kernel_tuner/cupy.py b/kernel_tuner/cupy.py
index 5750a94b5..f59c653ee 100644
--- a/kernel_tuner/cupy.py
+++ b/kernel_tuner/cupy.py
@@ -1,7 +1,6 @@
 """This module contains all Cupy specific kernel_tuner functions"""
 from __future__ import print_function
 
-
 import logging
 import time
 import numpy as np
@@ -18,6 +17,7 @@
 
 class CupyRuntimeObserver(BenchmarkObserver):
     """ Observer that measures time using CUDA events during benchmarking """
+
     def __init__(self, dev):
         self.dev = dev
         self.stream = dev.stream
@@ -26,10 +26,13 @@ def __init__(self, dev):
         self.times = []
 
     def after_finish(self):
-        self.times.append(cp.cuda.get_elapsed_time(self.start, self.end)) #ms
+        self.times.append(cp.cuda.get_elapsed_time(self.start, self.end))    #ms
 
     def get_results(self):
-        results = {"time": np.average(self.times), "times": self.times.copy()}
+        results = {
+            "time": np.average(self.times),
+            "times": self.times.copy()
+        }
         self.times = []
         return results
 
@@ -55,7 +58,7 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
         self.texrefs = []
         if not cp:
             raise ImportError("Error: cupy not installed, please install e.g. " +
-                            "using 'pip install cupy-cuda111', please check https://github.com/cupy/cupy.")
+                              "using 'pip install cupy-cuda111', please check https://github.com/cupy/cupy.")
 
         #select device
         self.dev = dev = cp.cuda.Device(device).__enter__()
@@ -87,7 +90,8 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
         #collect environment information
         env = dict()
         cupy_info = str(cp._cupyx.get_runtime_info()).split("\n")[:-1]
-        info_dict = {s.split(":")[0].strip():s.split(":")[1].strip() for s in cupy_info}
+        info_dict = {s.split(":")[0].strip(): s.split(":")[1].strip()
+                     for s in cupy_info}
         env["device_name"] = info_dict[f'Device {device} Name']
 
         env["cuda_version"] = cp.cuda.runtime.driverGetVersion()
@@ -123,11 +127,10 @@ def ready_argument_list(self, arguments):
                 alloc = cp.array(arg)
                 self.allocations.append(alloc)
                 gpu_args.append(alloc)
-            else: # if not a numpy array, just pass argument along
+            else:    # if not a numpy array, just pass argument along
                 gpu_args.append(arg)
         return gpu_args
 
-
     def compile(self, kernel_instance):
         """call the CUDA compiler to compile the kernel, return the device function
 
@@ -150,13 +153,11 @@ def compile(self, kernel_instance):
 
         options = tuple(compiler_options)
 
-        self.current_module = cp.RawModule(code=kernel_string, options=options,
-                                           name_expressions=[kernel_name])
+        self.current_module = cp.RawModule(code=kernel_string, options=options, name_expressions=[kernel_name])
 
         self.func = self.current_module.get_function(kernel_name)
         return self.func
 
-
     def benchmark(self, func, gpu_args, threads, grid):
         """runs the kernel and measures time repeatedly, returns average time
 
@@ -219,9 +220,10 @@ def copy_constant_memory_args(self, cmem_args):
             to be numpy objects, such as numpy.ndarray or numpy.int32, and so on.
         :type cmem_args: dict( string: numpy.ndarray, ... )
         """
-        logging.debug('copy_constant_memory_args called')
-        logging.debug('current module: ' + str(self.current_module))
-        raise NotImplementedError('CuPy backend does not yet support constant memory')
+        for k, v in cmem_args.items():
+            symbol = self.current_module.get_global(k)
+            constant_mem = cp.ndarray(v.shape, v.dtype, symbol)
+            constant_mem[:] = cp.asarray(v)
 
     def copy_shared_memory_args(self, smem_args):
         """add shared memory arguments to the kernel"""
@@ -302,4 +304,6 @@ def memcpy_htod(self, dest, src):
             src = cp.asarray(src)
         cp.copyto(dest, src)
 
-    units = {'time': 'ms'}
+    units = {
+        'time': 'ms'
+    }
diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index ecb7f7197..475966adc 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -45,7 +45,7 @@
 except ImportError:
     torch = util.TorchPlaceHolder()
 
-from kernel_tuner.strategies import brute_force, random_sample, diff_evo, minimize, basinhopping, genetic_algorithm, mls, pso, simulated_annealing, firefly_algorithm, bayes_opt, greedy_mls, greedy_ils, ordered_greedy_mls, dual_annealing, bayes_opt_old, bayes_opt_GPyTorch, bayes_opt_alt_BOTorch
+from kernel_tuner.strategies import brute_force, random_sample, diff_evo, minimize, basinhopping, genetic_algorithm, mls, pso, simulated_annealing, firefly_algorithm, bayes_opt, greedy_mls, greedy_ils, ordered_greedy_mls, dual_annealing, bayes_opt_old, bayes_opt_GPyTorch, bayes_opt_GPyTorch_lean, bayes_opt_alt_BOTorch
 
 strategy_map = {
     "brute_force": brute_force,
@@ -65,6 +65,7 @@
     "bayes_opt": bayes_opt,
     "bayes_opt_old": bayes_opt_old,
     "bayes_opt_GPyTorch": bayes_opt_GPyTorch,
+    "bayes_opt_GPyTorch_lean": bayes_opt_GPyTorch_lean,
     "bayes_opt_BOTorch": bayes_opt_alt_BOTorch,
 }
 
diff --git a/kernel_tuner/python.py b/kernel_tuner/python.py
new file mode 100644
index 000000000..9655b068d
--- /dev/null
+++ b/kernel_tuner/python.py
@@ -0,0 +1,147 @@
+""" This module contains the functionality for running and compiling C functions """
+
+from collections import namedtuple
+import platform
+import logging
+import importlib.util
+
+import numpy
+import numpy.ctypeslib
+
+from kernel_tuner.util import get_temp_filename, delete_temp_file, write_file
+
+# This represents an individual kernel argument.
+# It contains a numpy object (ndarray or number) and a ctypes object with a copy
+# of the argument data. For an ndarray, the ctypes object is a wrapper for the ndarray's data.
+Argument = namedtuple("Argument", ["numpy", "ctypes"])
+
+
+class PythonFunctions(object):
+    """Class that groups the code for running and compiling C functions"""
+
+    def __init__(self, iterations=7):
+        """instantiate PythonFunctions object used for interacting with Python code
+
+        :param iterations: Number of iterations used while benchmarking a kernel, 7 by default.
+        :type iterations: int
+        """
+        self.iterations = iterations
+        self.max_threads = 1024
+
+        #environment info
+        env = dict()
+        env["iterations"] = self.iterations
+        self.env = env
+        self.name = platform.processor()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *exc):
+        pass
+
+    def ready_argument_list(self, arguments):
+        """ready argument list to be passed to the Python function
+        """
+        return arguments
+
+    def compile(self, kernel_instance):
+        """ return the function from the kernel instance """
+
+        suffix = kernel_instance.kernel_source.get_user_suffix()
+        source_file = get_temp_filename(suffix=suffix)
+
+        spec = importlib.util.find_spec(kernel_instance.name)
+        foo = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(foo)
+        func = getattr(foo, kernel_instance.name)
+
+        self.params = kernel_instance.params
+
+        delete_temp_file(source_file)
+        return func
+
+    def benchmark(self, func, args, threads, grid):
+        """runs the kernel repeatedly, returns averaged returned value
+
+        The C function tuning is a little bit more flexible than direct CUDA
+        or OpenCL kernel tuning. The C function needs to measure time, or some
+        other quality metric you wish to tune on, on its own and should
+        therefore return a single floating-point value.
+
+        Benchmark runs the C function repeatedly and returns the average of the
+        values returned by the C function. The number of iterations is set
+        during the creation of the CFunctions object. For all measurements the
+        lowest and highest values are discarded and the rest is included in the
+        average. The reason for this is to be robust against initialization
+        artifacts and other exceptional cases.
+
+        :param func: A C function compiled for this specific configuration
+        :type func: ctypes._FuncPtr
+
+        :param args: A list of arguments to the function, order should match the
+            order in the code. The list should be prepared using
+            ready_argument_list().
+        :type args: list(Argument)
+
+        :param threads: Ignored, but left as argument for now to have the same
+            interface as CudaFunctions and OpenCLFunctions.
+        :type threads: any
+
+        :param grid: Ignored, but left as argument for now to have the same
+            interface as CudaFunctions and OpenCLFunctions.
+        :type grid: any
+
+        :returns: All execution times.
+        :rtype: dict()
+        """
+        result = dict()
+        result["times"] = []
+        for _ in range(self.iterations):
+            value = self.run_kernel(func, args, threads, grid)
+
+            #I would like to replace the following with actually capturing
+            #stderr and detecting the error directly in Python, it proved
+            #however that capturing stderr for non-Python functions from Python
+            #is a rather difficult thing to do
+            #
+            #The current, less than ideal, scheme uses the convention that a
+            #negative time indicates a 'too many resources requested for launch'
+            #which Kernel Tuner can silently ignore
+            if value < 0.0:
+                raise Exception("too many resources requested for launch")
+
+            result["times"].append(value)
+        result["time"] = numpy.mean(result["times"])
+        return result
+
+    def run_kernel(self, func, args, threads, grid):
+        """runs the kernel once, returns whatever the kernel returns
+
+        :param func: A C function compiled for this specific configuration
+        :type func: ctypes._FuncPtr
+
+        :param args: A list of arguments to the function, order should match the
+            order in the code. The list should be prepared using
+            ready_argument_list().
+        :type args: list(Argument)
+
+        :param threads: Ignored, but left as argument for now to have the same
+            interface as CudaFunctions and OpenCLFunctions.
+        :type threads: any
+
+        :param grid: Ignored, but left as argument for now to have the same
+            interface as CudaFunctions and OpenCLFunctions.
+        :type grid: any
+
+        :returns: A robust average of values returned by the C function.
+        :rtype: float
+        """
+        logging.debug("run_kernel")
+        logging.debug("arguments=" + str([str(arg) for arg in args]))
+
+        time = func(**self.params)
+
+        return time
+
+    units = {}
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch.py b/kernel_tuner/strategies/bayes_opt_GPyTorch.py
index 31b987ca6..784c7d6c0 100644
--- a/kernel_tuner/strategies/bayes_opt_GPyTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_GPyTorch.py
@@ -49,7 +49,7 @@ def normalize_parameter_space(param_space: list, tune_params: dict, normalized:
     return param_space_normalized
 
 
-def prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict):
+def prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict: dict, max_threads: int):
     """ Pruning of the parameter space to remove dimensions that have a constant parameter """
     pruned_tune_params_mask = list()
     removed_tune_params = list()
@@ -64,6 +64,10 @@ def prune_parameter_space(parameter_space, tuning_options, tune_params, normaliz
             removed_tune_params.append(normalized)
     if 'verbose' in tuning_options and tuning_options.verbose is True and len(tune_params.keys()) != sum(pruned_tune_params_mask):
         print(f"Number of parameters (dimensions): {len(tune_params.keys())}, after pruning: {sum(pruned_tune_params_mask)}")
+    # TODO check whether the number of pruned parameters is correct
+    # print(
+    #     f"Number of parameters (dimensions): {len(tune_params.keys())}, after pruning: {sum(pruned_tune_params_mask)}, by util: {util.get_number_of_valid_configs(tuning_options, max_threads)}"
+    # )
     parameter_space = list(tuple(itertools.compress(param_config, pruned_tune_params_mask)) for param_config in parameter_space)
     return parameter_space, removed_tune_params
 
@@ -121,7 +125,7 @@ def tune(runner, kernel_options, device_options, tuning_options):
 
     # prune the parameter space to remove dimensions that have a constant parameter
     if prune_parameterspace:
-        parameter_space, removed_tune_params = prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict)
+        parameter_space, removed_tune_params = prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict, runner.dev.max_threads)
     else:
         parameter_space = list(parameter_space)
         removed_tune_params = [None] * len(tune_params.keys())
@@ -139,7 +143,7 @@ class ExactGPModel(gpytorch.models.ExactGP):
     def __init__(self, train_x, train_y, likelihood):
         super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
         self.mean_module = gpytorch.means.ZeroMean()    # TODO maybe try ConstantMean or LinearMean
-        self.covar_module = gpytorch.kernels.MaternKernel(nu=1.5)    # TODO maybe try ScaleKernel(MaternKernel)
+        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=1.5))    # TODO maybe try ScaleKernel(MaternKernel)
 
     def forward(self, x):
         mean_x = self.mean_module(x)
@@ -400,6 +404,7 @@ def predict(self, x) -> Tuple[float, float]:
     def predict_list(self, lst: list) -> Tuple[np.ndarray, np.ndarray]:
         """ Returns a list of means and standard deviations predicted by the surrogate model for the parameter configurations, and separate lists of means and standard deviations """
         with torch.no_grad(), gpytorch.settings.fast_pred_var():
+            # TODO use torch.cuda for GPU
             test_x = torch.Tensor(lst)
             observed_pred = self.__likelihood(self.__model(test_x))
             mu = observed_pred.mean
@@ -510,7 +515,7 @@ def train_model_hyperparams(self):
         # set the hyperparameters globally for reference
         self.hyperparams = {
             'loss': loss.item(),
-            'lengthscale': self.__model.covar_module.lengthscale.item(),
+            'lengthscale': self.__model.covar_module.base_kernel.lengthscale.item(),
             'noise': self.__model.likelihood.noise.item(),
         }
         # print(f"Loss: {self.hyperparams['loss']}, lengthscale: {self.hyperparams['lengthscale']}, noise: {self.hyperparams['noise']}")
@@ -540,6 +545,7 @@ def initial_sample(self):
             # check for validity to avoid having no actual initial samples
             if self.is_valid(observation):
                 collected_samples += 1
+
         # instantiate the model with the initial sample
         self.__likelihood = gpytorch.likelihoods.GaussianLikelihood()
         self.__tparams = torch.Tensor(self.__valid_params)
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
new file mode 100644
index 000000000..8f8f0be30
--- /dev/null
+++ b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
@@ -0,0 +1,746 @@
+""" Lean implementation of Bayesian Optimization with GPyTorch """
+from copy import deepcopy
+from typing import Any, Tuple
+from random import randint, shuffle
+from math import floor, ceil
+import numpy as np
+from numpy.lib.function_base import diff
+import torch
+import gpytorch
+
+from skopt.sampler import Lhs
+from scipy.stats import norm
+
+from kernel_tuner.util import get_valid_configs, config_valid
+from kernel_tuner.strategies import minimize
+from torch.functional import Tensor
+from torch.nn import parameter
+
+supported_initial_sample_methods = ['lhs', 'index', 'random']
+supported_methods = ['ei', 'poi', 'random']
+supported_cov_kernels = ['matern', 'matern_scalekernel']
+supported_likelihoods = ['Gaussian', 'GaussianPrior', 'FixedNoise']
+supported_optimizers = ['LBFGS', 'Adam']
+
+
+def tune(runner, kernel_options, device_options, tuning_options):
+    """ Find the best performing kernel configuration in the parameter space
+
+    :params runner: A runner from kernel_tuner.runners
+    :type runner: kernel_tuner.runner
+
+    :param kernel_options: A dictionary with all options for the kernel.
+    :type kernel_options: kernel_tuner.interface.Options
+
+    :param device_options: A dictionary with all options for the device
+        on which the kernel should be tuned.
+    :type device_options: kernel_tuner.interface.Options
+
+    :param tuning_options: A dictionary with all options regarding the tuning
+        process.
+    :type tuning_options: kernel_tuner.interface.Options
+
+    :returns: A list of dictionaries for executed kernel configurations and their
+        execution times. And a dictionary that contains a information
+        about the hardware/software environment on which the tuning took place.
+    :rtype: list(dict()), dict()
+
+    """
+
+    # set CUDA availability
+    cuda_available = torch.cuda.is_available()
+    cuda_available = False
+    device = torch.device("cuda:0" if cuda_available else "cpu")
+    if cuda_available:
+        print(f"CUDA is available, device: {torch.cuda.get_device_name(device)}")
+
+
+    # retrieve options with defaults
+    options = tuning_options.strategy_options
+    optimization_direction = options.get("optimization_direction", 'min')
+    num_initial_samples = options.get("popsize", 20)
+    max_fevals = options.get("max_fevals", 100)
+    max_threads = runner.dev.max_threads
+    if max_fevals < num_initial_samples:
+        raise ValueError(f"Maximum number of function evaluations ({max_fevals}) can not be lower than the number of initial samples ({num_initial_samples}) ")
+
+    # enabling scaling will unscale and snap inputs on evaluation, more efficient to keep unscale values in a lookup table
+    tuning_options["snap"] = True
+    tuning_options["scaling"] = False
+
+    # prune the search space using restrictions
+    # TODO look into the efficiency, especially for GEMM (56.47%)
+    parameter_space = get_valid_configs(tuning_options, max_threads)
+
+    # limit max_fevals to max size of the parameter space
+    max_fevals = min(len(parameter_space), max_fevals)
+
+    # execute Bayesian Optimization
+    BO = BayesianOptimization(parameter_space, kernel_options, tuning_options, runner, num_initial_samples, optimization_direction, device)
+    # BO.visualize()
+    all_results = BO.optimize(max_fevals)
+    # BO.visualize()
+
+    return all_results, runner.dev.get_environment()
+
+
+class ExactGPModel(gpytorch.models.ExactGP):
+
+    def __init__(self, train_x, train_y, likelihood, cov_kernel_name: str, cov_kernel_lengthscale: float):
+        super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
+        self.mean_module = gpytorch.means.ZeroMean()
+        if cov_kernel_name == 'matern':
+            self.covar_module = gpytorch.kernels.MaternKernel(nu=cov_kernel_lengthscale)
+        elif cov_kernel_name == 'matern_scalekernel':
+            self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=cov_kernel_lengthscale))
+
+    def forward(self, x):
+        mean_x = self.mean_module(x)
+        covar_x = self.covar_module(x)
+        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
+
+
+class BayesianOptimization:
+
+    def __init__(self, parameter_space: list, kernel_options, tuning_options, runner, num_initial_samples: int, optimization_direction: str, device: torch.device) -> None:
+        self.animate = False    # TODO remove
+
+        # set defaults
+        self.num_initial_samples = num_initial_samples
+        self.fevals = 0
+        self.all_results = []
+        self.unique_results = {}
+        self.current_optimal_config = None
+
+        # set Kernel Tuner data
+        self.kernel_options = kernel_options
+        self.tuning_options = tuning_options
+        self.runner = runner
+        self.max_threads = runner.dev.max_threads
+
+        # get tuning options
+        self.initial_sample_method = self.get_hyperparam("initialsamplemethod", "index", supported_initial_sample_methods)
+        self.initial_training_iter = self.get_hyperparam("initialtrainingiter", 50)
+        self.training_iter = self.get_hyperparam("trainingiter", 0)
+        self.cov_kernel_name = self.get_hyperparam("covariancekernel", "matern_scalekernel", supported_cov_kernels)
+        self.cov_kernel_lengthscale = self.get_hyperparam("covariancelengthscale", 1.5)
+        self.likelihood_name = self.get_hyperparam("likelihood", "Gaussian", supported_likelihoods)
+        self.optimizer_name = self.get_hyperparam("optimizer", "Adam", supported_optimizers)
+        self.optimizer_learningrate = self.get_hyperparam("optimizer_learningrate", 0.1)
+        acquisition_function_name = self.get_hyperparam("method", "ei", supported_methods)
+        af_params = self.get_hyperparam("methodparams", {})
+
+        # set acquisition function options
+        self.set_acquisition_function(acquisition_function_name)
+        if 'explorationfactor' not in af_params:
+            af_params['explorationfactor'] = 'CV'
+        self.af_params = af_params
+
+        # set Tensors
+        # the unvisited_configs and valid_configs are to be used as boolean masks on the other tensors, more efficient than adding to / removing from tensors
+        self.device = device
+        self.out_device = torch.device("cpu")
+        self.dtype = torch.double
+        self.size = len(parameter_space)
+        self.unvisited_configs = torch.ones(self.size, dtype=torch.bool).to(device)
+        self.index_counter = torch.tensor(range(self.size))
+        self.valid_configs = torch.zeros(self.size, dtype=torch.bool).to(device)
+        self.inital_sample_configs = torch.zeros(self.size, dtype=torch.bool).to(device)
+        self.results = torch.zeros(self.size, dtype=self.dtype).to(device) * np.nan             # x (param configs) and y (results) must be the same type
+        self.results_std = torch.ones(self.size, dtype=self.dtype).to(device) * 1e-3
+
+        # transform non-numerical parameters to numerical, keep true_param_configs for evaluation function
+        self.param_configs, self.tune_params = self.transform_nonnumerical_params(parameter_space)
+        self.true_param_configs = parameter_space
+
+        # set scaling
+        self.scaled_input = True
+        self.scaled_output = True
+        if not self.scaled_input:
+            self.param_configs_scaled = self.param_configs
+        else:
+            self.apply_scaling_to_inputs()
+
+        # set optimization settings
+        self.invalid_value = 1e20
+        self.optimization_direction = optimization_direction
+        if self.optimization_direction == 'min':
+            self.is_better_than = lambda a, b: a < b
+            self.inf_value = np.PINF
+            self.opt = torch.min
+            self.argopt = torch.argmin
+        elif self.optimization_direction == 'max':
+            self.is_better_than = lambda a, b: a > b
+            self.inf_value = np.NINF
+            self.opt = torch.max
+            self.argopt = torch.argmax
+        else:
+            raise ValueError(f"Invalid optimization direction {self.optimization_direction}")
+
+        # set the model
+        self.current_optimum = self.inf_value
+        self.hyperparams = {
+            'loss': np.nan,
+            'lengthscale': np.nan,
+            'noise': np.nan,
+        }
+        self.initialize_model()
+
+    @property
+    def train_x(self):
+        """ Get the valid parameter configurations """
+        return self.param_configs_scaled[self.valid_configs].to(self.device)
+
+    @property
+    def train_y(self):
+        """ Get the valid results """
+        outputs = self.results[self.valid_configs]
+        if self.scaled_output:
+            # z-score, remove mean and make unit variance to scale it to N(0,1)
+            # alternatively, first min-max the outputs between -1 and +1 and apply a Fisher transformation (np.arctanh)
+            outputs = (outputs - outputs.mean()) / outputs.std()
+        return outputs
+
+    @property
+    def train_y_err(self):
+        """ Get the error on the valid results """
+        std = self.results_std[self.valid_configs]
+        if self.scaled_output and std.std() > 0.0:
+            std = (std - std.mean()) / std.std()
+        return std
+
+    @property
+    def test_x(self):
+        """ Get the not yet visited parameter configurations """
+        return self.param_configs_scaled[self.unvisited_configs].to(self.device)
+
+    @property
+    def test_x_unscaled(self):
+        """ Get the unscaled, not yet visited parameter configurations """
+        return self.param_configs[self.unvisited_configs]
+
+    @property
+    def invalid_x(self):
+        """ Get the invalid parameter configurations by checking which visited configs are not valid (equivalent to checking which unvisited configs are valid) """
+        invalid_mask = (self.unvisited_configs == self.valid_configs)
+        return self.param_configs[invalid_mask]
+
+    def true_param_config_index(self, target_index: int) -> int:
+        """ The index required to get the true config param index when dealing with test_x """
+        # get the index of the #index-th True (for example the 9th+1 True could be index 13 because there are 4 Falses in between)
+
+        counter_masked = self.index_counter[self.unvisited_configs]
+        return counter_masked[target_index]
+
+    def true_param_config_indices(self, target_indices: torch.Tensor) -> torch.Tensor:
+        """ Same as true_param_config_index, but for an array of targets in O(n) instead of O(n^2). Assumes the array is sorted in ascending order. """
+        # TODO same trick as true_param_config_index
+
+        true_indices = torch.full_like(target_indices, -1).to(self.device)
+        target_index_index = 0
+        target_index = target_indices[target_index_index]
+        count = -1
+        for index, value in enumerate(self.unvisited_configs):
+            if value == True:
+                count += 1
+            if count == target_index:
+                true_indices[target_index_index] = index
+                target_index_index += 1
+                if target_index_index == len(target_indices):
+                    break
+                target_index = target_indices[target_index_index]
+
+        return true_indices
+
+    def initialize_model(self):
+        """ Initialize the surrogate model """
+        self.initial_sample()
+
+        # create the model
+        if self.likelihood_name == 'Gaussian':
+            self.likelihood = gpytorch.likelihoods.GaussianLikelihood()
+        elif self.likelihood_name == 'FixedNoise':
+            self.likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(noise=self.train_y_err.clamp(min=1.0e-4), learn_additional_noise=False)
+        self.likelihood = self.likelihood.to(self.device)
+        self.model = ExactGPModel(self.train_x, self.train_y, self.likelihood, self.cov_kernel_name, self.cov_kernel_lengthscale)
+
+        # Find optimal model hyperparameters
+        self.model.train()
+        self.likelihood.train()
+        model_parameters = filter(lambda p: p.requires_grad, self.model.parameters())
+
+        # LBFGS is probably better as Adam is only first-order
+        if self.optimizer_name == 'LBFGS':
+            self.optimizer = torch.optim.LBFGS(model_parameters, lr=self.optimizer_learningrate)
+        elif self.optimizer_name == 'Adam':
+            self.optimizer = torch.optim.Adam(model_parameters, lr=self.optimizer_learningrate)
+
+        self.mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model).to(self.device)
+        self.train_hyperparams(self.initial_training_iter)
+
+    def initial_sample(self):
+        """ Take an initial sample of the parameter space """
+        param_configs = list()
+
+        # first apply the initial sampling method
+        if self.initial_sample_method == 'lhs':
+            indices, param_configs = self.get_lhs_sample()
+            for index in indices:
+                # indices may be -1 because of parameter filtering etc., so we replace those with index-spaces samples
+                if index != -1:
+                    self.evaluate_config(index)
+        elif self.initial_sample_method == 'random':
+            while self.fevals < self.num_initial_samples:
+                param_config_index = randint(0, self.size - 1)
+                param_config = tuple(self.param_configs_scaled[param_config_index].tolist())
+                if param_config in param_configs:
+                    continue
+                param_configs.append(param_config)
+                self.evaluate_config(param_config_index)
+
+        # then take index-spaced samples until all samples are valid
+        while self.fevals < self.num_initial_samples:
+            least_evaluated_region_index = self.get_middle_index_of_least_evaluated_region()
+            self.evaluate_config(least_evaluated_region_index)
+
+        # set the current optimum, initial sample mean and initial sample std
+        self.current_optimum = self.opt(self.train_y).item()
+        self.initial_sample_mean = self.train_y.mean().item()
+        self.initial_sample_std = None
+
+        # save a boolean mask of the initial samples
+        self.inital_sample_configs = self.valid_configs.detach().clone()
+
+    def get_lhs_sample(self) -> Tuple[list, list]:
+        """ Get a centered Latin Hypercube Sample """
+        param_configs = list()
+        n_samples = self.num_initial_samples
+        temp_param_configs = [[] for _ in range(n_samples)]
+        for param_values in self.tune_params.values():
+            l = len(param_values)
+
+            # determine the interval and offset
+            interval = l / n_samples
+            offset = 0
+            if l > n_samples:
+                # take the difference between the last index and the end of the list, and the first index and the start of the list
+                offset = ((l - 1 - interval * n_samples) - interval) / 2
+
+            # assemble the parameter configurations
+            for i in range(n_samples):
+                index = ceil(offset + interval * (i + 1)) - 1
+                temp_param_configs[i].append(param_values[index])
+
+        # set the actual parameter configurations
+        for param_config in temp_param_configs:
+            param_config = tuple(param_config)
+            param_configs.append(param_config)
+        param_configs = torch.tensor(param_configs, dtype=self.dtype).to(self.device)
+
+        # get the indices of the parameter configurations in O(n^2)
+        param_configs_indices = [-1 for _ in range(n_samples)]
+        for index, param_config in enumerate(self.param_configs):
+            for selected_index, selected_param_config in enumerate(param_configs):
+                if torch.allclose(selected_param_config, param_config, equal_nan=False) and index not in param_configs_indices:
+                    param_configs_indices[selected_index] = index
+
+        if param_configs_indices.count(-1) > n_samples / 2:
+            print(f"No good fit was found in {param_configs_indices.count(-1)} out of the {n_samples} samples. Perhaps try something other than LHS.")
+        return param_configs_indices, param_configs
+
+    def get_middle_index_of_least_evaluated_region(self) -> int:
+        """ Get the middle index of the region of parameter configurations that is the least visited """
+        # This uses the largest distance between visited parameter configurations. That means it does not properly take the parameters into account, only the index of the parameter configurations, whereas LHS does.
+        distance_counter = -1
+        distance_tensor = torch.zeros_like(self.unvisited_configs, dtype=torch.int)     # TODO check if .to(self.device) is faster or slower
+        for index, unvisited in enumerate(self.unvisited_configs):
+            if unvisited:
+                distance_counter += 1
+            if not unvisited:
+                distance_counter = 0
+            distance_tensor[index] = distance_counter
+
+        biggest_distance_index = distance_tensor.argmax()
+        biggest_distance = distance_tensor[biggest_distance_index].item()
+        middle_index = biggest_distance_index - round(biggest_distance / 2)
+        # print(f"Max distance {biggest_distance}, index: {middle_index}, between: {biggest_distance_index-biggest_distance}-{biggest_distance_index}")
+        return middle_index
+
+    def find_nearest(self, value, array: torch.Tensor):
+        """ Find the value nearest to the given value in the array """
+        index = (torch.abs(array - value)).argmin()
+        return array[index]
+
+    def train_hyperparams(self, training_iter: int):
+        """ Optimize the surrogate model hyperparameters iteratively """
+        self.model.train()
+        self.likelihood.train()
+
+        def closure():
+            self.optimizer.zero_grad()
+            output = self.model(self.train_x)    # get model output
+            try:
+                loss = -self.mll(output, self.train_y)    # calculate loss and backprop gradients
+                loss.backward()
+                return loss
+            except gpytorch.utils.errors.NotPSDError:
+                print(f"WARNING - matrix not positive definite during training")
+
+        loss = None
+        for _ in range(training_iter):
+            _loss = self.optimizer.step(closure)
+            if _loss is not None:
+                loss = _loss
+
+        # set the hyperparams to the new values
+        try:
+            lengthscale = self.model.covar_module.lengthscale.item()
+        except AttributeError:
+            lengthscale = self.model.covar_module.base_kernel.lengthscale.item()
+        self.hyperparams = {
+            'loss': float(loss.item()) if loss is not None else np.nan,
+            'lengthscale': float(lengthscale),
+            'noise': float(self.model.likelihood.noise.mean().detach()),
+        }
+
+        # get into evaluation (predictive posterior) mode
+        self.model.eval()
+        self.likelihood.eval()
+
+    def optimize(self, max_fevals: int) -> Tuple[tuple, float]:
+        """ Optimize the objective """
+        predictions_tuple = None
+        short_param_config_index = None
+        last_invalid = False
+        report_multiple_minima = round(self.size / 10)    # if more than 10% of the space is minima, print a warning
+        use_contextual_variance = self.af_params['explorationfactor'] == 'CV'
+        while self.fevals < max_fevals:
+            if last_invalid:
+                # TODO no need to get the predictions again as the predictions are unchanged, just set the invalid param config mean to the worst non-NAN value and the std to 0
+                # predictions_tuple[0][short_param_config_index] = torch.nanmean(predictions_tuple[0])
+                # predictions_tuple[1][short_param_config_index] = 0
+                predictions_tuple = self.remove_from_predict_list(predictions_tuple, short_param_config_index)
+            else:
+                predictions_tuple = self.predict_list()
+                if self.initial_sample_std is None:
+                    self.initial_sample_std = predictions_tuple[1].mean().item()
+            hyperparam = self.contextual_variance(predictions_tuple[0], predictions_tuple[1]) if use_contextual_variance else None
+            acquisition_values = self.acquisition_function(predictions_tuple, hyperparam)
+            short_param_config_index = self.argopt(acquisition_values)
+            param_config_index = self.true_param_config_index(short_param_config_index)
+
+            # if there are multiple minima in the acquisition function values, we want to take one from the least evaluated region
+            min_acquisition_function_value = acquisition_values[short_param_config_index]
+            indices_where_min = (acquisition_values <= min_acquisition_function_value).nonzero(as_tuple=True)[0]
+            if len(indices_where_min) > 1:
+                # first get the true index for the minima
+                true_indices_where_min = self.true_param_config_indices(indices_where_min)
+                # then get the index of the least evaluated region
+                least_evaluated_region_index = self.get_middle_index_of_least_evaluated_region()
+                # now find the minima closest to the least evaluated region
+                param_config_index = self.find_nearest(least_evaluated_region_index, true_indices_where_min)
+                short_param_config_index = -1    # invalidate the short_param_config_index because we bypassed it
+                if len(indices_where_min) > report_multiple_minima:
+                    print(
+                        f"WARNING - after {self.fevals}/{max_fevals} fevals, there were multiple minima in the acquisition values ({len(indices_where_min)}), picking one based on the least evaluated region"
+                    )
+
+            # evaluate and register the result
+            result = self.evaluate_config(param_config_index)
+            if result == self.invalid_value and short_param_config_index > -1:
+                # can't use last_invalid if there were multiple minima in the acquisition function values, because short_param_config_index will not be set
+                last_invalid = True
+            else:
+                last_invalid = False
+                self.model.set_train_data(self.train_x, self.train_y, strict=False)
+                if self.training_iter > 0:
+                    self.train_hyperparams(training_iter=self.training_iter)
+                # set the current optimum
+                self.current_optimum = self.opt(self.train_y).item()
+            # print(f"Valid: {len(self.train_x)}, unvisited: {len(self.test_x)}, invalid: {len(self.invalid_x)}, last invalid: {last_invalid}")
+            if self.animate:
+                self.visualize()
+
+        return self.all_results
+
+    def objective_function(self, param_config: tuple) -> float:
+        return minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.all_results)
+
+    def evaluate_config(self, param_config_index: int) -> float:
+        """ Evaluates a parameter configuration, returns the time """
+        param_config = self.true_param_configs[param_config_index]
+        time = self.objective_function(param_config)
+        self.register_result(time, param_config_index)
+        self.update_unique_results()
+        self.fevals = len(self.unique_results)
+        return time
+
+    def register_result(self, result: float, param_config_index: int):
+        """ Registers the result to the Tensors and adds the hyperparameters to the results dict """
+        # set the unvisited Tensors
+        if self.unvisited_configs[param_config_index] == False:
+            raise ValueError(f"The param config index {param_config_index} was already set to False!")
+        self.unvisited_configs[param_config_index] = False
+
+        # set the results Tensors
+        last_result = self.all_results[-1]
+        if result != self.invalid_value:
+            self.valid_configs[param_config_index] = True
+            self.results[param_config_index] = result
+            assert last_result['time'] == result
+            self.results_std[param_config_index] = np.std(last_result['times'])
+
+        # add the current model parameters to the results dict
+        if len(self.all_results) < 1:
+            return
+        for key, value in self.hyperparams.items():
+            last_result[key] = value
+        self.all_results[-1] = last_result
+
+    def update_unique_results(self):
+        """ Updates the unique results dictionary """
+        record = self.all_results[-1]
+        # make a unique string by taking every value in a result, if it already exists, it is overwritten
+        self.unique_results.update({",".join([str(v) for k, v in record.items() if k in self.tuning_options.tune_params]): record["time"]})
+
+    def predict_list(self) -> Tuple[torch.Tensor, torch.Tensor]:
+        """ Returns the means and standard deviations predicted by the surrogate model for the unvisited parameter configurations """
+        with torch.no_grad(), gpytorch.settings.fast_pred_samples(), gpytorch.settings.fast_pred_var():
+            observed_pred = self.likelihood(self.model(self.test_x))
+            mu = observed_pred.mean
+            std = observed_pred.variance.clamp(min=1e-9)    # TODO .sqrt() or not? looks like without is better
+            return mu, std
+
+    def remove_from_predict_list(self, p: Tuple[torch.Tensor, torch.Tensor], i: int) -> Tuple[torch.Tensor, torch.Tensor]:
+        """ Remove an index from a tuple of predictions """
+        return torch.cat([p[0][:i], p[0][i + 1:]]), torch.cat([p[1][:i], p[1][i + 1:]])
+
+    def af_random(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function returning a randomly shuffled list for comparison """
+        list_random = list(range(len(self.unvisited_param_configs)))
+        shuffle(list_random)
+        return list_random
+
+    def get_diff_improvement(self, y_mu, y_std, fplus) -> torch.Tensor:
+        """ compute probability of improvement by assuming normality on the difference in improvement """
+        diff_improvement = (y_mu - fplus) / y_std    # y_std can be very small, causing diff_improvement to be very large
+        diff_improvement = (diff_improvement - diff_improvement.mean()) / diff_improvement.std()    # force to N(0,1) with z-score
+        if self.optimization_direction == 'max':
+            diff_improvement = -diff_improvement
+        return diff_improvement
+
+    def contextual_variance(self, mean: torch.Tensor, std: torch.Tensor):
+        """ Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018) """
+        if not self.af_params['explorationfactor'] == 'CV':
+            raise ValueError(f"Contextual Variance was called, but is not set as the exploration factor ({self.af_params['explorationfactor']})")
+        if self.optimization_direction == 'max':
+            raise NotImplementedError("Contextual Variance has not yet been implemented for maximisation")
+        if self.current_optimum == self.inf_value:
+            return 0.01
+        if self.scaled_output:
+            improvement_over_initial_sample = (abs(self.current_optimum) - self.initial_sample_mean) / self.initial_sample_std
+            improvement_over_current_sample = (abs(self.current_optimum) - self.train_y.mean().item()) / std.mean().item()
+            improvement_diff = improvement_over_current_sample - improvement_over_initial_sample
+            # the closer the improvement over the current sample is to the improvement over the initial sample, the greater the exploration
+            cv = max(np.log(1 - improvement_diff) + 0.1, 0.001)
+            return cv
+        else:
+            raise NotImplementedError("Contextual Variance has not yet been implemented for non-scaled outputs")
+
+    def af_probability_of_improvement_tensor(self, predictions: Tuple[torch.Tensor, torch.Tensor], hyperparam=None) -> torch.Tensor:
+        """ Acquisition function Probability of Improvement (PoI) tensor-based """
+
+        # prefetch required data
+        y_mu, y_std = predictions
+        if hyperparam is None:
+            hyperparam = self.af_params['explorationfactor']
+        fplus = self.current_optimum - hyperparam
+
+        diff_improvement = self.get_diff_improvement(y_mu, y_std, fplus)
+        normal = torch.distributions.Normal(torch.zeros_like(diff_improvement), torch.ones_like(diff_improvement))
+        cdf = normal.cdf(diff_improvement)
+
+        # sanity check
+        if torch.all(cdf == cdf[0]):
+            raise ValueError("You need to scale the diff_improvement-values!")
+        return cdf
+
+    def af_expected_improvement_tensor(self, predictions: Tuple[torch.Tensor, torch.Tensor], hyperparam=None) -> torch.Tensor:
+        """ Acquisition function Expected Improvement (EI) tensor-based """
+
+        # prefetch required data
+        y_mu, y_std = predictions
+        if hyperparam is None:
+            hyperparam = self.af_params['explorationfactor']
+        fplus = self.current_optimum - hyperparam
+        # fplus = torch.full_like(y_mu, fplus) TODO does this make a difference for performance?
+
+        diff_improvement = self.get_diff_improvement(y_mu, y_std, fplus)
+        normal = torch.distributions.Normal(torch.zeros_like(diff_improvement), torch.ones_like(diff_improvement))
+        cdf = normal.cdf(diff_improvement)
+        pdf = torch.exp(normal.log_prob(diff_improvement))
+
+        # sanity check
+        if torch.all(cdf == cdf[0]) or torch.all(pdf == pdf[0]):
+            raise ValueError("You need to scale the diff_improvement-values!")
+
+        # compute expected improvement in bulk
+        exp_improvement = (pdf + diff_improvement + y_std * cdf)
+        # alternative exp_improvement = y_std * (pdf + diff_improvement * cdf)
+        # alternative exp_improvement = -((fplus - y_mu) * cdf + y_std * pdf)
+        return exp_improvement
+
+    """                  """
+    """ Helper functions """
+    """                  """
+
+    def get_hyperparam(self, name: str, default, supported_values=list()):
+        """ Retrieve the value of a hyperparameter based on the name """
+        value = self.tuning_options.strategy_options.get(name, default)
+        if len(supported_values) > 0 and value not in supported_values:
+            raise ValueError(f"'{name}' is set to {value}, but must be one of {supported_values}")
+        return value
+
+    def set_acquisition_function(self, acquisition_function: str):
+        """ Set the acquisition function based on the name """
+        if acquisition_function not in supported_methods:
+            raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
+
+        if acquisition_function == 'poi':
+            self.acquisition_function = self.af_probability_of_improvement_tensor
+        elif acquisition_function == 'ei':
+            self.acquisition_function = self.af_expected_improvement_tensor
+        elif acquisition_function == 'random':
+            self.acquisition_function = self.af_random
+
+    def apply_scaling_to_inputs(self):
+        """ Scale the inputs using min-max normalization (0-1) and remove constant parameters """
+        # TODO look into the efficiency, especially for GEMM (18.54%)
+        self.scaled_inputs = torch.zeros_like(self.param_configs)
+        param_configs_scaled = torch.zeros_like(self.param_configs)
+
+        # first get the scaling factors of each parameter
+        v_min_list = list()
+        v_max_list = list()
+        unchanging_params_list = list()
+        for param_values in self.tune_params.values():
+            v_min = min(param_values)
+            v_max = max(param_values)
+            v_min_list.append(v_min)
+            v_max_list.append(v_max)
+            unchanging_params_list.append(v_min == v_max)
+
+        # then set each parameter value to the scaled value
+        for param_index in range(len(self.param_configs[0])):
+            v_min = v_min_list[param_index]
+            v_max = v_max_list[param_index]
+            v_diff = v_max - v_min
+            for param_config_index, param_config in enumerate(self.param_configs):
+                param_configs_scaled[param_config_index][param_index] = (param_config[param_index] - v_min) / v_diff
+
+        # finally remove parameters that are constant by applying a mask
+        unchanging_params_tensor = torch.tensor(unchanging_params_list, dtype=torch.bool)
+        if torch.all(unchanging_params_tensor == True):
+            raise ValueError(f"All of the parameter configurations ({self.size}) are the same: {self.param_configs[0]}, nothing to optimize")
+        nonstatic_param_count = torch.count_nonzero(~unchanging_params_tensor)
+        self.param_configs_scaled = torch.zeros([len(param_configs_scaled), nonstatic_param_count], dtype=self.dtype)
+        for param_config_index, param_config in enumerate(param_configs_scaled):
+            self.param_configs_scaled[param_config_index] = param_config[~unchanging_params_tensor]
+
+    def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[torch.Tensor, dict]:
+        """ transform non-numerical or mixed-type parameters to numerical Tensor, also return new tune_params """
+        parameter_space = deepcopy(parameter_space)
+        number_of_params = len(parameter_space[0])
+
+        # find out which parameters have nonnumerical or mixed types, and create a range of integers instead
+        nonnumericals_exist = False
+        nonnumerical_type = torch.zeros(number_of_params, dtype=torch.bool)
+        nonnumerical_values = [ [] for _ in range(number_of_params) ]
+        tune_params = deepcopy(self.tuning_options.tune_params)
+        for param_index, (param_key, param_values) in enumerate(self.tuning_options.tune_params.items()):
+            if not all(isinstance(v, (int, float, complex)) for v in param_values):
+                nonnumericals_exist = True
+                nonnumerical_type[param_index] = True
+                nonnumerical_values[param_index] = param_values
+                tune_params[param_key] = range(len(param_values))
+
+        # overwrite the nonnumerical parameters with numerical parameters
+        if nonnumericals_exist:
+            self.tuning_options["snap"] = False     # snapping is only possible with numerical values
+            for param_config_index, param_config in enumerate(parameter_space):
+                parameter_space[param_config_index] = list(param_config)
+                for param_index, param_value in enumerate(param_config):
+                    if nonnumerical_type[param_index]:
+                        # just use the index of the non-numerical value instead of the value
+                        new_value = nonnumerical_values[param_index].index(param_value)
+                        parameter_space[param_config_index][param_index] = new_value
+
+        return torch.tensor(parameter_space, dtype=self.dtype).to(self.device), tune_params
+
+
+    def visualize(self):
+        """ Visualize the surrogate model and observations in a plot """
+        from matplotlib import pyplot as plt
+        with torch.no_grad(), gpytorch.settings.fast_pred_var():
+            # Initialize plot
+            f, ax = plt.subplots(1, 1, figsize=(10, 5))
+            ax.set_ylabel('Value')
+            ax.set_xlabel('Parameter')
+
+            param_configs = self.param_configs.to(self.out_device)
+
+            # get true function
+            objective_results = np.array([])
+            for param_config in param_configs:
+                result = self.objective_function(tuple(param_config.tolist()))
+                if result == self.invalid_value:
+                    result = np.nan
+                objective_results = np.append(objective_results, result)
+            if self.scaled_output:
+                objective_results = (objective_results - objective_results.mean()) / objective_results.std()
+
+            if len(param_configs[0]) == 1:
+                ax.plot(np.linspace(param_configs[0], param_configs[-1], self.size), objective_results, 'r')
+            else:
+                ax.plot(range(self.size), objective_results, 'r')
+
+            # take the parameter values for 1D, otherwise the indices
+            if len(param_configs[0]) == 1:
+                x_axis_param_configs = param_configs
+                test_x_x_axis = self.test_x_unscaled.squeeze().to(self.out_device).numpy()
+            else:
+                x_axis_param_configs = torch.tensor(range(self.size))
+                test_x_x_axis = x_axis_param_configs[self.unvisited_configs].to(self.out_device)
+
+            # Get upper and lower confidence bounds
+            observed_pred = self.likelihood(self.model(self.test_x))
+            lower, upper = observed_pred.confidence_region()
+            lower, upper = lower.to(self.out_device), upper.to(self.out_device)
+
+            # Plot initial sample as green stars
+            initial_sample_x_axis = x_axis_param_configs[self.inital_sample_configs].to(self.out_device)
+            initial_sample_y_axis = self.results[self.inital_sample_configs].to(self.out_device)
+            ax.plot(initial_sample_x_axis.numpy(), initial_sample_y_axis.numpy(), 'g*')
+
+            # Plot training data as black stars
+            mask_training_data_no_initial_sample = ~self.inital_sample_configs == self.valid_configs
+            training_x_axis = x_axis_param_configs[mask_training_data_no_initial_sample].to(self.out_device)
+            training_y_axis = self.results[mask_training_data_no_initial_sample].to(self.out_device)
+            ax.plot(training_x_axis.numpy(), training_y_axis.numpy(), 'k*')
+
+            # Plot predictive means as blue line
+            test_x_y_axis = observed_pred.mean.to(self.out_device)
+            ax.plot(test_x_x_axis, test_x_y_axis.numpy(), 'b')
+
+            # Shade between the lower and upper confidence bounds
+            ax.fill_between(test_x_x_axis, lower.numpy(), upper.numpy(), alpha=0.5)
+
+            # set the limits and legend
+            # ax.set_ylim(min(objective_results), max(filter(lambda x: x != self.invalid_value, objective_results)))
+            ax.legend(['Objective Function', 'Initial Sample', 'Observed Data', 'Mean', 'Confidence'])
+
+            if self.animate:
+                f.canvas.draw()
+                plt.pause(0.1)
+
+            plt.show()
diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index 71bf66c3e..c900f5347 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -259,12 +259,17 @@ def get_kernel_string(kernel_source, params=None):
     return kernel_string
 
 
-def get_number_of_valid_configs(tuning_options, max_threads):
-    """compute number of valid configurations in a search space based on restrictions and max_threads"""
+def get_valid_configs(tuning_options, max_threads) -> list:
+    """ compute valid configurations in a search space based on restrictions and max_threads"""
     parameter_space = itertools.product(*tuning_options.tune_params.values())
     if tuning_options.restrictions is not None:
-        parameter_space = filter(lambda p: util.config_valid(p, tuning_options, max_threads), parameter_space)
-    return len(list(parameter_space))
+        parameter_space = filter(lambda p: config_valid(p, tuning_options, max_threads), parameter_space)
+    return list(parameter_space)
+
+
+def get_number_of_valid_configs(tuning_options, max_threads) -> int:
+    """compute number of valid configurations in a search space based on restrictions and max_threads"""
+    return len(get_valid_configs(tuning_options, max_threads))
 
 
 def get_problem_size(problem_size, params):
@@ -388,7 +393,7 @@ def looks_like_a_filename(kernel_source):
             if s in kernel_source:
                 result = False
         # string must contain substring ".c", ".opencl", or ".F"
-        result = result and any([s in kernel_source for s in (".c", ".opencl", ".F")])
+        result = result and any([s in kernel_source for s in (".c", ".opencl", ".F", ".py")])
     logging.debug('kernel_source is a filename: %s' % str(result))
     return result
 

From 5e0bfdeecf4553a713db874256422965f49b5885 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Sat, 15 Jan 2022 13:58:20 +0100
Subject: [PATCH 003/253] Enormous improvement in both performance and speed
 with BO GPyTorch, also added parsing of restriction strings to functions for
 major performance improvement

---
 kernel_tuner/interface.py                     |   4 +
 .../strategies/bayes_opt_GPyTorch_lean.py     | 179 +++++++++---------
 kernel_tuner/util.py                          |  52 +++++
 3 files changed, 150 insertions(+), 85 deletions(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 475966adc..e5cddcdb8 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -418,6 +418,10 @@ def tune_kernel(kernel_name, kernel_source, problem_size, arguments, tune_params
     # check whether block_size_names are used as expected
     util.check_block_size_params_names_list(block_size_names, tune_params)
 
+    # if the restrictions are not callable, make them (increases restrictions check performance significantly)
+    if restrictions is not None and not callable(restrictions):
+        restrictions = util.parse_restrictions(restrictions)
+
     if iterations < 1:
         raise ValueError("Iterations should be at least one!")
 
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
index 8f8f0be30..594f4aa23 100644
--- a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
+++ b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
@@ -1,20 +1,16 @@
 """ Lean implementation of Bayesian Optimization with GPyTorch """
 from copy import deepcopy
 from typing import Any, Tuple
-from random import randint, shuffle
-from math import floor, ceil
+from random import randint, shuffle, choice
+from math import ceil
 import numpy as np
-from numpy.lib.function_base import diff
+from numpy.lib.arraysetops import unique
+from numpy.random import default_rng
 import torch
 import gpytorch
 
-from skopt.sampler import Lhs
-from scipy.stats import norm
-
 from kernel_tuner.util import get_valid_configs, config_valid
 from kernel_tuner.strategies import minimize
-from torch.functional import Tensor
-from torch.nn import parameter
 
 supported_initial_sample_methods = ['lhs', 'index', 'random']
 supported_methods = ['ei', 'poi', 'random']
@@ -48,8 +44,8 @@ def tune(runner, kernel_options, device_options, tuning_options):
     """
 
     # set CUDA availability
-    cuda_available = torch.cuda.is_available()
-    cuda_available = False
+    use_cuda = False
+    cuda_available = torch.cuda.is_available() and use_cuda
     device = torch.device("cuda:0" if cuda_available else "cpu")
     if cuda_available:
         print(f"CUDA is available, device: {torch.cuda.get_device_name(device)}")
@@ -69,7 +65,6 @@ def tune(runner, kernel_options, device_options, tuning_options):
     tuning_options["scaling"] = False
 
     # prune the search space using restrictions
-    # TODO look into the efficiency, especially for GEMM (56.47%)
     parameter_space = get_valid_configs(tuning_options, max_threads)
 
     # limit max_fevals to max size of the parameter space
@@ -119,9 +114,10 @@ def __init__(self, parameter_space: list, kernel_options, tuning_options, runner
         self.max_threads = runner.dev.max_threads
 
         # get tuning options
-        self.initial_sample_method = self.get_hyperparam("initialsamplemethod", "index", supported_initial_sample_methods)
+        self.initial_sample_method = self.get_hyperparam("initialsamplemethod", "lhs", supported_initial_sample_methods)
+        self.initial_sample_random_offset_factor = self.get_hyperparam("initialsamplerandomoffsetfactor", 0.1)
         self.initial_training_iter = self.get_hyperparam("initialtrainingiter", 50)
-        self.training_iter = self.get_hyperparam("trainingiter", 0)
+        self.training_iter = self.get_hyperparam("trainingiter", 3)
         self.cov_kernel_name = self.get_hyperparam("covariancekernel", "matern_scalekernel", supported_cov_kernels)
         self.cov_kernel_lengthscale = self.get_hyperparam("covariancelengthscale", 1.5)
         self.likelihood_name = self.get_hyperparam("likelihood", "Gaussian", supported_likelihoods)
@@ -143,7 +139,7 @@ def __init__(self, parameter_space: list, kernel_options, tuning_options, runner
         self.dtype = torch.double
         self.size = len(parameter_space)
         self.unvisited_configs = torch.ones(self.size, dtype=torch.bool).to(device)
-        self.index_counter = torch.tensor(range(self.size))
+        self.index_counter = torch.arange(self.size)
         self.valid_configs = torch.zeros(self.size, dtype=torch.bool).to(device)
         self.inital_sample_configs = torch.zeros(self.size, dtype=torch.bool).to(device)
         self.results = torch.zeros(self.size, dtype=self.dtype).to(device) * np.nan             # x (param configs) and y (results) must be the same type
@@ -228,29 +224,13 @@ def invalid_x(self):
     def true_param_config_index(self, target_index: int) -> int:
         """ The index required to get the true config param index when dealing with test_x """
         # get the index of the #index-th True (for example the 9th+1 True could be index 13 because there are 4 Falses in between)
-
-        counter_masked = self.index_counter[self.unvisited_configs]
-        return counter_masked[target_index]
+        masked_counter = self.index_counter[self.unvisited_configs]
+        return masked_counter[target_index]
 
     def true_param_config_indices(self, target_indices: torch.Tensor) -> torch.Tensor:
-        """ Same as true_param_config_index, but for an array of targets in O(n) instead of O(n^2). Assumes the array is sorted in ascending order. """
-        # TODO same trick as true_param_config_index
-
-        true_indices = torch.full_like(target_indices, -1).to(self.device)
-        target_index_index = 0
-        target_index = target_indices[target_index_index]
-        count = -1
-        for index, value in enumerate(self.unvisited_configs):
-            if value == True:
-                count += 1
-            if count == target_index:
-                true_indices[target_index_index] = index
-                target_index_index += 1
-                if target_index_index == len(target_indices):
-                    break
-                target_index = target_indices[target_index_index]
-
-        return true_indices
+        """ Same as true_param_config_index, but for an array of targets instead. """
+        masked_counter = self.index_counter[self.unvisited_configs]
+        return masked_counter.index_select(0, target_indices)
 
     def initialize_model(self):
         """ Initialize the surrogate model """
@@ -280,28 +260,35 @@ def initialize_model(self):
 
     def initial_sample(self):
         """ Take an initial sample of the parameter space """
-        param_configs = list()
+        list_param_config_indices = list()
+
+        # generate a random offset from a normal distribution to add to the sample indices
+        rng = default_rng()
+        if self.initial_sample_random_offset_factor > 0.5:
+            raise ValueError("Random offset factor should not be greater than 0.5 to avoid overlapping index offsets")
+        random_offset_size = (self.size / self.num_initial_samples) * self.initial_sample_random_offset_factor
+        random_offsets = np.round(rng.standard_normal(self.num_initial_samples) * random_offset_size)
 
         # first apply the initial sampling method
         if self.initial_sample_method == 'lhs':
-            indices, param_configs = self.get_lhs_sample()
-            for index in indices:
-                # indices may be -1 because of parameter filtering etc., so we replace those with index-spaces samples
-                if index != -1:
-                    self.evaluate_config(index)
+            indices = self.get_lhs_samples(random_offsets)
+            for param_config_index in indices.tolist():
+                list_param_config_indices.append(param_config_index)
+                self.evaluate_config(param_config_index)
         elif self.initial_sample_method == 'random':
             while self.fevals < self.num_initial_samples:
                 param_config_index = randint(0, self.size - 1)
-                param_config = tuple(self.param_configs_scaled[param_config_index].tolist())
-                if param_config in param_configs:
+                if param_config_index in list_param_config_indices:
                     continue
-                param_configs.append(param_config)
+                list_param_config_indices.append(param_config_index)
                 self.evaluate_config(param_config_index)
 
         # then take index-spaced samples until all samples are valid
         while self.fevals < self.num_initial_samples:
             least_evaluated_region_index = self.get_middle_index_of_least_evaluated_region()
-            self.evaluate_config(least_evaluated_region_index)
+            param_config_index = min(max(int(least_evaluated_region_index + random_offsets[self.fevals].item()), 0), self.size-1)
+            list_param_config_indices.append(param_config_index)
+            self.evaluate_config(param_config_index)
 
         # set the current optimum, initial sample mean and initial sample std
         self.current_optimum = self.opt(self.train_y).item()
@@ -311,10 +298,11 @@ def initial_sample(self):
         # save a boolean mask of the initial samples
         self.inital_sample_configs = self.valid_configs.detach().clone()
 
-    def get_lhs_sample(self) -> Tuple[list, list]:
-        """ Get a centered Latin Hypercube Sample """
-        param_configs = list()
+    def get_lhs_samples(self, random_offsets: np.ndarray) -> torch.Tensor:
+        """ Get a centered Latin Hypercube Sample with a random offset """
         n_samples = self.num_initial_samples
+
+        # first get the seperate parameter values to make possibly fictional distributed parameter configurations
         temp_param_configs = [[] for _ in range(n_samples)]
         for param_values in self.tune_params.values():
             l = len(param_values)
@@ -331,34 +319,59 @@ def get_lhs_sample(self) -> Tuple[list, list]:
                 index = ceil(offset + interval * (i + 1)) - 1
                 temp_param_configs[i].append(param_values[index])
 
-        # set the actual parameter configurations
-        for param_config in temp_param_configs:
-            param_config = tuple(param_config)
-            param_configs.append(param_config)
-        param_configs = torch.tensor(param_configs, dtype=self.dtype).to(self.device)
+        # create a tensor of the possibly fictional parameter configurations
+        param_configs = torch.tensor(list(tuple(param_config) for param_config in temp_param_configs), dtype=self.dtype).to(self.device)
+        param_configs = param_configs.unique(dim=0) # remove duplicates
+        n_samples_unique = len(param_configs)
+
+        # get the indices of the parameter configurations
+        num_params = len(self.param_configs[0])
+        minimum_required_num_matching_params = round(num_params * 0.75)  # set the number of parameter matches allowed to be dropped before the search is stopped
+        param_configs_indices = torch.full((n_samples_unique,), -1, dtype=torch.int)
+        for selected_index, selected_param_config in enumerate(param_configs):
+            # for each parameter configuration, count the number of matching parameters
+            required_num_matching_params = num_params
+            matching_params = torch.count_nonzero(self.param_configs == selected_param_config, -1)
+            match_mask = (matching_params == required_num_matching_params)
+            # if there is not at least one matching parameter configuration, lower the required number of matching parameters
+            found_num_matching_param_configs = match_mask.count_nonzero()
+            while found_num_matching_param_configs < 1 and required_num_matching_params > minimum_required_num_matching_params:
+                required_num_matching_params -= 1
+                match_mask = (matching_params == required_num_matching_params)
+                found_num_matching_param_configs = match_mask.count_nonzero()
+
+            # if more than one possible parameter configuration has been found, pick a random one
+            if found_num_matching_param_configs > 1:
+                index = choice(self.index_counter[match_mask])
+            elif found_num_matching_param_configs == 1:
+                index = self.index_counter[match_mask].item()
+            else:
+                # if no matching parameter configurations were found
+                continue
 
-        # get the indices of the parameter configurations in O(n^2)
-        param_configs_indices = [-1 for _ in range(n_samples)]
-        for index, param_config in enumerate(self.param_configs):
-            for selected_index, selected_param_config in enumerate(param_configs):
-                if torch.allclose(selected_param_config, param_config, equal_nan=False) and index not in param_configs_indices:
-                    param_configs_indices[selected_index] = index
+            # set the selected index
+            param_configs_indices[selected_index] = min(max(int(index + random_offsets[selected_index].item()), 0), self.size-1)
 
-        if param_configs_indices.count(-1) > n_samples / 2:
-            print(f"No good fit was found in {param_configs_indices.count(-1)} out of the {n_samples} samples. Perhaps try something other than LHS.")
-        return param_configs_indices, param_configs
+        # filter -1 indices and duplicates that occurred because of the random offset
+        param_configs_indices = param_configs_indices[param_configs_indices >= 0]
+        param_configs_indices = param_configs_indices.unique().type(torch.int)
+        if len(param_configs_indices) < n_samples / 2:
+            print(f"{n_samples - len(param_configs_indices)} out of the {n_samples} LHS samples were duplicates or -1.",
+                  f"This might be because you have few initial samples ({n_samples}) relative to the number of parameters ({num_params}).",
+                  "Perhaps try something other than LHS.")
+        return param_configs_indices
 
     def get_middle_index_of_least_evaluated_region(self) -> int:
         """ Get the middle index of the region of parameter configurations that is the least visited """
         # This uses the largest distance between visited parameter configurations. That means it does not properly take the parameters into account, only the index of the parameter configurations, whereas LHS does.
-        distance_counter = -1
-        distance_tensor = torch.zeros_like(self.unvisited_configs, dtype=torch.int)     # TODO check if .to(self.device) is faster or slower
-        for index, unvisited in enumerate(self.unvisited_configs):
-            if unvisited:
-                distance_counter += 1
-            if not unvisited:
-                distance_counter = 0
-            distance_tensor[index] = distance_counter
+        distance_tensor = torch.arange(self.size)
+
+        # first get the indices that were visited (must be in ascending order)
+        indices_visited = self.index_counter[~self.unvisited_configs]
+
+        # then reset the range after the visited index
+        for index_visited in indices_visited:
+            distance_tensor[index_visited:] = torch.arange(self.size - index_visited)
 
         biggest_distance_index = distance_tensor.argmax()
         biggest_distance = distance_tensor[biggest_distance_index].item()
@@ -542,7 +555,8 @@ def contextual_variance(self, mean: torch.Tensor, std: torch.Tensor):
             improvement_over_current_sample = (abs(self.current_optimum) - self.train_y.mean().item()) / std.mean().item()
             improvement_diff = improvement_over_current_sample - improvement_over_initial_sample
             # the closer the improvement over the current sample is to the improvement over the initial sample, the greater the exploration
-            cv = max(np.log(1 - improvement_diff) + 0.1, 0.001)
+            x = 1 - min(max(1 - improvement_diff, 0.2), 0.0)
+            cv = np.log10(x) + 0.1    # at x=0.0, y=0.1; at x=0.2057, y=0.0.
             return cv
         else:
             raise NotImplementedError("Contextual Variance has not yet been implemented for non-scaled outputs")
@@ -573,7 +587,6 @@ def af_expected_improvement_tensor(self, predictions: Tuple[torch.Tensor, torch.
         if hyperparam is None:
             hyperparam = self.af_params['explorationfactor']
         fplus = self.current_optimum - hyperparam
-        # fplus = torch.full_like(y_mu, fplus) TODO does this make a difference for performance?
 
         diff_improvement = self.get_diff_improvement(y_mu, y_std, fplus)
         normal = torch.distributions.Normal(torch.zeros_like(diff_improvement), torch.ones_like(diff_improvement))
@@ -615,37 +628,33 @@ def set_acquisition_function(self, acquisition_function: str):
 
     def apply_scaling_to_inputs(self):
         """ Scale the inputs using min-max normalization (0-1) and remove constant parameters """
-        # TODO look into the efficiency, especially for GEMM (18.54%)
-        self.scaled_inputs = torch.zeros_like(self.param_configs)
         param_configs_scaled = torch.zeros_like(self.param_configs)
 
         # first get the scaling factors of each parameter
         v_min_list = list()
-        v_max_list = list()
+        v_diff_list = list()
         unchanging_params_list = list()
         for param_values in self.tune_params.values():
             v_min = min(param_values)
             v_max = max(param_values)
             v_min_list.append(v_min)
-            v_max_list.append(v_max)
+            v_diff_list.append(v_max - v_min)
             unchanging_params_list.append(v_min == v_max)
 
         # then set each parameter value to the scaled value
         for param_index in range(len(self.param_configs[0])):
             v_min = v_min_list[param_index]
-            v_max = v_max_list[param_index]
-            v_diff = v_max - v_min
-            for param_config_index, param_config in enumerate(self.param_configs):
-                param_configs_scaled[param_config_index][param_index] = (param_config[param_index] - v_min) / v_diff
+            v_diff = v_diff_list[param_index]
+            param_configs_scaled[:,param_index] = torch.sub(self.param_configs[:,param_index], v_min).div(v_diff)
 
         # finally remove parameters that are constant by applying a mask
-        unchanging_params_tensor = torch.tensor(unchanging_params_list, dtype=torch.bool)
-        if torch.all(unchanging_params_tensor == True):
+        unchanging_params_tensor = ~torch.tensor(unchanging_params_list, dtype=torch.bool)
+        if torch.all(unchanging_params_tensor == False):
             raise ValueError(f"All of the parameter configurations ({self.size}) are the same: {self.param_configs[0]}, nothing to optimize")
-        nonstatic_param_count = torch.count_nonzero(~unchanging_params_tensor)
+        nonstatic_param_count = torch.count_nonzero(unchanging_params_tensor)
         self.param_configs_scaled = torch.zeros([len(param_configs_scaled), nonstatic_param_count], dtype=self.dtype)
         for param_config_index, param_config in enumerate(param_configs_scaled):
-            self.param_configs_scaled[param_config_index] = param_config[~unchanging_params_tensor]
+            self.param_configs_scaled[param_config_index] = param_config[unchanging_params_tensor]
 
     def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[torch.Tensor, dict]:
         """ transform non-numerical or mixed-type parameters to numerical Tensor, also return new tune_params """
@@ -709,7 +718,7 @@ def visualize(self):
                 x_axis_param_configs = param_configs
                 test_x_x_axis = self.test_x_unscaled.squeeze().to(self.out_device).numpy()
             else:
-                x_axis_param_configs = torch.tensor(range(self.size))
+                x_axis_param_configs = torch.arange(self.size)
                 test_x_x_axis = x_axis_param_configs[self.unvisited_configs].to(self.out_device)
 
             # Get upper and lower confidence bounds
diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index c900f5347..838a54b97 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -8,6 +8,7 @@
 import logging
 import warnings
 import re
+from types import FunctionType
 
 import numpy as np
 try:
@@ -669,3 +670,54 @@ def dump_cache(obj: str, tuning_options):
     if isinstance(tuning_options.cache, dict) and tuning_options.cachefile:
         with open(tuning_options.cachefile, "a") as cachefile:
             cachefile.write(obj)
+
+
+def parse_restrictions(restrictions: str):
+    """" parses restrictions from a list of strings into a callable function """
+    operators = [ '+', '-', '*', '/', '%', '==', '!=', '(', ')', '[', ']' ]
+
+    suffix = ' and '
+    parsed_restrictions = ""
+    for restriction in restrictions:
+        new = ""
+
+        # first make sure everything that should be space-seperated is
+        for index in range(len(restriction)):
+            if restriction[index] in operators and index > 0 and restriction[index-1] != ' ':
+                new += ' '
+            new += restriction[index]
+            if restriction[index] in operators and index < len(restriction) - 1 and restriction[index+1] != ' ':
+                new += ' '
+
+        restriction = new
+
+        # then parse each part
+        new = ""
+        words = restriction.split(" ")
+        for word in words:
+
+            # filter spaces and empty words
+            if word == ' ' or word == '':
+                continue
+
+            # filter the operators
+            if word in operators:
+                new += word + ' '
+                continue
+
+            # filter numbers
+            if np.char.isnumeric(word):
+                new += word + ' '
+                continue
+
+            # make variables a dictionary 'p' lookup
+            word = f"params['{word}']"
+            new += word
+            new += ' '
+
+        parsed_restrictions += (new + suffix)
+
+    parsed_restrictions = "def restrictions(params): \n return " + parsed_restrictions[:-len(suffix)]
+    code_object = compile(parsed_restrictions, '<string>', 'exec')
+    func = FunctionType(code_object.co_consts[0], globals())
+    return func

From e355c58700866acdfe8ab6744f7430e54aa47834 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 16 Feb 2022 09:55:28 +0100
Subject: [PATCH 004/253] Made experimental Python runner parallel, completely
 new hyperparameter tuning metric, improvements to the BO GPyTorch lean
 implementation

---
 .gitattributes                                |   0
 .github/workflows/docs.yml                    |   0
 .github/workflows/python-app.yml              |   0
 .gitignore                                    |   0
 .zenodo.json                                  |   0
 CHANGELOG.md                                  |   0
 CITATION.cff                                  |   0
 CONTRIBUTING.rst                              |   0
 INSTALL.rst                                   |   0
 LICENSE                                       |   0
 MANIFEST.in                                   |   0
 README.rst                                    |   0
 doc/Makefile                                  |   0
 doc/deploy.sh                                 |   0
 doc/gemm-amd-summary.png                      | Bin
 doc/gh_pages-deploy_key.enc                   | Bin
 doc/source/conf.py                            |   0
 doc/source/contributing.rst                   |   0
 doc/source/correctness.rst                    |   0
 doc/source/design.rst                         |   0
 doc/source/examples.rst                       |   0
 doc/source/hostcode.rst                       |   0
 doc/source/index.rst                          |   0
 doc/source/install.rst                        |   0
 doc/source/templates.rst                      |   0
 doc/source/user-api.rst                       |   0
 doc/source/vocabulary.rst                     |   0
 examples/README.rst                           |   0
 examples/c/matrix_multiply.cpp                |   0
 examples/c/matrix_multiply.py                 |   0
 examples/cuda/convolution.cu                  |   0
 examples/cuda/convolution_streams.cu          |   0
 examples/cuda/expdist.cu                      |   0
 examples/cuda/matmul.cu                       |   0
 examples/cuda/pnpoly.cu                       |   0
 examples/cuda/pnpoly_host.cu                  |   0
 examples/cuda/reduction.cu                    |   0
 examples/cuda/spmv.cu                         |   0
 examples/cuda/stencil.cu                      |   0
 examples/cuda/texture.py                      |   0
 examples/cuda/vector_add_jinja.cu             |   0
 examples/cuda/vector_add_jinja2.py            |   0
 examples/cuda/zeromeanfilter.cu               |   0
 examples/fortran/vector_add.F90               |   0
 examples/fortran/vector_add_acc.F90           |   0
 examples/opencl/convolution.cl                |   0
 examples/opencl/matmul.cl                     |   0
 examples/opencl/reduction.cl                  |   0
 examples/opencl/stencil.cl                    |   0
 kernel_tuner/__init__.py                      |   0
 kernel_tuner/c.py                             |   0
 kernel_tuner/core.py                          |   8 +-
 kernel_tuner/cuda.py                          |   0
 kernel_tuner/cupy.py                          |   0
 kernel_tuner/hyper.py                         |   0
 kernel_tuner/integration.py                   |   0
 kernel_tuner/interface.py                     |   6 +-
 kernel_tuner/kernelbuilder.py                 |   0
 kernel_tuner/nvml.py                          |   0
 kernel_tuner/observers.py                     |   0
 kernel_tuner/opencl.py                        |   0
 kernel_tuner/python.py                        | 243 +++++++-
 kernel_tuner/runners/__init__.py              |   0
 kernel_tuner/runners/sequential.py            |  17 +-
 kernel_tuner/runners/simulation.py            |   2 +-
 kernel_tuner/strategies/__init__.py           |   0
 kernel_tuner/strategies/basinhopping.py       |   0
 kernel_tuner/strategies/bayes_opt.py          |   0
 kernel_tuner/strategies/bayes_opt_GPyTorch.py |   0
 .../strategies/bayes_opt_GPyTorch_lean.py     | 537 +++++++++++++-----
 .../strategies/bayes_opt_alt_BOTorch.py       |   0
 .../strategies/bayes_opt_alt_BayesOpt.py      |   0
 .../strategies/bayes_opt_alt_HyperOpt.py      |   0
 .../strategies/bayes_opt_alt_ScikitOpt.py     |   0
 kernel_tuner/strategies/bayes_opt_old.py      |   0
 kernel_tuner/strategies/brute_force.py        |   3 +-
 kernel_tuner/strategies/diff_evo.py           |   0
 kernel_tuner/strategies/dual_annealing.py     |   0
 kernel_tuner/strategies/firefly_algorithm.py  |   0
 kernel_tuner/strategies/genetic_algorithm.py  |   0
 kernel_tuner/strategies/greedy_ils.py         |   0
 kernel_tuner/strategies/greedy_mls.py         |   0
 kernel_tuner/strategies/hillclimbers.py       |   0
 kernel_tuner/strategies/minimize.py           |   4 +-
 kernel_tuner/strategies/mls.py                |   0
 kernel_tuner/strategies/ordered_greedy_mls.py |   0
 kernel_tuner/strategies/pso.py                |   0
 kernel_tuner/strategies/random_sample.py      |   0
 .../strategies/simulated_annealing.py         |   0
 kernel_tuner/util.py                          |  50 +-
 kernel_tuner/wrappers.py                      |   0
 roadmap.md                                    |   0
 setup.cfg                                     |   0
 setup.py                                      |   0
 test/__init__.py                              |   0
 test/context.py                               |   0
 test/strategies/test_bayesian_optimization.py |   0
 test/strategies/test_genetic_algorithm.py     |   0
 test/strategies/test_minimize.py              |   0
 test/strategies/test_strategies.py            |   0
 test/test_c_functions.py                      |   0
 test/test_cache_file.json                     |   0
 test/test_core.py                             |   0
 test/test_cuda_functions.py                   |   0
 test/test_cuda_mocked.py                      |   0
 test/test_cupy_functions.py                   |   0
 test/test_hyper.py                            |   0
 test/test_integration.py                      |   0
 test/test_interface.py                        |   0
 test/test_kernelbuilder.py                    |   0
 test/test_minimize.py                         |   0
 test/test_observers.py                        |   0
 test/test_opencl_functions.py                 |   0
 test/test_runners.py                          |   0
 test/test_util_functions.py                   |   0
 tutorial/README.md                            |   0
 tutorial/convolution.ipynb                    |   0
 tutorial/diffusion.ipynb                      |   0
 tutorial/diffusion_opencl.ipynb               |   0
 tutorial/diffusion_use_optparam.ipynb         |   0
 tutorial/grid3d.ipynb                         |   0
 tutorial/matmul/matmul.cu                     |   0
 tutorial/matmul/matmul.png                    | Bin
 tutorial/matmul/matmul_naive.cu               |   0
 tutorial/matmul/matmul_naive.png              | Bin
 tutorial/matmul/matmul_shared.cu              |   0
 tutorial/matmul/matmul_shared.png             | Bin
 tutorial/matrix_multiplication.ipynb          |   0
 128 files changed, 650 insertions(+), 220 deletions(-)
 mode change 100644 => 100755 .gitattributes
 mode change 100644 => 100755 .github/workflows/docs.yml
 mode change 100644 => 100755 .github/workflows/python-app.yml
 mode change 100644 => 100755 .gitignore
 mode change 100644 => 100755 .zenodo.json
 mode change 100644 => 100755 CHANGELOG.md
 mode change 100644 => 100755 CITATION.cff
 mode change 100644 => 100755 CONTRIBUTING.rst
 mode change 100644 => 100755 INSTALL.rst
 mode change 100644 => 100755 LICENSE
 mode change 100644 => 100755 MANIFEST.in
 mode change 100644 => 100755 README.rst
 mode change 100644 => 100755 doc/Makefile
 mode change 100644 => 100755 doc/deploy.sh
 mode change 100644 => 100755 doc/gemm-amd-summary.png
 mode change 100644 => 100755 doc/gh_pages-deploy_key.enc
 mode change 100644 => 100755 doc/source/conf.py
 mode change 100644 => 100755 doc/source/contributing.rst
 mode change 100644 => 100755 doc/source/correctness.rst
 mode change 100644 => 100755 doc/source/design.rst
 mode change 100644 => 100755 doc/source/examples.rst
 mode change 100644 => 100755 doc/source/hostcode.rst
 mode change 100644 => 100755 doc/source/index.rst
 mode change 100644 => 100755 doc/source/install.rst
 mode change 100644 => 100755 doc/source/templates.rst
 mode change 100644 => 100755 doc/source/user-api.rst
 mode change 100644 => 100755 doc/source/vocabulary.rst
 mode change 100644 => 100755 examples/README.rst
 mode change 100644 => 100755 examples/c/matrix_multiply.cpp
 mode change 100644 => 100755 examples/c/matrix_multiply.py
 mode change 100644 => 100755 examples/cuda/convolution.cu
 mode change 100644 => 100755 examples/cuda/convolution_streams.cu
 mode change 100644 => 100755 examples/cuda/expdist.cu
 mode change 100644 => 100755 examples/cuda/matmul.cu
 mode change 100644 => 100755 examples/cuda/pnpoly.cu
 mode change 100644 => 100755 examples/cuda/pnpoly_host.cu
 mode change 100644 => 100755 examples/cuda/reduction.cu
 mode change 100644 => 100755 examples/cuda/spmv.cu
 mode change 100644 => 100755 examples/cuda/stencil.cu
 mode change 100644 => 100755 examples/cuda/texture.py
 mode change 100644 => 100755 examples/cuda/vector_add_jinja.cu
 mode change 100644 => 100755 examples/cuda/vector_add_jinja2.py
 mode change 100644 => 100755 examples/cuda/zeromeanfilter.cu
 mode change 100644 => 100755 examples/fortran/vector_add.F90
 mode change 100644 => 100755 examples/fortran/vector_add_acc.F90
 mode change 100644 => 100755 examples/opencl/convolution.cl
 mode change 100644 => 100755 examples/opencl/matmul.cl
 mode change 100644 => 100755 examples/opencl/reduction.cl
 mode change 100644 => 100755 examples/opencl/stencil.cl
 mode change 100644 => 100755 kernel_tuner/__init__.py
 mode change 100644 => 100755 kernel_tuner/c.py
 mode change 100644 => 100755 kernel_tuner/core.py
 mode change 100644 => 100755 kernel_tuner/cuda.py
 mode change 100644 => 100755 kernel_tuner/cupy.py
 mode change 100644 => 100755 kernel_tuner/hyper.py
 mode change 100644 => 100755 kernel_tuner/integration.py
 mode change 100644 => 100755 kernel_tuner/interface.py
 mode change 100644 => 100755 kernel_tuner/kernelbuilder.py
 mode change 100644 => 100755 kernel_tuner/nvml.py
 mode change 100644 => 100755 kernel_tuner/observers.py
 mode change 100644 => 100755 kernel_tuner/opencl.py
 mode change 100644 => 100755 kernel_tuner/python.py
 mode change 100644 => 100755 kernel_tuner/runners/__init__.py
 mode change 100644 => 100755 kernel_tuner/runners/sequential.py
 mode change 100644 => 100755 kernel_tuner/runners/simulation.py
 mode change 100644 => 100755 kernel_tuner/strategies/__init__.py
 mode change 100644 => 100755 kernel_tuner/strategies/basinhopping.py
 mode change 100644 => 100755 kernel_tuner/strategies/bayes_opt.py
 mode change 100644 => 100755 kernel_tuner/strategies/bayes_opt_GPyTorch.py
 mode change 100644 => 100755 kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
 mode change 100644 => 100755 kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
 mode change 100644 => 100755 kernel_tuner/strategies/bayes_opt_alt_BayesOpt.py
 mode change 100644 => 100755 kernel_tuner/strategies/bayes_opt_alt_HyperOpt.py
 mode change 100644 => 100755 kernel_tuner/strategies/bayes_opt_alt_ScikitOpt.py
 mode change 100644 => 100755 kernel_tuner/strategies/bayes_opt_old.py
 mode change 100644 => 100755 kernel_tuner/strategies/brute_force.py
 mode change 100644 => 100755 kernel_tuner/strategies/diff_evo.py
 mode change 100644 => 100755 kernel_tuner/strategies/dual_annealing.py
 mode change 100644 => 100755 kernel_tuner/strategies/firefly_algorithm.py
 mode change 100644 => 100755 kernel_tuner/strategies/genetic_algorithm.py
 mode change 100644 => 100755 kernel_tuner/strategies/greedy_ils.py
 mode change 100644 => 100755 kernel_tuner/strategies/greedy_mls.py
 mode change 100644 => 100755 kernel_tuner/strategies/hillclimbers.py
 mode change 100644 => 100755 kernel_tuner/strategies/minimize.py
 mode change 100644 => 100755 kernel_tuner/strategies/mls.py
 mode change 100644 => 100755 kernel_tuner/strategies/ordered_greedy_mls.py
 mode change 100644 => 100755 kernel_tuner/strategies/pso.py
 mode change 100644 => 100755 kernel_tuner/strategies/random_sample.py
 mode change 100644 => 100755 kernel_tuner/strategies/simulated_annealing.py
 mode change 100644 => 100755 kernel_tuner/util.py
 mode change 100644 => 100755 kernel_tuner/wrappers.py
 mode change 100644 => 100755 roadmap.md
 mode change 100644 => 100755 setup.cfg
 mode change 100644 => 100755 setup.py
 mode change 100644 => 100755 test/__init__.py
 mode change 100644 => 100755 test/context.py
 mode change 100644 => 100755 test/strategies/test_bayesian_optimization.py
 mode change 100644 => 100755 test/strategies/test_genetic_algorithm.py
 mode change 100644 => 100755 test/strategies/test_minimize.py
 mode change 100644 => 100755 test/strategies/test_strategies.py
 mode change 100644 => 100755 test/test_c_functions.py
 mode change 100644 => 100755 test/test_cache_file.json
 mode change 100644 => 100755 test/test_core.py
 mode change 100644 => 100755 test/test_cuda_functions.py
 mode change 100644 => 100755 test/test_cuda_mocked.py
 mode change 100644 => 100755 test/test_cupy_functions.py
 mode change 100644 => 100755 test/test_hyper.py
 mode change 100644 => 100755 test/test_integration.py
 mode change 100644 => 100755 test/test_interface.py
 mode change 100644 => 100755 test/test_kernelbuilder.py
 mode change 100644 => 100755 test/test_minimize.py
 mode change 100644 => 100755 test/test_observers.py
 mode change 100644 => 100755 test/test_opencl_functions.py
 mode change 100644 => 100755 test/test_runners.py
 mode change 100644 => 100755 test/test_util_functions.py
 mode change 100644 => 100755 tutorial/README.md
 mode change 100644 => 100755 tutorial/convolution.ipynb
 mode change 100644 => 100755 tutorial/diffusion.ipynb
 mode change 100644 => 100755 tutorial/diffusion_opencl.ipynb
 mode change 100644 => 100755 tutorial/diffusion_use_optparam.ipynb
 mode change 100644 => 100755 tutorial/grid3d.ipynb
 mode change 100644 => 100755 tutorial/matmul/matmul.cu
 mode change 100644 => 100755 tutorial/matmul/matmul.png
 mode change 100644 => 100755 tutorial/matmul/matmul_naive.cu
 mode change 100644 => 100755 tutorial/matmul/matmul_naive.png
 mode change 100644 => 100755 tutorial/matmul/matmul_shared.cu
 mode change 100644 => 100755 tutorial/matmul/matmul_shared.png
 mode change 100644 => 100755 tutorial/matrix_multiplication.ipynb

diff --git a/.gitattributes b/.gitattributes
old mode 100644
new mode 100755
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
old mode 100644
new mode 100755
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
old mode 100644
new mode 100755
diff --git a/.gitignore b/.gitignore
old mode 100644
new mode 100755
diff --git a/.zenodo.json b/.zenodo.json
old mode 100644
new mode 100755
diff --git a/CHANGELOG.md b/CHANGELOG.md
old mode 100644
new mode 100755
diff --git a/CITATION.cff b/CITATION.cff
old mode 100644
new mode 100755
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
old mode 100644
new mode 100755
diff --git a/INSTALL.rst b/INSTALL.rst
old mode 100644
new mode 100755
diff --git a/LICENSE b/LICENSE
old mode 100644
new mode 100755
diff --git a/MANIFEST.in b/MANIFEST.in
old mode 100644
new mode 100755
diff --git a/README.rst b/README.rst
old mode 100644
new mode 100755
diff --git a/doc/Makefile b/doc/Makefile
old mode 100644
new mode 100755
diff --git a/doc/deploy.sh b/doc/deploy.sh
old mode 100644
new mode 100755
diff --git a/doc/gemm-amd-summary.png b/doc/gemm-amd-summary.png
old mode 100644
new mode 100755
diff --git a/doc/gh_pages-deploy_key.enc b/doc/gh_pages-deploy_key.enc
old mode 100644
new mode 100755
diff --git a/doc/source/conf.py b/doc/source/conf.py
old mode 100644
new mode 100755
diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst
old mode 100644
new mode 100755
diff --git a/doc/source/correctness.rst b/doc/source/correctness.rst
old mode 100644
new mode 100755
diff --git a/doc/source/design.rst b/doc/source/design.rst
old mode 100644
new mode 100755
diff --git a/doc/source/examples.rst b/doc/source/examples.rst
old mode 100644
new mode 100755
diff --git a/doc/source/hostcode.rst b/doc/source/hostcode.rst
old mode 100644
new mode 100755
diff --git a/doc/source/index.rst b/doc/source/index.rst
old mode 100644
new mode 100755
diff --git a/doc/source/install.rst b/doc/source/install.rst
old mode 100644
new mode 100755
diff --git a/doc/source/templates.rst b/doc/source/templates.rst
old mode 100644
new mode 100755
diff --git a/doc/source/user-api.rst b/doc/source/user-api.rst
old mode 100644
new mode 100755
diff --git a/doc/source/vocabulary.rst b/doc/source/vocabulary.rst
old mode 100644
new mode 100755
diff --git a/examples/README.rst b/examples/README.rst
old mode 100644
new mode 100755
diff --git a/examples/c/matrix_multiply.cpp b/examples/c/matrix_multiply.cpp
old mode 100644
new mode 100755
diff --git a/examples/c/matrix_multiply.py b/examples/c/matrix_multiply.py
old mode 100644
new mode 100755
diff --git a/examples/cuda/convolution.cu b/examples/cuda/convolution.cu
old mode 100644
new mode 100755
diff --git a/examples/cuda/convolution_streams.cu b/examples/cuda/convolution_streams.cu
old mode 100644
new mode 100755
diff --git a/examples/cuda/expdist.cu b/examples/cuda/expdist.cu
old mode 100644
new mode 100755
diff --git a/examples/cuda/matmul.cu b/examples/cuda/matmul.cu
old mode 100644
new mode 100755
diff --git a/examples/cuda/pnpoly.cu b/examples/cuda/pnpoly.cu
old mode 100644
new mode 100755
diff --git a/examples/cuda/pnpoly_host.cu b/examples/cuda/pnpoly_host.cu
old mode 100644
new mode 100755
diff --git a/examples/cuda/reduction.cu b/examples/cuda/reduction.cu
old mode 100644
new mode 100755
diff --git a/examples/cuda/spmv.cu b/examples/cuda/spmv.cu
old mode 100644
new mode 100755
diff --git a/examples/cuda/stencil.cu b/examples/cuda/stencil.cu
old mode 100644
new mode 100755
diff --git a/examples/cuda/texture.py b/examples/cuda/texture.py
old mode 100644
new mode 100755
diff --git a/examples/cuda/vector_add_jinja.cu b/examples/cuda/vector_add_jinja.cu
old mode 100644
new mode 100755
diff --git a/examples/cuda/vector_add_jinja2.py b/examples/cuda/vector_add_jinja2.py
old mode 100644
new mode 100755
diff --git a/examples/cuda/zeromeanfilter.cu b/examples/cuda/zeromeanfilter.cu
old mode 100644
new mode 100755
diff --git a/examples/fortran/vector_add.F90 b/examples/fortran/vector_add.F90
old mode 100644
new mode 100755
diff --git a/examples/fortran/vector_add_acc.F90 b/examples/fortran/vector_add_acc.F90
old mode 100644
new mode 100755
diff --git a/examples/opencl/convolution.cl b/examples/opencl/convolution.cl
old mode 100644
new mode 100755
diff --git a/examples/opencl/matmul.cl b/examples/opencl/matmul.cl
old mode 100644
new mode 100755
diff --git a/examples/opencl/reduction.cl b/examples/opencl/reduction.cl
old mode 100644
new mode 100755
diff --git a/examples/opencl/stencil.cl b/examples/opencl/stencil.cl
old mode 100644
new mode 100755
diff --git a/kernel_tuner/__init__.py b/kernel_tuner/__init__.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/c.py b/kernel_tuner/c.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py
old mode 100644
new mode 100755
index fac470b8b..1faf5deb0
--- a/kernel_tuner/core.py
+++ b/kernel_tuner/core.py
@@ -194,7 +194,8 @@ def check_argument_lists(self, kernel_name, arguments):
 class DeviceInterface(object):
     """Class that offers a High-Level Device Interface to the rest of the Kernel Tuner"""
 
-    def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=None, compiler_options=None, iterations=7, observers=None):
+    def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=None, compiler_options=None, iterations=7, observers=None,
+                 parallel_mode=False):
         """ Instantiate the DeviceInterface, based on language in kernel source
 
         :param kernel_source The kernel sources
@@ -228,6 +229,9 @@ def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=No
 
         logging.debug('DeviceInterface instantiated, lang=%s', lang)
 
+        if parallel_mode and lang != "Python":
+            raise NotImplementedError("Parallel mode has not been implemented for languages other than Python")
+
         if lang == "CUDA":
             dev = CudaFunctions(device, compiler_options=compiler_options, iterations=iterations, observers=observers)
         elif lang.upper() == "CUPY":
@@ -237,7 +241,7 @@ def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=No
         elif lang == "C":
             dev = CFunctions(compiler=compiler, compiler_options=compiler_options, iterations=iterations)
         elif lang == "Python":
-            dev = PythonFunctions(iterations=iterations)
+            dev = PythonFunctions(iterations=iterations, observers=observers, parallel_mode=parallel_mode, show_progressbar=True)
         else:
             raise ValueError("Sorry, support for languages other than CUDA, OpenCL, or C is not implemented yet")
 
diff --git a/kernel_tuner/cuda.py b/kernel_tuner/cuda.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/cupy.py b/kernel_tuner/cupy.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/integration.py b/kernel_tuner/integration.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
old mode 100644
new mode 100755
index e5cddcdb8..14f5dfd71
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -403,7 +403,7 @@ def _get_docstring(opts):
 def tune_kernel(kernel_name, kernel_source, problem_size, arguments, tune_params, grid_div_x=None, grid_div_y=None, grid_div_z=None, restrictions=None,
                 answer=None, atol=1e-6, verify=None, verbose=False, lang=None, device=0, platform=0, smem_args=None, cmem_args=None, texmem_args=None,
                 compiler=None, compiler_options=None, log=None, iterations=7, block_size_names=None, quiet=False, strategy=None, strategy_options=None,
-                cache=None, metrics=None, simulation_mode=False, observers=None):
+                cache=None, metrics=None, simulation_mode=False, parallel_mode=False, observers=None):
 
     if log:
         logging.basicConfig(filename=kernel_name + datetime.now().strftime('%Y%m%d-%H:%M:%S') + '.log', level=log)
@@ -469,7 +469,7 @@ def tune_kernel(kernel_name, kernel_source, problem_size, arguments, tune_params
 
     # select the runner for this job based on input
     selected_runner = SimulationRunner if simulation_mode is True else SequentialRunner
-    with selected_runner(kernelsource, kernel_options, device_options, iterations, observers) as runner:
+    with selected_runner(kernelsource, kernel_options, device_options, iterations, observers, parallel_mode) as runner:
 
         #the user-specified function may or may not have an optional atol argument;
         #we normalize it so that it always accepts atol.
@@ -498,7 +498,7 @@ def tune_kernel(kernel_name, kernel_source, problem_size, arguments, tune_params
             else:
                 print("no results to report")
 
-        if cache:
+        if cache and not simulation_mode:
             util.close_cache(cache)
 
     return results, env
diff --git a/kernel_tuner/kernelbuilder.py b/kernel_tuner/kernelbuilder.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/nvml.py b/kernel_tuner/nvml.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/observers.py b/kernel_tuner/observers.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/opencl.py b/kernel_tuner/opencl.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/python.py b/kernel_tuner/python.py
old mode 100644
new mode 100755
index 9655b068d..69c6ac33c
--- a/kernel_tuner/python.py
+++ b/kernel_tuner/python.py
@@ -1,25 +1,39 @@
-""" This module contains the functionality for running and compiling C functions """
+""" This module contains the functionality for running Python functions """
 
 from collections import namedtuple
 import platform
 import logging
+import warnings
 import importlib.util
+from math import ceil
+from time import perf_counter
+from typing import Tuple
 
-import numpy
-import numpy.ctypeslib
+# import cProfile
 
-from kernel_tuner.util import get_temp_filename, delete_temp_file, write_file
+import progressbar
+import numpy as np
+
+# for parallel subprocess runs
+from multiprocess import Manager, cpu_count, get_context    # using Pathos as Python's multiprocessing is unable to pickle
+from itertools import repeat
+import subprocess
+import sys
+from os import getpid
+
+from kernel_tuner.util import get_temp_filename, delete_temp_file
 
 # This represents an individual kernel argument.
 # It contains a numpy object (ndarray or number) and a ctypes object with a copy
 # of the argument data. For an ndarray, the ctypes object is a wrapper for the ndarray's data.
 Argument = namedtuple("Argument", ["numpy", "ctypes"])
+invalid_value = 1e20
 
 
 class PythonFunctions(object):
     """Class that groups the code for running and compiling C functions"""
 
-    def __init__(self, iterations=7):
+    def __init__(self, iterations=7, observers=None, parallel_mode=False, show_progressbar=False):
         """instantiate PythonFunctions object used for interacting with Python code
 
         :param iterations: Number of iterations used while benchmarking a kernel, 7 by default.
@@ -27,12 +41,24 @@ def __init__(self, iterations=7):
         """
         self.iterations = iterations
         self.max_threads = 1024
+        self.show_progressbar = show_progressbar
 
         #environment info
         env = dict()
         env["iterations"] = self.iterations
         self.env = env
         self.name = platform.processor()
+        self.observers = observers or []
+        self.parallel_mode = parallel_mode
+
+        self.benchmark_times = []
+
+        if self.parallel_mode:
+            warnings.warn(
+                "Be sure to check that simulation mode is true for the kernel, because parallel mode requires a completed cache file to avoid race conditions.")
+
+        if len(self.observers) > 0 and self.parallel_mode:
+            raise NotImplementedError("Observers are currently not implemented for parallel execution.")
 
     def __enter__(self):
         return self
@@ -95,27 +121,107 @@ def benchmark(self, func, args, threads, grid):
         :returns: All execution times.
         :rtype: dict()
         """
+
+        # For reference: the following times were obtained with 35 repeats on random_sample strategy.
+        # As seen, there is a lot of overhead with subproceses; directly executing the function scales much better.
+        # time taken by sequential: 20.7 sec
+        # time taken by parallel in sequential form (subprocess overhead): 46.3 sec
+        # time taken by parallel subprocesses: 7.5 sec on 9, 9.9 sec on 8, 13.6 sec on 4, 27.8 sec on 2, 45.9 sec on 1
+        # time taken by parallel directly: 2.99 sec on 9, 4.0 sec on 8, 5.23 sec on 4, 11.3 sec on 2, 19.3 sec on 1
+
         result = dict()
         result["times"] = []
-        for _ in range(self.iterations):
-            value = self.run_kernel(func, args, threads, grid)
-
-            #I would like to replace the following with actually capturing
-            #stderr and detecting the error directly in Python, it proved
-            #however that capturing stderr for non-Python functions from Python
-            #is a rather difficult thing to do
-            #
-            #The current, less than ideal, scheme uses the convention that a
-            #negative time indicates a 'too many resources requested for launch'
-            #which Kernel Tuner can silently ignore
-            if value < 0.0:
-                raise Exception("too many resources requested for launch")
-
-            result["times"].append(value)
-        result["time"] = numpy.mean(result["times"])
+        min_valid_iterations = ceil(self.iterations * 0.8)
+        iterator = range(self.iterations) if not self.show_progressbar or self.parallel_mode else progressbar.progressbar(
+            range(self.iterations), min_value=0, max_value=self.iterations, redirect_stdout=True)
+
+        # new implementation
+        start_time = perf_counter()
+        if self.parallel_mode and cpu_count() > 1:
+            num_procs = max(min(cpu_count() - 2, self.iterations), 1)
+            logging.debug(f"Running benchmark in parallel on {num_procs} processors")
+            manager = Manager()
+            invalid_flag = manager.Value('i', int(False))
+            MNE_values = manager.list()
+            runtimes = manager.list()
+            warnings_dicts = manager.list()
+            with get_context('spawn').Pool(num_procs) as pool:    # spawn alternative is forkserver, creates a reusable server
+                args = func, args, self.params, invalid_flag
+                MNE_values, runtimes, warnings_dicts = zip(*pool.starmap(run_kernel_and_observers, zip(iterator, repeat(args))))
+                MNE_values, runtimes, warnings_dicts = list(MNE_values), list(runtimes), list(warnings_dicts)
+            result["strategy_time"] = np.mean(runtimes)
+            warning_dict = warnings_dicts[0]
+            for key in warning_dict.keys():
+                warning_dict[key] = np.mean(list(warnings_dict[key] for warnings_dict in warnings_dicts))
+            result["warnings"] = warning_dict
+        else:
+            raise NotImplementedError("Sequential mode has not been implemented yet")
+
+        benchmark_time = perf_counter() - start_time
+        self.benchmark_times.append(benchmark_time)
+        print(f"Time taken: {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}")
+
+        grandmean, times = get_grandmedian_and_times(MNE_values, invalid_value, min_valid_iterations)
+        result["times"] = times
+        result["time"] = grandmean
+        print(f"Grandmean over kernels: {grandmean}, mean MNE per iteration: {np.mean(times)}, std MNE per iteration: {np.std(times)}")
+        return result
+
+        start_time = perf_counter()
+        if self.parallel_mode:
+            num_procs = max(cpu_count() - 1, 1)
+            logging.debug(f"Running benchmark in parallel on {num_procs} processors")
+            manager = Manager()
+            MRE_values = manager.list()
+            runtimes = manager.list()
+            with get_context('spawn').Pool(num_procs) as pool:    # spawn alternative is forkserver, creates a reusable server
+                args = func, args, self.params
+                MRE_values, runtimes = zip(*pool.starmap(run_kernel_and_observers, zip(iterator, repeat(args))))
+                MRE_values, runtimes = list(MRE_values), list(runtimes)
+                print(MRE_values)
+            result["times"] = values
+            result["strategy_time"] = np.mean(runtimes)
+            np_results = np.array(values)
+        else:
+            # sequential implementation
+            np_results = np.array([])
+            for iter in iterator:
+                for obs in self.observers:
+                    obs.before_start()
+                value = self.run_kernel(func, args)
+                for obs in self.observers:
+                    obs.after_finish()
+
+                if value < 0.0:
+                    raise ValueError("Invalid benchmark result")
+
+                result["times"].append(value)
+                np_results = np.append(np_results, value)
+                if value >= invalid_value and iter >= min_valid_iterations and len(np_results[np_results < invalid_value]) < min_valid_iterations:
+                    break
+
+            # fill up the remaining iters with invalid in case of a break
+            result["times"] += [invalid_value] * (self.iterations - len(result["times"]))
+
+            # finish by instrumenting the results with the observers
+            for obs in self.observers:
+                result.update(obs.get_results())
+
+        benchmark_time = perf_counter() - start_time
+        self.benchmark_times.append(benchmark_time)
+        print(f"Time taken: {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}")
+
+        # calculate the mean of the means of the Mean Relative Error over the valid results
+        valid_results = np_results[np_results < invalid_value]
+        mean_mean_MRE = np.mean(valid_results) if len(valid_results) > 0 else np.nan
+
+        # write the 'time' to the results and return
+        if np.isnan(mean_mean_MRE) or len(valid_results) < min_valid_iterations:
+            mean_mean_MRE = invalid_value
+        result["time"] = mean_mean_MRE
         return result
 
-    def run_kernel(self, func, args, threads, grid):
+    def run_kernel(self, func, args):
         """runs the kernel once, returns whatever the kernel returns
 
         :param func: A C function compiled for this specific configuration
@@ -140,8 +246,99 @@ def run_kernel(self, func, args, threads, grid):
         logging.debug("run_kernel")
         logging.debug("arguments=" + str([str(arg) for arg in args]))
 
-        time = func(**self.params)
+        time = func(*args, **self.params)
 
         return time
 
     units = {}
+
+
+def run_kernel_and_observers(iter, args) -> Tuple[list, float, dict]:
+    """ Function to run a kernel directly for parallel processing. Must be outside the class to avoid pickling issues due to large scope. """
+    PID = getpid()
+    print(f"Iter {iter+1}, PID {PID}", flush=True)
+    func, funcargs, params, invalid_flag = args
+    logging.debug(f"run_kernel as subprocess {iter} (PID {PID})")
+    logging.debug("arguments=" + str([str(arg) for arg in funcargs]))
+
+    # run the kernel
+    starttime = perf_counter()
+    # cProfile.runctx('func(invalid_flag, *funcargs, **params)', globals(), locals(), 'profile-%s.out' % str(iter + 1))
+    # values, warning_dict = None, None
+    values, warning_dict = func(invalid_flag, *funcargs, **params)
+    runtime = perf_counter() - starttime
+    return values, runtime, warning_dict
+
+
+def run_kernel_as_subprocess(iter, args):
+    """ Function to run a kernel as a subprocess for parallel processing. Must be outside the class to avoid pickling issues due to large scope. Significantly slower than run_kernel, but guaranteed to be a different process. Observers are not implemented."""
+    func, args, params = args
+    PID = getpid()
+    # print(f"Iter {iter}, PID {PID}", flush=True)
+    logging.debug(f"run_kernel as subprocess {iter} (PID {PID})")
+    logging.debug("arguments=" + str([str(arg) for arg in args]))
+
+    def make_kwargstrings(**kwargs) -> list:
+        return list(f"{key}={value}" for key, value in kwargs.items())
+
+    # Subprocess
+    args += make_kwargstrings(**params)
+    proc = subprocess.run([sys.executable or 'python', str(func.__name__ + '.py')] + args, shell=False, capture_output=True)
+    stderr = f"subprocess {iter} with PID {PID} errors: {proc.stderr.decode('utf-8')}" if len(proc.stderr.decode('utf-8')) > 0 else ""
+    stdout = f"subprocess {iter} with PID {PID} output: {proc.stdout.decode('utf-8')}" if len(proc.stdout.decode('utf-8')) > 0 else ""
+
+    if stderr != "":
+        logging.debug(stderr)
+        print(stderr)
+    if stdout != "":
+        logging.debug(stdout)
+        # print(stdout)
+
+    time = float(stdout.split("result_value=")[1])
+    return time
+
+
+def get_grandmedian_and_times(MNE_values, invalid_value, min_valid_iterations=1):
+    """ Get the grandmean (mean of median MNE per kernel) and mean MNE per iteration """
+    MNE_values = np.array(MNE_values)
+    median_MNEs = np.array([])
+    valid_MNE_times = list()
+    # get the mean MNE per kernel
+    for i in range(len(MNE_values[0])):
+        MNE_kernel_values = MNE_values[:, i]
+        valid_MNE_mask = (MNE_kernel_values < invalid_value) & (MNE_kernel_values >= 0)
+        valid_MNE_kernel_values = MNE_kernel_values[valid_MNE_mask]
+        if len(valid_MNE_kernel_values) >= min_valid_iterations:
+            # # filter outliers by keeping only values that are within two times the Median Absolute Deviation
+            # AD = np.abs(valid_MNE_kernel_values - np.median(valid_MNE_kernel_values))
+            # MAD = np.median(AD)
+            # selected_MNE_kernel_values = valid_MNE_kernel_values[AD < MAD * 3]
+            # print(f"Removed {len(valid_MNE_kernel_values) - len(selected_MNE_kernel_values)}")
+            # median_MNEs = np.append(median_MNEs, np.median(selected_MNE_kernel_values))
+            # median_MNEs = np.append(median_MNEs, np.mean(valid_MNE_kernel_values))
+
+            # filter outliers by keeping only values that are within three times the Median Absolute Deviation
+            AD = np.abs(valid_MNE_kernel_values - np.median(valid_MNE_kernel_values))
+            MAD = np.median(AD)
+            MAD_score = AD / MAD if MAD else 0.0
+            selected_MNE_kernel_values = valid_MNE_kernel_values[MAD_score < 3]
+            median_MNEs = np.append(median_MNEs, np.median(selected_MNE_kernel_values))
+        else:
+            median_MNEs = np.append(median_MNEs, invalid_value)
+
+    # get the mean MNE per iteration
+    for i in range(len(MNE_values)):
+        MNE_iteration_values = MNE_values[i]
+        valid_MNE_mask = (MNE_iteration_values < invalid_value) & (MNE_iteration_values >= 0)
+        valid_MNE_iteration_values = MNE_iteration_values[valid_MNE_mask]
+        if len(valid_MNE_iteration_values) > 0:
+            valid_MNE_times.append(np.mean(valid_MNE_iteration_values))
+        else:
+            valid_MNE_times.append(invalid_value)
+
+    # get the grandmean by taking the mean over the median MNE per iteration, invalid if one of the kernels is invalid
+    print(median_MNEs)
+    grandmean_MNE = np.mean(median_MNEs)
+    if np.isnan(grandmean_MNE) or len(median_MNEs[median_MNEs >= invalid_value]) > 0:
+        grandmean_MNE = invalid_value
+    return grandmean_MNE, valid_MNE_times
diff --git a/kernel_tuner/runners/__init__.py b/kernel_tuner/runners/__init__.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/runners/sequential.py b/kernel_tuner/runners/sequential.py
old mode 100644
new mode 100755
index 05b94121b..20fbfaa7b
--- a/kernel_tuner/runners/sequential.py
+++ b/kernel_tuner/runners/sequential.py
@@ -11,7 +11,7 @@
 class SequentialRunner(object):
     """ SequentialRunner is used for tuning with a single process/thread """
 
-    def __init__(self, kernel_source, kernel_options, device_options, iterations, observers):
+    def __init__(self, kernel_source, kernel_options, device_options, iterations, observers, parallel_mode=False):
         """ Instantiate the SequentialRunner
 
         :param kernel_source: The kernel source
@@ -30,14 +30,12 @@ def __init__(self, kernel_source, kernel_options, device_options, iterations, ob
         """
 
         #detect language and create high-level device interface
-        self.dev = DeviceInterface(kernel_source, iterations=iterations, observers=observers, **device_options).__enter__()
+        self.dev = DeviceInterface(kernel_source, iterations=iterations, observers=observers, parallel_mode=parallel_mode, **device_options).__enter__()
 
         self.units = self.dev.units
         self.quiet = device_options.quiet
         self.kernel_source = kernel_source
-
-        self.warmed_up = False
-
+        self.warmed_up = True if kernel_source.lang == 'Python' else False
         self.simulation_mode = False
 
         #move data to the GPU
@@ -80,10 +78,9 @@ def run(self, parameter_space, kernel_options, tuning_options):
 
             #check if element is in the cache
             x_int = ",".join([str(i) for i in element])
-            if tuning_options.cache:
-                if x_int in tuning_options.cache:
-                    results.append(tuning_options.cache[x_int])
-                    continue
+            if tuning_options.cache and x_int in tuning_options.cache:
+                results.append(tuning_options.cache[x_int])
+                continue
 
             result = self.dev.compile_and_benchmark(self.kernel_source, self.gpu_args, params, kernel_options, tuning_options)
             if result is None:
@@ -106,7 +103,7 @@ def run(self, parameter_space, kernel_options, tuning_options):
             if tuning_options.metrics:
                 params = process_metrics(params, tuning_options.metrics)
 
-            print_config_output(tuning_options.tune_params, params, self.quiet, tuning_options.metrics, self.units)
+            # print_config_output(tuning_options.tune_params, params, self.quiet, tuning_options.metrics, self.units) # TODO uncomment
 
             store_cache(x_int, params, tuning_options)
             results.append(params)
diff --git a/kernel_tuner/runners/simulation.py b/kernel_tuner/runners/simulation.py
old mode 100644
new mode 100755
index aba6dc88c..9e58634a5
--- a/kernel_tuner/runners/simulation.py
+++ b/kernel_tuner/runners/simulation.py
@@ -173,7 +173,7 @@ def __exit__(self, *exc):
 class SimulationRunner(object):
     """ SimulationRunner is used for tuning with a single process/thread """
 
-    def __init__(self, kernel_source, kernel_options, device_options, iterations, observers):
+    def __init__(self, kernel_source, kernel_options, device_options, iterations, observers, parallel_mode=False):
         """ Instantiate the SimulationRunner
 
         :param kernel_source: The kernel source
diff --git a/kernel_tuner/strategies/__init__.py b/kernel_tuner/strategies/__init__.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/basinhopping.py b/kernel_tuner/strategies/basinhopping.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch.py b/kernel_tuner/strategies/bayes_opt_GPyTorch.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
old mode 100644
new mode 100755
index 594f4aa23..e4809be7f
--- a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
+++ b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
@@ -1,22 +1,42 @@
 """ Lean implementation of Bayesian Optimization with GPyTorch """
+# python
 from copy import deepcopy
-from typing import Any, Tuple
+from typing import Tuple
 from random import randint, shuffle, choice
 from math import ceil
+import warnings
+import ast    # for casting strings to dict
+
+# external
 import numpy as np
-from numpy.lib.arraysetops import unique
 from numpy.random import default_rng
 import torch
 import gpytorch
+import arviz as az
 
-from kernel_tuner.util import get_valid_configs, config_valid
+# internal
+from kernel_tuner.util import get_valid_configs
 from kernel_tuner.strategies import minimize
 
+# set supported hyperparameter values
+supported_precisions = ['float', 'double']
 supported_initial_sample_methods = ['lhs', 'index', 'random']
 supported_methods = ['ei', 'poi', 'random']
 supported_cov_kernels = ['matern', 'matern_scalekernel']
 supported_likelihoods = ['Gaussian', 'GaussianPrior', 'FixedNoise']
-supported_optimizers = ['LBFGS', 'Adam']
+supported_optimizers = ['LBFGS', 'Adam', 'AdamW', 'Adagrad', 'ASGD']
+
+
+# set complex hyperparameter defaults
+def default_optimizer_learningrates(key):
+    defaults = {
+        'LBFGS': 1,
+        'Adam': 0.001,
+        'AdamW': 0.001,
+        'ASGD': 0.01,
+        'Adagrad': 0.01
+    }
+    return defaults[key]
 
 
 def tune(runner, kernel_options, device_options, tuning_options):
@@ -50,18 +70,15 @@ def tune(runner, kernel_options, device_options, tuning_options):
     if cuda_available:
         print(f"CUDA is available, device: {torch.cuda.get_device_name(device)}")
 
-
     # retrieve options with defaults
     options = tuning_options.strategy_options
     optimization_direction = options.get("optimization_direction", 'min')
-    num_initial_samples = options.get("popsize", 20)
-    max_fevals = options.get("max_fevals", 100)
+    num_initial_samples = int(options.get("popsize", 20))
+    max_fevals = int(options.get("max_fevals", 220))
     max_threads = runner.dev.max_threads
-    if max_fevals < num_initial_samples:
-        raise ValueError(f"Maximum number of function evaluations ({max_fevals}) can not be lower than the number of initial samples ({num_initial_samples}) ")
 
-    # enabling scaling will unscale and snap inputs on evaluation, more efficient to keep unscale values in a lookup table
-    tuning_options["snap"] = True
+    # enabling scaling will unscale and snap inputs on evaluation, more efficient to scale all at once and keep unscaled values
+    tuning_options["snap"] = False
     tuning_options["scaling"] = False
 
     # prune the search space using restrictions
@@ -69,12 +86,14 @@ def tune(runner, kernel_options, device_options, tuning_options):
 
     # limit max_fevals to max size of the parameter space
     max_fevals = min(len(parameter_space), max_fevals)
+    if max_fevals < num_initial_samples:
+        raise ValueError(
+            f"Maximum number of function evaluations ({max_fevals}) can not be lower than or equal to the number of initial samples ({num_initial_samples}), you might as well brute-force."
+        )
 
     # execute Bayesian Optimization
     BO = BayesianOptimization(parameter_space, kernel_options, tuning_options, runner, num_initial_samples, optimization_direction, device)
-    # BO.visualize()
     all_results = BO.optimize(max_fevals)
-    # BO.visualize()
 
     return all_results, runner.dev.get_environment()
 
@@ -97,7 +116,8 @@ def forward(self, x):
 
 class BayesianOptimization:
 
-    def __init__(self, parameter_space: list, kernel_options, tuning_options, runner, num_initial_samples: int, optimization_direction: str, device: torch.device) -> None:
+    def __init__(self, parameter_space: list, kernel_options, tuning_options, runner, num_initial_samples: int, optimization_direction: str,
+                 device: torch.device) -> None:
         self.animate = False    # TODO remove
 
         # set defaults
@@ -113,37 +133,40 @@ def __init__(self, parameter_space: list, kernel_options, tuning_options, runner
         self.runner = runner
         self.max_threads = runner.dev.max_threads
 
+        # get precision options
+        self.dtype = torch.float if self.get_hyperparam("precision", "float", supported_precisions) == "float" else torch.double
+        self.min_std = self.get_hyperparam("minimum_std", 1e-6, type=float)
+
         # get tuning options
         self.initial_sample_method = self.get_hyperparam("initialsamplemethod", "lhs", supported_initial_sample_methods)
-        self.initial_sample_random_offset_factor = self.get_hyperparam("initialsamplerandomoffsetfactor", 0.1)
-        self.initial_training_iter = self.get_hyperparam("initialtrainingiter", 50)
-        self.training_iter = self.get_hyperparam("trainingiter", 3)
+        self.initial_sample_random_offset_factor = self.get_hyperparam("initialsamplerandomoffsetfactor", 0.1, type=float)
+        self.initial_training_iter = self.get_hyperparam("initialtrainingiter", 5, type=int)
+        self.training_iter = self.get_hyperparam("trainingiter", 1, type=int)
         self.cov_kernel_name = self.get_hyperparam("covariancekernel", "matern_scalekernel", supported_cov_kernels)
-        self.cov_kernel_lengthscale = self.get_hyperparam("covariancelengthscale", 1.5)
+        self.cov_kernel_lengthscale = self.get_hyperparam("covariancelengthscale", 0.5, type=float)
         self.likelihood_name = self.get_hyperparam("likelihood", "Gaussian", supported_likelihoods)
-        self.optimizer_name = self.get_hyperparam("optimizer", "Adam", supported_optimizers)
-        self.optimizer_learningrate = self.get_hyperparam("optimizer_learningrate", 0.1)
+        self.optimizer_name = self.get_hyperparam("optimizer", "LBFGS", supported_optimizers)
+        self.optimizer_learningrate = self.get_hyperparam("optimizer_learningrate", self.optimizer_name, type=float, cast=default_optimizer_learningrates)
         acquisition_function_name = self.get_hyperparam("method", "ei", supported_methods)
-        af_params = self.get_hyperparam("methodparams", {})
+        af_params = self.get_hyperparam("methodparams", {}, type=dict, cast=ast.literal_eval)
 
         # set acquisition function options
         self.set_acquisition_function(acquisition_function_name)
         if 'explorationfactor' not in af_params:
-            af_params['explorationfactor'] = 'CV'
+            af_params['explorationfactor'] = 0.1
         self.af_params = af_params
 
         # set Tensors
-        # the unvisited_configs and valid_configs are to be used as boolean masks on the other tensors, more efficient than adding to / removing from tensors
         self.device = device
         self.out_device = torch.device("cpu")
-        self.dtype = torch.double
         self.size = len(parameter_space)
-        self.unvisited_configs = torch.ones(self.size, dtype=torch.bool).to(device)
         self.index_counter = torch.arange(self.size)
+        # the unvisited_configs and valid_configs are to be used as boolean masks on the other tensors, more efficient than adding to / removing from tensors
+        self.unvisited_configs = torch.ones(self.size, dtype=torch.bool).to(device)
         self.valid_configs = torch.zeros(self.size, dtype=torch.bool).to(device)
         self.inital_sample_configs = torch.zeros(self.size, dtype=torch.bool).to(device)
-        self.results = torch.zeros(self.size, dtype=self.dtype).to(device) * np.nan             # x (param configs) and y (results) must be the same type
-        self.results_std = torch.ones(self.size, dtype=self.dtype).to(device) * 1e-3
+        self.results = torch.zeros(self.size, dtype=self.dtype).to(device) * np.nan    # x (param configs) and y (results) must be the same type
+        self.results_std = torch.ones(self.size, dtype=self.dtype).to(device)    # only a valid assumption if outputs are normalized
 
         # transform non-numerical parameters to numerical, keep true_param_configs for evaluation function
         self.param_configs, self.tune_params = self.transform_nonnumerical_params(parameter_space)
@@ -180,6 +203,11 @@ def __init__(self, parameter_space: list, kernel_options, tuning_options, runner
             'lengthscale': np.nan,
             'noise': np.nan,
         }
+        self.hyperparams_means = {
+            'loss': np.array([]),
+            'lengthscale': np.array([]),
+            'noise': np.array([]),
+        }
         self.initialize_model()
 
     @property
@@ -232,15 +260,19 @@ def true_param_config_indices(self, target_indices: torch.Tensor) -> torch.Tenso
         masked_counter = self.index_counter[self.unvisited_configs]
         return masked_counter.index_select(0, target_indices)
 
-    def initialize_model(self):
+    def initialize_model(self, take_initial_sample=True, train_hyperparams=True):
         """ Initialize the surrogate model """
-        self.initial_sample()
+        if not self.runner.simulation_mode:
+            self.import_cached_evaluations()
+        self.initial_sample_std = self.min_std
+        if take_initial_sample:
+            self.initial_sample()
 
         # create the model
         if self.likelihood_name == 'Gaussian':
             self.likelihood = gpytorch.likelihoods.GaussianLikelihood()
         elif self.likelihood_name == 'FixedNoise':
-            self.likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(noise=self.train_y_err.clamp(min=1.0e-4), learn_additional_noise=False)
+            self.likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(noise=self.train_y_err.clamp(min=self.min_std), learn_additional_noise=False)
         self.likelihood = self.likelihood.to(self.device)
         self.model = ExactGPModel(self.train_x, self.train_y, self.likelihood, self.cov_kernel_name, self.cov_kernel_lengthscale)
 
@@ -249,18 +281,46 @@ def initialize_model(self):
         self.likelihood.train()
         model_parameters = filter(lambda p: p.requires_grad, self.model.parameters())
 
-        # LBFGS is probably better as Adam is only first-order
+        # set the optimizer
+        # LBFGS is probably better as Adam is first-order
         if self.optimizer_name == 'LBFGS':
             self.optimizer = torch.optim.LBFGS(model_parameters, lr=self.optimizer_learningrate)
         elif self.optimizer_name == 'Adam':
             self.optimizer = torch.optim.Adam(model_parameters, lr=self.optimizer_learningrate)
+        elif self.optimizer_name == 'AdamW':
+            self.optimizer = torch.optim.AdamW(model_parameters, lr=self.optimizer_learningrate)
+        elif self.optimizer_name == 'ASGD':
+            self.optimizer = torch.optim.ASGD(model_parameters, lr=self.optimizer_learningrate)
+        elif self.optimizer_name == 'Adagrad':
+            self.optimizer = torch.optim.Adagrad(model_parameters, lr=self.optimizer_learningrate)
 
         self.mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model).to(self.device)
-        self.train_hyperparams(self.initial_training_iter)
+        if train_hyperparams:
+            self.train_hyperparams(self.initial_training_iter)
+        else:
+            self.train_hyperparams(0)
+
+    def import_cached_evaluations(self):
+        """ Import the previously evaluated configurations into this run """
+        # make strings of all the parameter configurations in the search space
+        param_config_strings = list()
+        for param_config in self.true_param_configs:
+            param_config_strings.append(",".join([str(v) for v in param_config]))
+
+        # load the results from the cache into the run
+        cache = self.tuning_options.cache
+        if len(cache.keys()) > 0:
+            print("Previous cachefile found while not in simulation mode, importing previous evaluations.")
+        for param_config_string, result in cache.items():
+            # get the index of the string in the search space
+            param_config_index = param_config_strings.index(param_config_string)
+            time = self.evaluate_config(param_config_index)
+            assert time == result['time']
+        print(f"Imported {len(self.all_results)} previously evaluated configurations.")
 
     def initial_sample(self):
         """ Take an initial sample of the parameter space """
-        list_param_config_indices = list()
+        list_param_config_indices = list(self.index_counter[~self.unvisited_configs])
 
         # generate a random offset from a normal distribution to add to the sample indices
         rng = default_rng()
@@ -270,9 +330,11 @@ def initial_sample(self):
         random_offsets = np.round(rng.standard_normal(self.num_initial_samples) * random_offset_size)
 
         # first apply the initial sampling method
-        if self.initial_sample_method == 'lhs':
+        if self.initial_sample_method == 'lhs' and self.num_initial_samples - self.fevals > 1:
             indices = self.get_lhs_samples(random_offsets)
             for param_config_index in indices.tolist():
+                if param_config_index in list_param_config_indices:
+                    continue
                 list_param_config_indices.append(param_config_index)
                 self.evaluate_config(param_config_index)
         elif self.initial_sample_method == 'random':
@@ -286,21 +348,27 @@ def initial_sample(self):
         # then take index-spaced samples until all samples are valid
         while self.fevals < self.num_initial_samples:
             least_evaluated_region_index = self.get_middle_index_of_least_evaluated_region()
-            param_config_index = min(max(int(least_evaluated_region_index + random_offsets[self.fevals].item()), 0), self.size-1)
+            param_config_index = min(max(int(least_evaluated_region_index + random_offsets[self.fevals].item()), 0), self.size - 1)
+            if param_config_index in list_param_config_indices:
+                warnings.warn(
+                    f"An already evaluated configuration ({param_config_index}) was selected for index-spaced sampling. " +
+                    "If this happens regularly, reduce the initial sample random offset factor.", AlreadyEvaluatedConflict)
+                param_config_index = least_evaluated_region_index
             list_param_config_indices.append(param_config_index)
             self.evaluate_config(param_config_index)
 
         # set the current optimum, initial sample mean and initial sample std
         self.current_optimum = self.opt(self.train_y).item()
         self.initial_sample_mean = self.train_y.mean().item()
-        self.initial_sample_std = None
+        # self.initial_sample_std = self.train_y.std().item()
+        self.initial_sample_std = self.min_std    # temporary until the predictive posterior has been taken
 
         # save a boolean mask of the initial samples
         self.inital_sample_configs = self.valid_configs.detach().clone()
 
     def get_lhs_samples(self, random_offsets: np.ndarray) -> torch.Tensor:
         """ Get a centered Latin Hypercube Sample with a random offset """
-        n_samples = self.num_initial_samples
+        n_samples = self.num_initial_samples - self.fevals
 
         # first get the seperate parameter values to make possibly fictional distributed parameter configurations
         temp_param_configs = [[] for _ in range(n_samples)]
@@ -321,13 +389,14 @@ def get_lhs_samples(self, random_offsets: np.ndarray) -> torch.Tensor:
 
         # create a tensor of the possibly fictional parameter configurations
         param_configs = torch.tensor(list(tuple(param_config) for param_config in temp_param_configs), dtype=self.dtype).to(self.device)
-        param_configs = param_configs.unique(dim=0) # remove duplicates
+        param_configs = param_configs.unique(dim=0)    # remove duplicates
         n_samples_unique = len(param_configs)
 
         # get the indices of the parameter configurations
         num_params = len(self.param_configs[0])
-        minimum_required_num_matching_params = round(num_params * 0.75)  # set the number of parameter matches allowed to be dropped before the search is stopped
-        param_configs_indices = torch.full((n_samples_unique,), -1, dtype=torch.int)
+        minimum_required_num_matching_params = round(num_params *
+                                                     0.75)    # set the number of parameter matches allowed to be dropped before the search is stopped
+        param_configs_indices = torch.full((n_samples_unique, ), -1, dtype=torch.int)
         for selected_index, selected_param_config in enumerate(param_configs):
             # for each parameter configuration, count the number of matching parameters
             required_num_matching_params = num_params
@@ -350,15 +419,16 @@ def get_lhs_samples(self, random_offsets: np.ndarray) -> torch.Tensor:
                 continue
 
             # set the selected index
-            param_configs_indices[selected_index] = min(max(int(index + random_offsets[selected_index].item()), 0), self.size-1)
+            param_configs_indices[selected_index] = min(max(int(index + random_offsets[selected_index].item()), 0), self.size - 1)
 
         # filter -1 indices and duplicates that occurred because of the random offset
         param_configs_indices = param_configs_indices[param_configs_indices >= 0]
         param_configs_indices = param_configs_indices.unique().type(torch.int)
         if len(param_configs_indices) < n_samples / 2:
-            print(f"{n_samples - len(param_configs_indices)} out of the {n_samples} LHS samples were duplicates or -1.",
-                  f"This might be because you have few initial samples ({n_samples}) relative to the number of parameters ({num_params}).",
-                  "Perhaps try something other than LHS.")
+            warnings.warn(
+                str(f"{n_samples - len(param_configs_indices)} out of the {n_samples} LHS samples were duplicates or -1." +
+                    f"This might be because you have few initial samples ({n_samples}) relative to the number of parameters ({num_params})." +
+                    "Perhaps try something other than LHS."))
         return param_configs_indices
 
     def get_middle_index_of_least_evaluated_region(self) -> int:
@@ -379,11 +449,6 @@ def get_middle_index_of_least_evaluated_region(self) -> int:
         # print(f"Max distance {biggest_distance}, index: {middle_index}, between: {biggest_distance_index-biggest_distance}-{biggest_distance_index}")
         return middle_index
 
-    def find_nearest(self, value, array: torch.Tensor):
-        """ Find the value nearest to the given value in the array """
-        index = (torch.abs(array - value)).argmin()
-        return array[index]
-
     def train_hyperparams(self, training_iter: int):
         """ Optimize the surrogate model hyperparameters iteratively """
         self.model.train()
@@ -395,37 +460,55 @@ def closure():
             try:
                 loss = -self.mll(output, self.train_y)    # calculate loss and backprop gradients
                 loss.backward()
+                # large sudden increase in loss signals numerical instability
+                with warnings.catch_warnings():
+                    warnings.simplefilter("ignore", category=RuntimeWarning)
+                    no_nan_losses = self.hyperparams_means['loss'][~np.isnan(self.hyperparams_means['loss'])]
+                    if len(no_nan_losses) > 1 and loss.item() > np.mean(no_nan_losses) * 2:
+                        warnings.warn("Avoiding loss surge, aborting training", AvoidedLossSurgeWarning)
+                        return np.nan
                 return loss
             except gpytorch.utils.errors.NotPSDError:
-                print(f"WARNING - matrix not positive definite during training")
+                warnings.warn("Matrix not positive definite during training", NotPSDTrainingWarning)
+                return np.nan
 
         loss = None
         for _ in range(training_iter):
-            _loss = self.optimizer.step(closure)
-            if _loss is not None:
+            try:
+                _loss = self.optimizer.step(closure)
+                if _loss is np.nan:
+                    break
                 loss = _loss
+            except gpytorch.utils.errors.NanError:
+                warnings.warn("PSD_safe_Cholesky failed due to too many NaN", NaNTrainingWarning)
+                break
 
         # set the hyperparams to the new values
         try:
-            lengthscale = self.model.covar_module.lengthscale.item()
+            lengthscale = float(self.model.covar_module.lengthscale.item())
         except AttributeError:
-            lengthscale = self.model.covar_module.base_kernel.lengthscale.item()
+            lengthscale = float(self.model.covar_module.base_kernel.lengthscale.item())
+        loss = float(loss.item()) if loss is not None else np.nan
+        noise = float(self.model.likelihood.noise.mean().detach())
         self.hyperparams = {
-            'loss': float(loss.item()) if loss is not None else np.nan,
-            'lengthscale': float(lengthscale),
-            'noise': float(self.model.likelihood.noise.mean().detach()),
+            'loss': loss,
+            'lengthscale': lengthscale,
+            'noise': noise,
         }
+        self.hyperparams_means['loss'] = np.append(self.hyperparams_means['loss'], loss)
+        self.hyperparams_means['lengthscale'] = np.append(self.hyperparams_means['lengthscale'], lengthscale)
+        self.hyperparams_means['noise'] = np.append(self.hyperparams_means['noise'], noise)
 
         # get into evaluation (predictive posterior) mode
         self.model.eval()
         self.likelihood.eval()
 
-    def optimize(self, max_fevals: int) -> Tuple[tuple, float]:
+    def optimize(self, max_fevals: int) -> Tuple[tuple, float]:    #NOSONAR
         """ Optimize the objective """
         predictions_tuple = None
         short_param_config_index = None
         last_invalid = False
-        report_multiple_minima = round(self.size / 10)    # if more than 10% of the space is minima, print a warning
+        report_multiple_minima = ceil(round(self.size / 10))    # if more than 10% of the space is minima, print a warning
         use_contextual_variance = self.af_params['explorationfactor'] == 'CV'
         while self.fevals < max_fevals:
             if last_invalid:
@@ -435,37 +518,57 @@ def optimize(self, max_fevals: int) -> Tuple[tuple, float]:
                 predictions_tuple = self.remove_from_predict_list(predictions_tuple, short_param_config_index)
             else:
                 predictions_tuple = self.predict_list()
-                if self.initial_sample_std is None:
-                    self.initial_sample_std = predictions_tuple[1].mean().item()
-            hyperparam = self.contextual_variance(predictions_tuple[0], predictions_tuple[1]) if use_contextual_variance else None
-            acquisition_values = self.acquisition_function(predictions_tuple, hyperparam)
-            short_param_config_index = self.argopt(acquisition_values)
-            param_config_index = self.true_param_config_index(short_param_config_index)
-
-            # if there are multiple minima in the acquisition function values, we want to take one from the least evaluated region
-            min_acquisition_function_value = acquisition_values[short_param_config_index]
-            indices_where_min = (acquisition_values <= min_acquisition_function_value).nonzero(as_tuple=True)[0]
-            if len(indices_where_min) > 1:
-                # first get the true index for the minima
-                true_indices_where_min = self.true_param_config_indices(indices_where_min)
-                # then get the index of the least evaluated region
+                if self.initial_sample_std <= self.min_std:
+                    self.initial_sample_std = min(max(predictions_tuple[1].mean().item(), self.min_std), 10.0)
+            # if there are NaN or all of the predicted std are the same, take from the least evaluated region
+            mean_has_NaN = bool(torch.any(torch.isnan(predictions_tuple[0])).item())
+            std_has_NaN = bool(torch.any(torch.isnan(predictions_tuple[1])).item())
+            if mean_has_NaN or std_has_NaN or torch.all(predictions_tuple[1] == predictions_tuple[1][0]):
                 least_evaluated_region_index = self.get_middle_index_of_least_evaluated_region()
-                # now find the minima closest to the least evaluated region
-                param_config_index = self.find_nearest(least_evaluated_region_index, true_indices_where_min)
-                short_param_config_index = -1    # invalidate the short_param_config_index because we bypassed it
-                if len(indices_where_min) > report_multiple_minima:
-                    print(
-                        f"WARNING - after {self.fevals}/{max_fevals} fevals, there were multiple minima in the acquisition values ({len(indices_where_min)}), picking one based on the least evaluated region"
-                    )
+                param_config_index = least_evaluated_region_index
+                short_param_config_index = -1
+                if mean_has_NaN:
+                    warning_reason = f"there were NaN in the predicted mean"
+                elif std_has_NaN:
+                    warning_reason = f"there were NaN in the predicted std"
+                else:
+                    warning_reason = "all STDs were the same"
+                warnings.warn(
+                    f"After {self.fevals}/{max_fevals} fevals, {warning_reason}, picking one from the least evaluated region and resetting the surrogate model",
+                    ResetModelWarning)
+                self.initialize_model(take_initial_sample=False, train_hyperparams=False)
+            else:
+                # otherwise, optimize the acquisition function to find the next candidate
+                hyperparam = self.contextual_variance(predictions_tuple[0], predictions_tuple[1]) if use_contextual_variance else None
+                acquisition_values = self.acquisition_function(predictions_tuple, hyperparam)
+                short_param_config_index = self.argopt(acquisition_values)
+                param_config_index = self.true_param_config_index(short_param_config_index)
+
+                # if there are multiple minima in the acquisition function values, we want to take one from the least evaluated region
+                min_acquisition_function_value = acquisition_values[short_param_config_index]
+                indices_where_min = (acquisition_values <= min_acquisition_function_value).nonzero(as_tuple=True)[0]
+                if len(indices_where_min) > 1:
+                    # first get the true index for the minima
+                    true_indices_where_min = self.true_param_config_indices(indices_where_min)
+                    # then get the index of the least evaluated region
+                    least_evaluated_region_index = self.get_middle_index_of_least_evaluated_region()
+                    # now find the minima closest to the least evaluated region
+                    param_config_index = self.find_nearest(least_evaluated_region_index, true_indices_where_min)
+                    short_param_config_index = -1    # invalidate the short_param_config_index because we bypassed it
+                    if len(indices_where_min) > report_multiple_minima:
+                        warnings.warn(
+                            f"After {self.fevals}/{max_fevals} fevals, there were multiple minima in the acquisition values ({len(indices_where_min)}), picking one based on the least evaluated region",
+                            MultipleMinimaWarning)
 
             # evaluate and register the result
             result = self.evaluate_config(param_config_index)
             if result == self.invalid_value and short_param_config_index > -1:
-                # can't use last_invalid if there were multiple minima in the acquisition function values, because short_param_config_index will not be set
+                # can't use last_invalid if short_param_config_index is not set
                 last_invalid = True
             else:
                 last_invalid = False
                 self.model.set_train_data(self.train_x, self.train_y, strict=False)
+                # do not train if there are multiple minima, because it introduces numerical instability or insolvability
                 if self.training_iter > 0:
                     self.train_hyperparams(training_iter=self.training_iter)
                 # set the current optimum
@@ -477,7 +580,7 @@ def optimize(self, max_fevals: int) -> Tuple[tuple, float]:
         return self.all_results
 
     def objective_function(self, param_config: tuple) -> float:
-        return minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.all_results)
+        return minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.all_results, check_restrictions=False)
 
     def evaluate_config(self, param_config_index: int) -> float:
         """ Evaluates a parameter configuration, returns the time """
@@ -501,14 +604,15 @@ def register_result(self, result: float, param_config_index: int):
             self.valid_configs[param_config_index] = True
             self.results[param_config_index] = result
             assert last_result['time'] == result
-            self.results_std[param_config_index] = np.std(last_result['times'])
+            self.results_std[param_config_index] = max(np.std(last_result['times']), self.min_std)
 
-        # add the current model parameters to the results dict
+        # add the current model parameters to the last entry of the results dict
         if len(self.all_results) < 1:
             return
         for key, value in self.hyperparams.items():
-            last_result[key] = value
+            last_result["hyperparam_" + key] = value
         self.all_results[-1] = last_result
+        # TODO check if it is possible to write the results with hyperparameters to the cache if not in simulation mode, maybe with observer?
 
     def update_unique_results(self):
         """ Updates the unique results dictionary """
@@ -519,25 +623,22 @@ def update_unique_results(self):
     def predict_list(self) -> Tuple[torch.Tensor, torch.Tensor]:
         """ Returns the means and standard deviations predicted by the surrogate model for the unvisited parameter configurations """
         with torch.no_grad(), gpytorch.settings.fast_pred_samples(), gpytorch.settings.fast_pred_var():
-            observed_pred = self.likelihood(self.model(self.test_x))
-            mu = observed_pred.mean
-            std = observed_pred.variance.clamp(min=1e-9)    # TODO .sqrt() or not? looks like without is better
-            return mu, std
-
-    def remove_from_predict_list(self, p: Tuple[torch.Tensor, torch.Tensor], i: int) -> Tuple[torch.Tensor, torch.Tensor]:
-        """ Remove an index from a tuple of predictions """
-        return torch.cat([p[0][:i], p[0][i + 1:]]), torch.cat([p[1][:i], p[1][i + 1:]])
-
-    def af_random(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function returning a randomly shuffled list for comparison """
-        list_random = list(range(len(self.unvisited_param_configs)))
-        shuffle(list_random)
-        return list_random
+            try:
+                observed_pred = self.likelihood(self.model(self.test_x))
+                mu = observed_pred.mean
+                std = observed_pred.variance.clamp(min=self.min_std)    # TODO .sqrt() or not? looks like without is better
+                return mu, std
+            except gpytorch.utils.errors.NanError:
+                warnings.warn("NaN error during predictions", NaNPredictionWarning)
+                return torch.ones_like(self.test_x), torch.zeros_like(self.test_x)
+            except gpytorch.utils.errors.NotPSDError:
+                warnings.warn("NotPSD error during predictions", NotPSDPredictionWarning)
+                return torch.ones_like(self.test_x), torch.zeros_like(self.test_x)
 
     def get_diff_improvement(self, y_mu, y_std, fplus) -> torch.Tensor:
         """ compute probability of improvement by assuming normality on the difference in improvement """
         diff_improvement = (y_mu - fplus) / y_std    # y_std can be very small, causing diff_improvement to be very large
-        diff_improvement = (diff_improvement - diff_improvement.mean()) / diff_improvement.std()    # force to N(0,1) with z-score
+        diff_improvement = (diff_improvement - diff_improvement.mean()) / max(diff_improvement.std(), self.min_std)    # force to N(0,1) with z-score
         if self.optimization_direction == 'max':
             diff_improvement = -diff_improvement
         return diff_improvement
@@ -556,11 +657,23 @@ def contextual_variance(self, mean: torch.Tensor, std: torch.Tensor):
             improvement_diff = improvement_over_current_sample - improvement_over_initial_sample
             # the closer the improvement over the current sample is to the improvement over the initial sample, the greater the exploration
             x = 1 - min(max(1 - improvement_diff, 0.2), 0.0)
-            cv = np.log10(x) + 0.1    # at x=0.0, y=0.1; at x=0.2057, y=0.0.
+            # x = 1 - min(max(improvement_diff, 1) * 0.2, 0.0)
+            # the smaller the difference between the initial sample error and current sample error, the greater the exploration
+            # x = 1 - min(max(self.initial_sample_std - std.mean().item(), 1.0), 0.8)
+            # print(self.initial_sample_std, std.mean().item())
+            # print(x)
+            cv = np.log10(x) + 0.1    # at x=0.0, y=0.1; at x=0.2, y=0.003; at x=0.2057, y=0.0.
+            # print(cv)
             return cv
         else:
             raise NotImplementedError("Contextual Variance has not yet been implemented for non-scaled outputs")
 
+    def af_random(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function returning a randomly shuffled list for comparison """
+        list_random = list(range(len(self.unvisited_param_configs)))
+        shuffle(list_random)
+        return list_random
+
     def af_probability_of_improvement_tensor(self, predictions: Tuple[torch.Tensor, torch.Tensor], hyperparam=None) -> torch.Tensor:
         """ Acquisition function Probability of Improvement (PoI) tensor-based """
 
@@ -574,9 +687,9 @@ def af_probability_of_improvement_tensor(self, predictions: Tuple[torch.Tensor,
         normal = torch.distributions.Normal(torch.zeros_like(diff_improvement), torch.ones_like(diff_improvement))
         cdf = normal.cdf(diff_improvement)
 
-        # sanity check
-        if torch.all(cdf == cdf[0]):
-            raise ValueError("You need to scale the diff_improvement-values!")
+        # # sanity check
+        # if torch.all(cdf == cdf[0]):
+        #     raise FloatingPointError("You need to scale the diff_improvement-values!")
         return cdf
 
     def af_expected_improvement_tensor(self, predictions: Tuple[torch.Tensor, torch.Tensor], hyperparam=None) -> torch.Tensor:
@@ -593,9 +706,9 @@ def af_expected_improvement_tensor(self, predictions: Tuple[torch.Tensor, torch.
         cdf = normal.cdf(diff_improvement)
         pdf = torch.exp(normal.log_prob(diff_improvement))
 
-        # sanity check
-        if torch.all(cdf == cdf[0]) or torch.all(pdf == pdf[0]):
-            raise ValueError("You need to scale the diff_improvement-values!")
+        # # sanity check
+        # if torch.all(cdf == cdf[0]) and torch.all(pdf == pdf[0]):
+        #     raise FloatingPointError("You need to scale the diff_improvement-values!")
 
         # compute expected improvement in bulk
         exp_improvement = (pdf + diff_improvement + y_std * cdf)
@@ -607,25 +720,6 @@ def af_expected_improvement_tensor(self, predictions: Tuple[torch.Tensor, torch.
     """ Helper functions """
     """                  """
 
-    def get_hyperparam(self, name: str, default, supported_values=list()):
-        """ Retrieve the value of a hyperparameter based on the name """
-        value = self.tuning_options.strategy_options.get(name, default)
-        if len(supported_values) > 0 and value not in supported_values:
-            raise ValueError(f"'{name}' is set to {value}, but must be one of {supported_values}")
-        return value
-
-    def set_acquisition_function(self, acquisition_function: str):
-        """ Set the acquisition function based on the name """
-        if acquisition_function not in supported_methods:
-            raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
-
-        if acquisition_function == 'poi':
-            self.acquisition_function = self.af_probability_of_improvement_tensor
-        elif acquisition_function == 'ei':
-            self.acquisition_function = self.af_expected_improvement_tensor
-        elif acquisition_function == 'random':
-            self.acquisition_function = self.af_random
-
     def apply_scaling_to_inputs(self):
         """ Scale the inputs using min-max normalization (0-1) and remove constant parameters """
         param_configs_scaled = torch.zeros_like(self.param_configs)
@@ -645,16 +739,57 @@ def apply_scaling_to_inputs(self):
         for param_index in range(len(self.param_configs[0])):
             v_min = v_min_list[param_index]
             v_diff = v_diff_list[param_index]
-            param_configs_scaled[:,param_index] = torch.sub(self.param_configs[:,param_index], v_min).div(v_diff)
+            param_configs_scaled[:, param_index] = torch.sub(self.param_configs[:, param_index], v_min).div(v_diff)
 
         # finally remove parameters that are constant by applying a mask
         unchanging_params_tensor = ~torch.tensor(unchanging_params_list, dtype=torch.bool)
-        if torch.all(unchanging_params_tensor == False):
-            raise ValueError(f"All of the parameter configurations ({self.size}) are the same: {self.param_configs[0]}, nothing to optimize")
+        # if torch.all(unchanging_params_tensor == False):
+        # raise ValueError(f"All of the parameter configurations ({self.size}) are the same: {self.param_configs[0]}, nothing to optimize")
         nonstatic_param_count = torch.count_nonzero(unchanging_params_tensor)
         self.param_configs_scaled = torch.zeros([len(param_configs_scaled), nonstatic_param_count], dtype=self.dtype)
         for param_config_index, param_config in enumerate(param_configs_scaled):
             self.param_configs_scaled[param_config_index] = param_config[unchanging_params_tensor]
+        self.nonstatic_params = unchanging_params_tensor
+
+    def find_nearest(self, value, array: torch.Tensor):
+        """ Find the value nearest to the given value in the array """
+        index = (torch.abs(array - value)).argmin()
+        return array[index]
+
+    def get_hyperparam(self, name: str, default, supported_values=list(), type=None, cast=None):
+        """ Retrieve the value of a hyperparameter based on the name - beware that cast can be a reference to any function """
+        value = self.tuning_options.strategy_options.get(name, default)
+
+        # check with predifined value list
+        if len(supported_values) > 0 and value not in supported_values:
+            raise ValueError(f"'{name}' is set to {value}, but must be one of {supported_values}")
+        # cast to type if provided
+        if type and not isinstance(value, type):
+            if cast:
+                value = cast(value)
+            else:
+                value = type(value)
+
+        # exceptions with more complex types
+        if value == 'methodparams' and 'explorationfactor' in value and value['explorationfactor'] != 'CV':
+            value = float(value)
+        return value
+
+    def remove_from_predict_list(self, p: Tuple[torch.Tensor, torch.Tensor], i: int) -> Tuple[torch.Tensor, torch.Tensor]:
+        """ Remove an index from a tuple of predictions """
+        return torch.cat([p[0][:i], p[0][i + 1:]]), torch.cat([p[1][:i], p[1][i + 1:]])
+
+    def set_acquisition_function(self, acquisition_function: str):
+        """ Set the acquisition function based on the name """
+        if acquisition_function not in supported_methods:
+            raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
+
+        if acquisition_function == 'poi':
+            self.acquisition_function = self.af_probability_of_improvement_tensor
+        elif acquisition_function == 'ei':
+            self.acquisition_function = self.af_expected_improvement_tensor
+        elif acquisition_function == 'random':
+            self.acquisition_function = self.af_random
 
     def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[torch.Tensor, dict]:
         """ transform non-numerical or mixed-type parameters to numerical Tensor, also return new tune_params """
@@ -664,7 +799,7 @@ def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[torch.Te
         # find out which parameters have nonnumerical or mixed types, and create a range of integers instead
         nonnumericals_exist = False
         nonnumerical_type = torch.zeros(number_of_params, dtype=torch.bool)
-        nonnumerical_values = [ [] for _ in range(number_of_params) ]
+        nonnumerical_values = [[] for _ in range(number_of_params)]
         tune_params = deepcopy(self.tuning_options.tune_params)
         for param_index, (param_key, param_values) in enumerate(self.tuning_options.tune_params.items()):
             if not all(isinstance(v, (int, float, complex)) for v in param_values):
@@ -675,7 +810,7 @@ def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[torch.Te
 
         # overwrite the nonnumerical parameters with numerical parameters
         if nonnumericals_exist:
-            self.tuning_options["snap"] = False     # snapping is only possible with numerical values
+            self.tuning_options["snap"] = False    # snapping is only possible with numerical values
             for param_config_index, param_config in enumerate(parameter_space):
                 parameter_space[param_config_index] = list(param_config)
                 for param_index, param_value in enumerate(param_config):
@@ -686,22 +821,73 @@ def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[torch.Te
 
         return torch.tensor(parameter_space, dtype=self.dtype).to(self.device), tune_params
 
+    def to_xarray(self):
+        # print(self.tuning_options['tune_params'])
+        # print(az.convert_to_inference_data(self.tuning_options['tune_params']).posterior)
+        with torch.no_grad(), gpytorch.settings.fast_pred_samples(), gpytorch.settings.fast_pred_var():
+            posterior = self.model(self.param_configs_scaled)
+            predictive_posterior = self.likelihood(posterior)
+            # print(posterior.variance)
+            # print(az.convert_to_inference_data(posterior.to_data_independent_dist()))
+            # print(len(posterior.covariance_matrix))
+            # print(len(posterior.covariance_matrix[0]))
+            # exit(0)
+
+            # data = az.load_arviz_data('centered_eight')
+            # az.plot_posterior(data, show=True)
+
+            param_configs = list(tuple(pc) for pc in self.param_configs.tolist())
+            # posterior_dict = dict(zip(param_configs, posterior.get_base_samples()))
+            posterior_dict = {
+                'mu': posterior.mean,
+                'var': posterior.variance
+            }
+            predictive_posterior_dict = {
+                'mu': predictive_posterior.mean,
+                'var': predictive_posterior.variance
+            }
+            print(posterior_dict)
+            # predictive_posterior_dict = dict(zip(str(self.param_configs_scaled.numpy()), predictive_posterior.get_base_samples()))
+            # log_prob_dict = dict(zip(self.param_configs_scaled, predictive_posterior.log_prob()))
+            tune_param_keys = np.array(list(self.tune_params.keys()))[self.nonstatic_params]
+            tune_param_values = np.array(list(self.tune_params.values()), dtype=object)[self.nonstatic_params]
+            coordinates = dict(zip(tune_param_keys, tune_param_values))
+            dimensions = dict(zip(tune_param_keys, ([k] for k in tune_param_keys)))
+            print(coordinates)
+            print(dimensions)
+            data = az.from_dict(posterior_dict, posterior_predictive=predictive_posterior_dict)
+            print(az.summary(data))
+            print(data.posterior)
+            print(data.posterior_predictive)
+            az.plot_trace(data, show=True)
+            exit(0)
+            print(data.posterior_predictive)
+
+            # print(az.convert_to_inference_data(posterior.get_base_samples()))
+        # TODO create InferenceData
+        # print(predictive_posterior.sample())
+        # print(az.from_dict())
+        # print(az.convert_to_inference_data(predictive_posterior))
+        exit(0)
 
     def visualize(self):
         """ Visualize the surrogate model and observations in a plot """
         from matplotlib import pyplot as plt
         with torch.no_grad(), gpytorch.settings.fast_pred_var():
             # Initialize plot
-            f, ax = plt.subplots(1, 1, figsize=(10, 5))
+            f = plt.figure(constrained_layout=True, figsize=(10, 8))
+            subfigures = f.subfigures(2, 1)
+            ax = subfigures[0].subplots(1, 1)
+            axes2 = subfigures[1].subplots(1, 3)
             ax.set_ylabel('Value')
             ax.set_xlabel('Parameter')
 
-            param_configs = self.param_configs.to(self.out_device)
+            param_configs = self.true_param_configs
 
             # get true function
             objective_results = np.array([])
             for param_config in param_configs:
-                result = self.objective_function(tuple(param_config.tolist()))
+                result = self.objective_function(tuple(param_config))
                 if result == self.invalid_value:
                     result = np.nan
                 objective_results = np.append(objective_results, result)
@@ -748,8 +934,85 @@ def visualize(self):
             # ax.set_ylim(min(objective_results), max(filter(lambda x: x != self.invalid_value, objective_results)))
             ax.legend(['Objective Function', 'Initial Sample', 'Observed Data', 'Mean', 'Confidence'])
 
+            # draw the hyperparameter plots
+            # loss
+            axes2[0].plot(self.hyperparams_means['loss'])
+            axes2[0].set_ylabel('Loss')
+            axes2[0].set_xlabel('Number of evaluations')
+            # lengthscale
+            axes2[1].plot(self.hyperparams_means['lengthscale'])
+            axes2[1].set_ylabel('Lengthscale')
+            axes2[1].set_xlabel('Number of evaluations')
+            # noise
+            axes2[2].plot(self.hyperparams_means['noise'])
+            axes2[2].set_ylabel('Noise')
+            axes2[2].set_xlabel('Number of evaluations')
+
             if self.animate:
-                f.canvas.draw()
-                plt.pause(0.1)
+                # f.canvas.draw()
+                plt.savefig('animation_last_graph')
+                # plt.pause(0.1)
+
+            # plt.show()
+
+
+class CustomWarning(Warning):
+
+    def __init__(self, message: str, category: str) -> None:
+        # super().__init__()
+        self.message = message
+        self.category = category
+
+    def __str__(self):
+        return repr(self.message)
+
+    def category(self):
+        return self.category.__name__
+
+
+class AvoidedLossSurgeWarning(CustomWarning):
+
+    def __init__(self, message: str) -> None:
+        super().__init__(message, "AvoidedLossSurgeWarning")
+
+
+class NotPSDTrainingWarning(CustomWarning):
+
+    def __init__(self, message: str) -> None:
+        super().__init__(message, "NotPSDTrainingWarning")
+
+
+class NaNTrainingWarning(CustomWarning):
+
+    def __init__(self, message: str) -> None:
+        super().__init__(message, "NaNTrainingWarning")
+
+
+class NaNPredictionWarning(CustomWarning):
+
+    def __init__(self, message: str) -> None:
+        super().__init__(message, "NaNPredictionWarning")
+
+
+class NotPSDPredictionWarning(CustomWarning):
+
+    def __init__(self, message: str) -> None:
+        super().__init__(message, "NotPSDPredictionWarning")
+
+
+class ResetModelWarning(CustomWarning):
+
+    def __init__(self, message: str) -> None:
+        super().__init__(message, "ResetModelWarning")
+
+
+class MultipleMinimaWarning(CustomWarning):
+
+    def __init__(self, message: str) -> None:
+        super().__init__(message, "MultipleMinimaWarning")
+
+
+class AlreadyEvaluatedConflict(CustomWarning):
 
-            plt.show()
+    def __init__(self, message: str) -> None:
+        super().__init__(message, "AlreadyEvaluatedConflict")
diff --git a/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/bayes_opt_alt_BayesOpt.py b/kernel_tuner/strategies/bayes_opt_alt_BayesOpt.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/bayes_opt_alt_HyperOpt.py b/kernel_tuner/strategies/bayes_opt_alt_HyperOpt.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/bayes_opt_alt_ScikitOpt.py b/kernel_tuner/strategies/bayes_opt_alt_ScikitOpt.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/bayes_opt_old.py b/kernel_tuner/strategies/bayes_opt_old.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/brute_force.py b/kernel_tuner/strategies/brute_force.py
old mode 100644
new mode 100755
index d3364f7d3..d72713908
--- a/kernel_tuner/strategies/brute_force.py
+++ b/kernel_tuner/strategies/brute_force.py
@@ -39,8 +39,7 @@ def tune(runner, kernel_options, device_options, tuning_options):
 
     # check for search space restrictions
     if restrictions is not None:
-        parameter_space = filter(lambda p: util.check_restrictions(restrictions, p, tune_params.keys(), verbose),
-                                 parameter_space)
+        parameter_space = filter(lambda p: util.check_restrictions(restrictions, p, tune_params.keys(), verbose), parameter_space)
 
     results, env = runner.run(parameter_space, kernel_options, tuning_options)
 
diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/dual_annealing.py b/kernel_tuner/strategies/dual_annealing.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/firefly_algorithm.py b/kernel_tuner/strategies/firefly_algorithm.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/greedy_ils.py b/kernel_tuner/strategies/greedy_ils.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/greedy_mls.py b/kernel_tuner/strategies/greedy_mls.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/hillclimbers.py b/kernel_tuner/strategies/hillclimbers.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/minimize.py b/kernel_tuner/strategies/minimize.py
old mode 100644
new mode 100755
index 14a33559e..eb9b1b81b
--- a/kernel_tuner/strategies/minimize.py
+++ b/kernel_tuner/strategies/minimize.py
@@ -56,7 +56,7 @@ def tune(runner, kernel_options, device_options, tuning_options):
     return results, runner.dev.get_environment()
 
 
-def _cost_func(x, kernel_options, tuning_options, runner, results):
+def _cost_func(x, kernel_options, tuning_options, runner, results, check_restrictions=True):
     """ Cost function used by minimize """
 
     error_time = 1e20
@@ -80,7 +80,7 @@ def _cost_func(x, kernel_options, tuning_options, runner, results):
         return tuning_options.cache[x_int]["time"]
 
     # check if this is a legal (non-restricted) parameter instance
-    if tuning_options.restrictions:
+    if check_restrictions and tuning_options.restrictions:
         legal = util.check_restrictions(tuning_options.restrictions, params, tuning_options.tune_params.keys(), tuning_options.verbose)
         if not legal:
             error_result = OrderedDict(zip(tuning_options.tune_params.keys(), params))
diff --git a/kernel_tuner/strategies/mls.py b/kernel_tuner/strategies/mls.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/ordered_greedy_mls.py b/kernel_tuner/strategies/ordered_greedy_mls.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/pso.py b/kernel_tuner/strategies/pso.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/random_sample.py b/kernel_tuner/strategies/random_sample.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
old mode 100644
new mode 100755
index 838a54b97..332ac6750
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -606,7 +606,7 @@ def process_cache(cache, kernel_options, tuning_options, runner):
             if filestr[-1] == ",":
                 filestr = filestr[:-1]
             filestr = filestr + "}\n}"
-        else:
+        elif not tuning_options.simulation_mode:    # don't do this in simulation mode because the cache must have no race conditions in case of parallel execution
             # if it was properly closed, open it for appending new entries
             with open(cache, "w") as cachefile:
                 cachefile.write(filestr[:-3] + ",")
@@ -672,52 +672,22 @@ def dump_cache(obj: str, tuning_options):
             cachefile.write(obj)
 
 
-def parse_restrictions(restrictions: str):
+def parse_restrictions(restrictions: list):
     """" parses restrictions from a list of strings into a callable function """
-    operators = [ '+', '-', '*', '/', '%', '==', '!=', '(', ')', '[', ']' ]
 
+    regex_match_variable = r"([a-zA-Z_$][a-zA-Z_$0-9]*)"
     suffix = ' and '
     parsed_restrictions = ""
     for restriction in restrictions:
-        new = ""
-
-        # first make sure everything that should be space-seperated is
-        for index in range(len(restriction)):
-            if restriction[index] in operators and index > 0 and restriction[index-1] != ' ':
-                new += ' '
-            new += restriction[index]
-            if restriction[index] in operators and index < len(restriction) - 1 and restriction[index+1] != ' ':
-                new += ' '
-
-        restriction = new
-
-        # then parse each part
-        new = ""
-        words = restriction.split(" ")
-        for word in words:
-
-            # filter spaces and empty words
-            if word == ' ' or word == '':
-                continue
-
-            # filter the operators
-            if word in operators:
-                new += word + ' '
-                continue
-
-            # filter numbers
-            if np.char.isnumeric(word):
-                new += word + ' '
-                continue
-
-            # make variables a dictionary 'p' lookup
-            word = f"params['{word}']"
-            new += word
-            new += ' '
+        parsed_restrictions += re.sub(regex_match_variable, r'params["\1"]', restriction) + suffix
 
-        parsed_restrictions += (new + suffix)
+    # tidy up the code by removing the last suffix and unecessary spaces
+    parsed_restrictions = parsed_restrictions[:-len(suffix)]
+    parsed_restrictions = parsed_restrictions.strip()
+    parsed_restrictions = " ".join(parsed_restrictions.split())
 
-    parsed_restrictions = "def restrictions(params): \n return " + parsed_restrictions[:-len(suffix)]
+    # compile into a function
+    parsed_restrictions = f"def restrictions(params): return {parsed_restrictions} \n"
     code_object = compile(parsed_restrictions, '<string>', 'exec')
     func = FunctionType(code_object.co_consts[0], globals())
     return func
diff --git a/kernel_tuner/wrappers.py b/kernel_tuner/wrappers.py
old mode 100644
new mode 100755
diff --git a/roadmap.md b/roadmap.md
old mode 100644
new mode 100755
diff --git a/setup.cfg b/setup.cfg
old mode 100644
new mode 100755
diff --git a/setup.py b/setup.py
old mode 100644
new mode 100755
diff --git a/test/__init__.py b/test/__init__.py
old mode 100644
new mode 100755
diff --git a/test/context.py b/test/context.py
old mode 100644
new mode 100755
diff --git a/test/strategies/test_bayesian_optimization.py b/test/strategies/test_bayesian_optimization.py
old mode 100644
new mode 100755
diff --git a/test/strategies/test_genetic_algorithm.py b/test/strategies/test_genetic_algorithm.py
old mode 100644
new mode 100755
diff --git a/test/strategies/test_minimize.py b/test/strategies/test_minimize.py
old mode 100644
new mode 100755
diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
old mode 100644
new mode 100755
diff --git a/test/test_c_functions.py b/test/test_c_functions.py
old mode 100644
new mode 100755
diff --git a/test/test_cache_file.json b/test/test_cache_file.json
old mode 100644
new mode 100755
diff --git a/test/test_core.py b/test/test_core.py
old mode 100644
new mode 100755
diff --git a/test/test_cuda_functions.py b/test/test_cuda_functions.py
old mode 100644
new mode 100755
diff --git a/test/test_cuda_mocked.py b/test/test_cuda_mocked.py
old mode 100644
new mode 100755
diff --git a/test/test_cupy_functions.py b/test/test_cupy_functions.py
old mode 100644
new mode 100755
diff --git a/test/test_hyper.py b/test/test_hyper.py
old mode 100644
new mode 100755
diff --git a/test/test_integration.py b/test/test_integration.py
old mode 100644
new mode 100755
diff --git a/test/test_interface.py b/test/test_interface.py
old mode 100644
new mode 100755
diff --git a/test/test_kernelbuilder.py b/test/test_kernelbuilder.py
old mode 100644
new mode 100755
diff --git a/test/test_minimize.py b/test/test_minimize.py
old mode 100644
new mode 100755
diff --git a/test/test_observers.py b/test/test_observers.py
old mode 100644
new mode 100755
diff --git a/test/test_opencl_functions.py b/test/test_opencl_functions.py
old mode 100644
new mode 100755
diff --git a/test/test_runners.py b/test/test_runners.py
old mode 100644
new mode 100755
diff --git a/test/test_util_functions.py b/test/test_util_functions.py
old mode 100644
new mode 100755
diff --git a/tutorial/README.md b/tutorial/README.md
old mode 100644
new mode 100755
diff --git a/tutorial/convolution.ipynb b/tutorial/convolution.ipynb
old mode 100644
new mode 100755
diff --git a/tutorial/diffusion.ipynb b/tutorial/diffusion.ipynb
old mode 100644
new mode 100755
diff --git a/tutorial/diffusion_opencl.ipynb b/tutorial/diffusion_opencl.ipynb
old mode 100644
new mode 100755
diff --git a/tutorial/diffusion_use_optparam.ipynb b/tutorial/diffusion_use_optparam.ipynb
old mode 100644
new mode 100755
diff --git a/tutorial/grid3d.ipynb b/tutorial/grid3d.ipynb
old mode 100644
new mode 100755
diff --git a/tutorial/matmul/matmul.cu b/tutorial/matmul/matmul.cu
old mode 100644
new mode 100755
diff --git a/tutorial/matmul/matmul.png b/tutorial/matmul/matmul.png
old mode 100644
new mode 100755
diff --git a/tutorial/matmul/matmul_naive.cu b/tutorial/matmul/matmul_naive.cu
old mode 100644
new mode 100755
diff --git a/tutorial/matmul/matmul_naive.png b/tutorial/matmul/matmul_naive.png
old mode 100644
new mode 100755
diff --git a/tutorial/matmul/matmul_shared.cu b/tutorial/matmul/matmul_shared.cu
old mode 100644
new mode 100755
diff --git a/tutorial/matmul/matmul_shared.png b/tutorial/matmul/matmul_shared.png
old mode 100644
new mode 100755
diff --git a/tutorial/matrix_multiplication.ipynb b/tutorial/matrix_multiplication.ipynb
old mode 100644
new mode 100755

From cf1d4e4a14bb94ba0f7a9b252a4d1811842a637e Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 16 Feb 2022 09:59:45 +0100
Subject: [PATCH 005/253] Reverted file permissions

---
 .gitattributes                                     |   0
 .github/workflows/docs.yml                         |   0
 .github/workflows/python-app.yml                   |   0
 .gitignore                                         |   0
 .zenodo.json                                       |   0
 CHANGELOG.md                                       |   0
 CITATION.cff                                       |   0
 CONTRIBUTING.rst                                   |   0
 INSTALL.rst                                        |   0
 LICENSE                                            |   0
 MANIFEST.in                                        |   0
 README.rst                                         |   0
 doc/Makefile                                       |   0
 doc/deploy.sh                                      |   0
 doc/gemm-amd-summary.png                           | Bin
 doc/gh_pages-deploy_key.enc                        | Bin
 doc/source/conf.py                                 |   0
 doc/source/contributing.rst                        |   0
 doc/source/correctness.rst                         |   0
 doc/source/design.png                              | Bin
 doc/source/design.rst                              |   0
 doc/source/examples.rst                            |   0
 doc/source/hostcode.rst                            |   0
 doc/source/index.rst                               |   0
 doc/source/install.rst                             |   0
 doc/source/templates.rst                           |   0
 doc/source/user-api.rst                            |   0
 doc/source/vocabulary.rst                          |   0
 examples/README.rst                                |   0
 examples/c/matrix_multiply.cpp                     |   0
 examples/c/matrix_multiply.py                      |   0
 examples/c/vector_add.py                           |   0
 examples/cuda-c++/vector_add.py                    |   0
 examples/cuda-c++/vector_add_blocksize.py          |   0
 examples/cuda-c++/vector_add_cupy.py               |   0
 examples/cuda/convolution.cu                       |   0
 examples/cuda/convolution.py                       |   0
 examples/cuda/convolution_correct.py               |   0
 examples/cuda/convolution_streams.cu               |   0
 examples/cuda/convolution_streams.py               |   0
 examples/cuda/expdist.cu                           |   0
 examples/cuda/expdist.py                           |   0
 examples/cuda/matmul.cu                            |   0
 examples/cuda/matmul.py                            |   0
 examples/cuda/pnpoly.cu                            |   0
 examples/cuda/pnpoly.py                            |   0
 examples/cuda/pnpoly_host.cu                       |   0
 examples/cuda/python_kernel.py                     |   0
 examples/cuda/reduction.cu                         |   0
 examples/cuda/reduction.py                         |   0
 examples/cuda/sepconv.py                           |   0
 examples/cuda/spmv.cu                              |   0
 examples/cuda/spmv.py                              |   0
 examples/cuda/stencil.cu                           |   0
 examples/cuda/stencil.py                           |   0
 examples/cuda/test_vector_add.py                   |   0
 examples/cuda/test_vector_add_parameterized.py     |   0
 examples/cuda/texture.py                           |   0
 examples/cuda/vector_add.py                        |   0
 examples/cuda/vector_add_codegen.py                |   0
 examples/cuda/vector_add_cupy.py                   |   0
 examples/cuda/vector_add_jinja.cu                  |   0
 examples/cuda/vector_add_jinja.py                  |   0
 examples/cuda/vector_add_jinja2.py                 |   0
 examples/cuda/vector_add_metric.py                 |   0
 examples/cuda/vector_add_observers.py              |   0
 examples/cuda/zeromeanfilter.cu                    |   0
 examples/cuda/zeromeanfilter.py                    |   0
 examples/fortran/test_vector_add.py                |   0
 examples/fortran/vector_add.F90                    |   0
 examples/fortran/vector_add.py                     |   0
 examples/fortran/vector_add_acc.F90                |   0
 examples/fortran/vector_add_acc.py                 |   0
 examples/opencl/convolution.cl                     |   0
 examples/opencl/convolution.py                     |   0
 examples/opencl/convolution_correct.py             |   0
 examples/opencl/matmul.cl                          |   0
 examples/opencl/matmul.py                          |   0
 examples/opencl/reduction.cl                       |   0
 examples/opencl/reduction.py                       |   0
 examples/opencl/sepconv.py                         |   0
 examples/opencl/stencil.cl                         |   0
 examples/opencl/stencil.py                         |   0
 examples/opencl/vector_add.py                      |   0
 examples/opencl/vector_add_codegen.py              |   0
 examples/opencl/vector_add_observers.py            |   0
 kernel_tuner/__init__.py                           |   0
 kernel_tuner/c.py                                  |   0
 kernel_tuner/core.py                               |   0
 kernel_tuner/cuda.py                               |   0
 kernel_tuner/cupy.py                               |   0
 kernel_tuner/hyper.py                              |   0
 kernel_tuner/integration.py                        |   0
 kernel_tuner/interface.py                          |   0
 kernel_tuner/kernelbuilder.py                      |   0
 kernel_tuner/nvml.py                               |   0
 kernel_tuner/observers.py                          |   0
 kernel_tuner/opencl.py                             |   0
 kernel_tuner/python.py                             |   0
 kernel_tuner/runners/__init__.py                   |   0
 kernel_tuner/runners/sequential.py                 |   0
 kernel_tuner/runners/simulation.py                 |   0
 kernel_tuner/strategies/__init__.py                |   0
 kernel_tuner/strategies/basinhopping.py            |   0
 kernel_tuner/strategies/bayes_opt.py               |   0
 kernel_tuner/strategies/bayes_opt_GPyTorch.py      |   0
 kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py |   0
 kernel_tuner/strategies/bayes_opt_alt_BOTorch.py   |   0
 kernel_tuner/strategies/bayes_opt_alt_BayesOpt.py  |   0
 kernel_tuner/strategies/bayes_opt_alt_HyperOpt.py  |   0
 kernel_tuner/strategies/bayes_opt_alt_ScikitOpt.py |   0
 kernel_tuner/strategies/bayes_opt_old.py           |   0
 kernel_tuner/strategies/brute_force.py             |   0
 kernel_tuner/strategies/diff_evo.py                |   0
 kernel_tuner/strategies/dual_annealing.py          |   0
 kernel_tuner/strategies/firefly_algorithm.py       |   0
 kernel_tuner/strategies/genetic_algorithm.py       |   0
 kernel_tuner/strategies/greedy_ils.py              |   0
 kernel_tuner/strategies/greedy_mls.py              |   0
 kernel_tuner/strategies/hillclimbers.py            |   0
 kernel_tuner/strategies/minimize.py                |   0
 kernel_tuner/strategies/mls.py                     |   0
 kernel_tuner/strategies/ordered_greedy_mls.py      |   0
 kernel_tuner/strategies/pso.py                     |   0
 kernel_tuner/strategies/random_sample.py           |   0
 kernel_tuner/strategies/simulated_annealing.py     |   0
 kernel_tuner/util.py                               |   0
 kernel_tuner/wrappers.py                           |   0
 roadmap.md                                         |   0
 setup.cfg                                          |   0
 setup.py                                           |   0
 test/__init__.py                                   |   0
 test/context.py                                    |   0
 test/strategies/test_bayesian_optimization.py      |   0
 test/strategies/test_genetic_algorithm.py          |   0
 test/strategies/test_minimize.py                   |   0
 test/strategies/test_strategies.py                 |   0
 test/test_c_functions.py                           |   0
 test/test_cache_file.json                          |   0
 test/test_core.py                                  |   0
 test/test_cuda_functions.py                        |   0
 test/test_cuda_mocked.py                           |   0
 test/test_cupy_functions.py                        |   0
 test/test_hyper.py                                 |   0
 test/test_integration.py                           |   0
 test/test_interface.py                             |   0
 test/test_kernelbuilder.py                         |   0
 test/test_minimize.py                              |   0
 test/test_observers.py                             |   0
 test/test_opencl_functions.py                      |   0
 test/test_runners.py                               |   0
 test/test_util_functions.py                        |   0
 tutorial/README.md                                 |   0
 tutorial/convolution.ipynb                         |   0
 tutorial/diffusion.ipynb                           |   0
 tutorial/diffusion_opencl.ipynb                    |   0
 tutorial/diffusion_use_optparam.ipynb              |   0
 tutorial/grid3d.ipynb                              |   0
 tutorial/matmul/matmul.cu                          |   0
 tutorial/matmul/matmul.png                         | Bin
 tutorial/matmul/matmul.py                          |   0
 tutorial/matmul/matmul_naive.cu                    |   0
 tutorial/matmul/matmul_naive.png                   | Bin
 tutorial/matmul/matmul_naive.py                    |   0
 tutorial/matmul/matmul_shared.cu                   |   0
 tutorial/matmul/matmul_shared.png                  | Bin
 tutorial/matmul/matmul_shared.py                   |   0
 tutorial/matrix_multiplication.ipynb               |   0
 168 files changed, 0 insertions(+), 0 deletions(-)
 mode change 100755 => 100644 .gitattributes
 mode change 100755 => 100644 .github/workflows/docs.yml
 mode change 100755 => 100644 .github/workflows/python-app.yml
 mode change 100755 => 100644 .gitignore
 mode change 100755 => 100644 .zenodo.json
 mode change 100755 => 100644 CHANGELOG.md
 mode change 100755 => 100644 CITATION.cff
 mode change 100755 => 100644 CONTRIBUTING.rst
 mode change 100755 => 100644 INSTALL.rst
 mode change 100755 => 100644 LICENSE
 mode change 100755 => 100644 MANIFEST.in
 mode change 100755 => 100644 README.rst
 mode change 100755 => 100644 doc/Makefile
 mode change 100755 => 100644 doc/deploy.sh
 mode change 100755 => 100644 doc/gemm-amd-summary.png
 mode change 100755 => 100644 doc/gh_pages-deploy_key.enc
 mode change 100755 => 100644 doc/source/conf.py
 mode change 100755 => 100644 doc/source/contributing.rst
 mode change 100755 => 100644 doc/source/correctness.rst
 mode change 100755 => 100644 doc/source/design.png
 mode change 100755 => 100644 doc/source/design.rst
 mode change 100755 => 100644 doc/source/examples.rst
 mode change 100755 => 100644 doc/source/hostcode.rst
 mode change 100755 => 100644 doc/source/index.rst
 mode change 100755 => 100644 doc/source/install.rst
 mode change 100755 => 100644 doc/source/templates.rst
 mode change 100755 => 100644 doc/source/user-api.rst
 mode change 100755 => 100644 doc/source/vocabulary.rst
 mode change 100755 => 100644 examples/README.rst
 mode change 100755 => 100644 examples/c/matrix_multiply.cpp
 mode change 100755 => 100644 examples/c/matrix_multiply.py
 mode change 100755 => 100644 examples/c/vector_add.py
 mode change 100755 => 100644 examples/cuda-c++/vector_add.py
 mode change 100755 => 100644 examples/cuda-c++/vector_add_blocksize.py
 mode change 100755 => 100644 examples/cuda-c++/vector_add_cupy.py
 mode change 100755 => 100644 examples/cuda/convolution.cu
 mode change 100755 => 100644 examples/cuda/convolution.py
 mode change 100755 => 100644 examples/cuda/convolution_correct.py
 mode change 100755 => 100644 examples/cuda/convolution_streams.cu
 mode change 100755 => 100644 examples/cuda/convolution_streams.py
 mode change 100755 => 100644 examples/cuda/expdist.cu
 mode change 100755 => 100644 examples/cuda/expdist.py
 mode change 100755 => 100644 examples/cuda/matmul.cu
 mode change 100755 => 100644 examples/cuda/matmul.py
 mode change 100755 => 100644 examples/cuda/pnpoly.cu
 mode change 100755 => 100644 examples/cuda/pnpoly.py
 mode change 100755 => 100644 examples/cuda/pnpoly_host.cu
 mode change 100755 => 100644 examples/cuda/python_kernel.py
 mode change 100755 => 100644 examples/cuda/reduction.cu
 mode change 100755 => 100644 examples/cuda/reduction.py
 mode change 100755 => 100644 examples/cuda/sepconv.py
 mode change 100755 => 100644 examples/cuda/spmv.cu
 mode change 100755 => 100644 examples/cuda/spmv.py
 mode change 100755 => 100644 examples/cuda/stencil.cu
 mode change 100755 => 100644 examples/cuda/stencil.py
 mode change 100755 => 100644 examples/cuda/test_vector_add.py
 mode change 100755 => 100644 examples/cuda/test_vector_add_parameterized.py
 mode change 100755 => 100644 examples/cuda/texture.py
 mode change 100755 => 100644 examples/cuda/vector_add.py
 mode change 100755 => 100644 examples/cuda/vector_add_codegen.py
 mode change 100755 => 100644 examples/cuda/vector_add_cupy.py
 mode change 100755 => 100644 examples/cuda/vector_add_jinja.cu
 mode change 100755 => 100644 examples/cuda/vector_add_jinja.py
 mode change 100755 => 100644 examples/cuda/vector_add_jinja2.py
 mode change 100755 => 100644 examples/cuda/vector_add_metric.py
 mode change 100755 => 100644 examples/cuda/vector_add_observers.py
 mode change 100755 => 100644 examples/cuda/zeromeanfilter.cu
 mode change 100755 => 100644 examples/cuda/zeromeanfilter.py
 mode change 100755 => 100644 examples/fortran/test_vector_add.py
 mode change 100755 => 100644 examples/fortran/vector_add.F90
 mode change 100755 => 100644 examples/fortran/vector_add.py
 mode change 100755 => 100644 examples/fortran/vector_add_acc.F90
 mode change 100755 => 100644 examples/fortran/vector_add_acc.py
 mode change 100755 => 100644 examples/opencl/convolution.cl
 mode change 100755 => 100644 examples/opencl/convolution.py
 mode change 100755 => 100644 examples/opencl/convolution_correct.py
 mode change 100755 => 100644 examples/opencl/matmul.cl
 mode change 100755 => 100644 examples/opencl/matmul.py
 mode change 100755 => 100644 examples/opencl/reduction.cl
 mode change 100755 => 100644 examples/opencl/reduction.py
 mode change 100755 => 100644 examples/opencl/sepconv.py
 mode change 100755 => 100644 examples/opencl/stencil.cl
 mode change 100755 => 100644 examples/opencl/stencil.py
 mode change 100755 => 100644 examples/opencl/vector_add.py
 mode change 100755 => 100644 examples/opencl/vector_add_codegen.py
 mode change 100755 => 100644 examples/opencl/vector_add_observers.py
 mode change 100755 => 100644 kernel_tuner/__init__.py
 mode change 100755 => 100644 kernel_tuner/c.py
 mode change 100755 => 100644 kernel_tuner/core.py
 mode change 100755 => 100644 kernel_tuner/cuda.py
 mode change 100755 => 100644 kernel_tuner/cupy.py
 mode change 100755 => 100644 kernel_tuner/hyper.py
 mode change 100755 => 100644 kernel_tuner/integration.py
 mode change 100755 => 100644 kernel_tuner/interface.py
 mode change 100755 => 100644 kernel_tuner/kernelbuilder.py
 mode change 100755 => 100644 kernel_tuner/nvml.py
 mode change 100755 => 100644 kernel_tuner/observers.py
 mode change 100755 => 100644 kernel_tuner/opencl.py
 mode change 100755 => 100644 kernel_tuner/python.py
 mode change 100755 => 100644 kernel_tuner/runners/__init__.py
 mode change 100755 => 100644 kernel_tuner/runners/sequential.py
 mode change 100755 => 100644 kernel_tuner/runners/simulation.py
 mode change 100755 => 100644 kernel_tuner/strategies/__init__.py
 mode change 100755 => 100644 kernel_tuner/strategies/basinhopping.py
 mode change 100755 => 100644 kernel_tuner/strategies/bayes_opt.py
 mode change 100755 => 100644 kernel_tuner/strategies/bayes_opt_GPyTorch.py
 mode change 100755 => 100644 kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
 mode change 100755 => 100644 kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
 mode change 100755 => 100644 kernel_tuner/strategies/bayes_opt_alt_BayesOpt.py
 mode change 100755 => 100644 kernel_tuner/strategies/bayes_opt_alt_HyperOpt.py
 mode change 100755 => 100644 kernel_tuner/strategies/bayes_opt_alt_ScikitOpt.py
 mode change 100755 => 100644 kernel_tuner/strategies/bayes_opt_old.py
 mode change 100755 => 100644 kernel_tuner/strategies/brute_force.py
 mode change 100755 => 100644 kernel_tuner/strategies/diff_evo.py
 mode change 100755 => 100644 kernel_tuner/strategies/dual_annealing.py
 mode change 100755 => 100644 kernel_tuner/strategies/firefly_algorithm.py
 mode change 100755 => 100644 kernel_tuner/strategies/genetic_algorithm.py
 mode change 100755 => 100644 kernel_tuner/strategies/greedy_ils.py
 mode change 100755 => 100644 kernel_tuner/strategies/greedy_mls.py
 mode change 100755 => 100644 kernel_tuner/strategies/hillclimbers.py
 mode change 100755 => 100644 kernel_tuner/strategies/minimize.py
 mode change 100755 => 100644 kernel_tuner/strategies/mls.py
 mode change 100755 => 100644 kernel_tuner/strategies/ordered_greedy_mls.py
 mode change 100755 => 100644 kernel_tuner/strategies/pso.py
 mode change 100755 => 100644 kernel_tuner/strategies/random_sample.py
 mode change 100755 => 100644 kernel_tuner/strategies/simulated_annealing.py
 mode change 100755 => 100644 kernel_tuner/util.py
 mode change 100755 => 100644 kernel_tuner/wrappers.py
 mode change 100755 => 100644 roadmap.md
 mode change 100755 => 100644 setup.cfg
 mode change 100755 => 100644 setup.py
 mode change 100755 => 100644 test/__init__.py
 mode change 100755 => 100644 test/context.py
 mode change 100755 => 100644 test/strategies/test_bayesian_optimization.py
 mode change 100755 => 100644 test/strategies/test_genetic_algorithm.py
 mode change 100755 => 100644 test/strategies/test_minimize.py
 mode change 100755 => 100644 test/strategies/test_strategies.py
 mode change 100755 => 100644 test/test_c_functions.py
 mode change 100755 => 100644 test/test_cache_file.json
 mode change 100755 => 100644 test/test_core.py
 mode change 100755 => 100644 test/test_cuda_functions.py
 mode change 100755 => 100644 test/test_cuda_mocked.py
 mode change 100755 => 100644 test/test_cupy_functions.py
 mode change 100755 => 100644 test/test_hyper.py
 mode change 100755 => 100644 test/test_integration.py
 mode change 100755 => 100644 test/test_interface.py
 mode change 100755 => 100644 test/test_kernelbuilder.py
 mode change 100755 => 100644 test/test_minimize.py
 mode change 100755 => 100644 test/test_observers.py
 mode change 100755 => 100644 test/test_opencl_functions.py
 mode change 100755 => 100644 test/test_runners.py
 mode change 100755 => 100644 test/test_util_functions.py
 mode change 100755 => 100644 tutorial/README.md
 mode change 100755 => 100644 tutorial/convolution.ipynb
 mode change 100755 => 100644 tutorial/diffusion.ipynb
 mode change 100755 => 100644 tutorial/diffusion_opencl.ipynb
 mode change 100755 => 100644 tutorial/diffusion_use_optparam.ipynb
 mode change 100755 => 100644 tutorial/grid3d.ipynb
 mode change 100755 => 100644 tutorial/matmul/matmul.cu
 mode change 100755 => 100644 tutorial/matmul/matmul.png
 mode change 100755 => 100644 tutorial/matmul/matmul.py
 mode change 100755 => 100644 tutorial/matmul/matmul_naive.cu
 mode change 100755 => 100644 tutorial/matmul/matmul_naive.png
 mode change 100755 => 100644 tutorial/matmul/matmul_naive.py
 mode change 100755 => 100644 tutorial/matmul/matmul_shared.cu
 mode change 100755 => 100644 tutorial/matmul/matmul_shared.png
 mode change 100755 => 100644 tutorial/matmul/matmul_shared.py
 mode change 100755 => 100644 tutorial/matrix_multiplication.ipynb

diff --git a/.gitattributes b/.gitattributes
old mode 100755
new mode 100644
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
old mode 100755
new mode 100644
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
old mode 100755
new mode 100644
diff --git a/.gitignore b/.gitignore
old mode 100755
new mode 100644
diff --git a/.zenodo.json b/.zenodo.json
old mode 100755
new mode 100644
diff --git a/CHANGELOG.md b/CHANGELOG.md
old mode 100755
new mode 100644
diff --git a/CITATION.cff b/CITATION.cff
old mode 100755
new mode 100644
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
old mode 100755
new mode 100644
diff --git a/INSTALL.rst b/INSTALL.rst
old mode 100755
new mode 100644
diff --git a/LICENSE b/LICENSE
old mode 100755
new mode 100644
diff --git a/MANIFEST.in b/MANIFEST.in
old mode 100755
new mode 100644
diff --git a/README.rst b/README.rst
old mode 100755
new mode 100644
diff --git a/doc/Makefile b/doc/Makefile
old mode 100755
new mode 100644
diff --git a/doc/deploy.sh b/doc/deploy.sh
old mode 100755
new mode 100644
diff --git a/doc/gemm-amd-summary.png b/doc/gemm-amd-summary.png
old mode 100755
new mode 100644
diff --git a/doc/gh_pages-deploy_key.enc b/doc/gh_pages-deploy_key.enc
old mode 100755
new mode 100644
diff --git a/doc/source/conf.py b/doc/source/conf.py
old mode 100755
new mode 100644
diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst
old mode 100755
new mode 100644
diff --git a/doc/source/correctness.rst b/doc/source/correctness.rst
old mode 100755
new mode 100644
diff --git a/doc/source/design.png b/doc/source/design.png
old mode 100755
new mode 100644
diff --git a/doc/source/design.rst b/doc/source/design.rst
old mode 100755
new mode 100644
diff --git a/doc/source/examples.rst b/doc/source/examples.rst
old mode 100755
new mode 100644
diff --git a/doc/source/hostcode.rst b/doc/source/hostcode.rst
old mode 100755
new mode 100644
diff --git a/doc/source/index.rst b/doc/source/index.rst
old mode 100755
new mode 100644
diff --git a/doc/source/install.rst b/doc/source/install.rst
old mode 100755
new mode 100644
diff --git a/doc/source/templates.rst b/doc/source/templates.rst
old mode 100755
new mode 100644
diff --git a/doc/source/user-api.rst b/doc/source/user-api.rst
old mode 100755
new mode 100644
diff --git a/doc/source/vocabulary.rst b/doc/source/vocabulary.rst
old mode 100755
new mode 100644
diff --git a/examples/README.rst b/examples/README.rst
old mode 100755
new mode 100644
diff --git a/examples/c/matrix_multiply.cpp b/examples/c/matrix_multiply.cpp
old mode 100755
new mode 100644
diff --git a/examples/c/matrix_multiply.py b/examples/c/matrix_multiply.py
old mode 100755
new mode 100644
diff --git a/examples/c/vector_add.py b/examples/c/vector_add.py
old mode 100755
new mode 100644
diff --git a/examples/cuda-c++/vector_add.py b/examples/cuda-c++/vector_add.py
old mode 100755
new mode 100644
diff --git a/examples/cuda-c++/vector_add_blocksize.py b/examples/cuda-c++/vector_add_blocksize.py
old mode 100755
new mode 100644
diff --git a/examples/cuda-c++/vector_add_cupy.py b/examples/cuda-c++/vector_add_cupy.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/convolution.cu b/examples/cuda/convolution.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/convolution.py b/examples/cuda/convolution.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/convolution_correct.py b/examples/cuda/convolution_correct.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/convolution_streams.cu b/examples/cuda/convolution_streams.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/convolution_streams.py b/examples/cuda/convolution_streams.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/expdist.cu b/examples/cuda/expdist.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/expdist.py b/examples/cuda/expdist.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/matmul.cu b/examples/cuda/matmul.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/matmul.py b/examples/cuda/matmul.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/pnpoly.cu b/examples/cuda/pnpoly.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/pnpoly.py b/examples/cuda/pnpoly.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/pnpoly_host.cu b/examples/cuda/pnpoly_host.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/python_kernel.py b/examples/cuda/python_kernel.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/reduction.cu b/examples/cuda/reduction.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/reduction.py b/examples/cuda/reduction.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/sepconv.py b/examples/cuda/sepconv.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/spmv.cu b/examples/cuda/spmv.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/spmv.py b/examples/cuda/spmv.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/stencil.cu b/examples/cuda/stencil.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/stencil.py b/examples/cuda/stencil.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/test_vector_add.py b/examples/cuda/test_vector_add.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/test_vector_add_parameterized.py b/examples/cuda/test_vector_add_parameterized.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/texture.py b/examples/cuda/texture.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/vector_add.py b/examples/cuda/vector_add.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/vector_add_codegen.py b/examples/cuda/vector_add_codegen.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/vector_add_cupy.py b/examples/cuda/vector_add_cupy.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/vector_add_jinja.cu b/examples/cuda/vector_add_jinja.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/vector_add_jinja.py b/examples/cuda/vector_add_jinja.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/vector_add_jinja2.py b/examples/cuda/vector_add_jinja2.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/vector_add_metric.py b/examples/cuda/vector_add_metric.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/vector_add_observers.py b/examples/cuda/vector_add_observers.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/zeromeanfilter.cu b/examples/cuda/zeromeanfilter.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/zeromeanfilter.py b/examples/cuda/zeromeanfilter.py
old mode 100755
new mode 100644
diff --git a/examples/fortran/test_vector_add.py b/examples/fortran/test_vector_add.py
old mode 100755
new mode 100644
diff --git a/examples/fortran/vector_add.F90 b/examples/fortran/vector_add.F90
old mode 100755
new mode 100644
diff --git a/examples/fortran/vector_add.py b/examples/fortran/vector_add.py
old mode 100755
new mode 100644
diff --git a/examples/fortran/vector_add_acc.F90 b/examples/fortran/vector_add_acc.F90
old mode 100755
new mode 100644
diff --git a/examples/fortran/vector_add_acc.py b/examples/fortran/vector_add_acc.py
old mode 100755
new mode 100644
diff --git a/examples/opencl/convolution.cl b/examples/opencl/convolution.cl
old mode 100755
new mode 100644
diff --git a/examples/opencl/convolution.py b/examples/opencl/convolution.py
old mode 100755
new mode 100644
diff --git a/examples/opencl/convolution_correct.py b/examples/opencl/convolution_correct.py
old mode 100755
new mode 100644
diff --git a/examples/opencl/matmul.cl b/examples/opencl/matmul.cl
old mode 100755
new mode 100644
diff --git a/examples/opencl/matmul.py b/examples/opencl/matmul.py
old mode 100755
new mode 100644
diff --git a/examples/opencl/reduction.cl b/examples/opencl/reduction.cl
old mode 100755
new mode 100644
diff --git a/examples/opencl/reduction.py b/examples/opencl/reduction.py
old mode 100755
new mode 100644
diff --git a/examples/opencl/sepconv.py b/examples/opencl/sepconv.py
old mode 100755
new mode 100644
diff --git a/examples/opencl/stencil.cl b/examples/opencl/stencil.cl
old mode 100755
new mode 100644
diff --git a/examples/opencl/stencil.py b/examples/opencl/stencil.py
old mode 100755
new mode 100644
diff --git a/examples/opencl/vector_add.py b/examples/opencl/vector_add.py
old mode 100755
new mode 100644
diff --git a/examples/opencl/vector_add_codegen.py b/examples/opencl/vector_add_codegen.py
old mode 100755
new mode 100644
diff --git a/examples/opencl/vector_add_observers.py b/examples/opencl/vector_add_observers.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/__init__.py b/kernel_tuner/__init__.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/c.py b/kernel_tuner/c.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/cuda.py b/kernel_tuner/cuda.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/cupy.py b/kernel_tuner/cupy.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/integration.py b/kernel_tuner/integration.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/kernelbuilder.py b/kernel_tuner/kernelbuilder.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/nvml.py b/kernel_tuner/nvml.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/observers.py b/kernel_tuner/observers.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/opencl.py b/kernel_tuner/opencl.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/python.py b/kernel_tuner/python.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/runners/__init__.py b/kernel_tuner/runners/__init__.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/runners/sequential.py b/kernel_tuner/runners/sequential.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/runners/simulation.py b/kernel_tuner/runners/simulation.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/__init__.py b/kernel_tuner/strategies/__init__.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/basinhopping.py b/kernel_tuner/strategies/basinhopping.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch.py b/kernel_tuner/strategies/bayes_opt_GPyTorch.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/bayes_opt_alt_BayesOpt.py b/kernel_tuner/strategies/bayes_opt_alt_BayesOpt.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/bayes_opt_alt_HyperOpt.py b/kernel_tuner/strategies/bayes_opt_alt_HyperOpt.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/bayes_opt_alt_ScikitOpt.py b/kernel_tuner/strategies/bayes_opt_alt_ScikitOpt.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/bayes_opt_old.py b/kernel_tuner/strategies/bayes_opt_old.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/brute_force.py b/kernel_tuner/strategies/brute_force.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/dual_annealing.py b/kernel_tuner/strategies/dual_annealing.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/firefly_algorithm.py b/kernel_tuner/strategies/firefly_algorithm.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/greedy_ils.py b/kernel_tuner/strategies/greedy_ils.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/greedy_mls.py b/kernel_tuner/strategies/greedy_mls.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/hillclimbers.py b/kernel_tuner/strategies/hillclimbers.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/minimize.py b/kernel_tuner/strategies/minimize.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/mls.py b/kernel_tuner/strategies/mls.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/ordered_greedy_mls.py b/kernel_tuner/strategies/ordered_greedy_mls.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/pso.py b/kernel_tuner/strategies/pso.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/random_sample.py b/kernel_tuner/strategies/random_sample.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/wrappers.py b/kernel_tuner/wrappers.py
old mode 100755
new mode 100644
diff --git a/roadmap.md b/roadmap.md
old mode 100755
new mode 100644
diff --git a/setup.cfg b/setup.cfg
old mode 100755
new mode 100644
diff --git a/setup.py b/setup.py
old mode 100755
new mode 100644
diff --git a/test/__init__.py b/test/__init__.py
old mode 100755
new mode 100644
diff --git a/test/context.py b/test/context.py
old mode 100755
new mode 100644
diff --git a/test/strategies/test_bayesian_optimization.py b/test/strategies/test_bayesian_optimization.py
old mode 100755
new mode 100644
diff --git a/test/strategies/test_genetic_algorithm.py b/test/strategies/test_genetic_algorithm.py
old mode 100755
new mode 100644
diff --git a/test/strategies/test_minimize.py b/test/strategies/test_minimize.py
old mode 100755
new mode 100644
diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
old mode 100755
new mode 100644
diff --git a/test/test_c_functions.py b/test/test_c_functions.py
old mode 100755
new mode 100644
diff --git a/test/test_cache_file.json b/test/test_cache_file.json
old mode 100755
new mode 100644
diff --git a/test/test_core.py b/test/test_core.py
old mode 100755
new mode 100644
diff --git a/test/test_cuda_functions.py b/test/test_cuda_functions.py
old mode 100755
new mode 100644
diff --git a/test/test_cuda_mocked.py b/test/test_cuda_mocked.py
old mode 100755
new mode 100644
diff --git a/test/test_cupy_functions.py b/test/test_cupy_functions.py
old mode 100755
new mode 100644
diff --git a/test/test_hyper.py b/test/test_hyper.py
old mode 100755
new mode 100644
diff --git a/test/test_integration.py b/test/test_integration.py
old mode 100755
new mode 100644
diff --git a/test/test_interface.py b/test/test_interface.py
old mode 100755
new mode 100644
diff --git a/test/test_kernelbuilder.py b/test/test_kernelbuilder.py
old mode 100755
new mode 100644
diff --git a/test/test_minimize.py b/test/test_minimize.py
old mode 100755
new mode 100644
diff --git a/test/test_observers.py b/test/test_observers.py
old mode 100755
new mode 100644
diff --git a/test/test_opencl_functions.py b/test/test_opencl_functions.py
old mode 100755
new mode 100644
diff --git a/test/test_runners.py b/test/test_runners.py
old mode 100755
new mode 100644
diff --git a/test/test_util_functions.py b/test/test_util_functions.py
old mode 100755
new mode 100644
diff --git a/tutorial/README.md b/tutorial/README.md
old mode 100755
new mode 100644
diff --git a/tutorial/convolution.ipynb b/tutorial/convolution.ipynb
old mode 100755
new mode 100644
diff --git a/tutorial/diffusion.ipynb b/tutorial/diffusion.ipynb
old mode 100755
new mode 100644
diff --git a/tutorial/diffusion_opencl.ipynb b/tutorial/diffusion_opencl.ipynb
old mode 100755
new mode 100644
diff --git a/tutorial/diffusion_use_optparam.ipynb b/tutorial/diffusion_use_optparam.ipynb
old mode 100755
new mode 100644
diff --git a/tutorial/grid3d.ipynb b/tutorial/grid3d.ipynb
old mode 100755
new mode 100644
diff --git a/tutorial/matmul/matmul.cu b/tutorial/matmul/matmul.cu
old mode 100755
new mode 100644
diff --git a/tutorial/matmul/matmul.png b/tutorial/matmul/matmul.png
old mode 100755
new mode 100644
diff --git a/tutorial/matmul/matmul.py b/tutorial/matmul/matmul.py
old mode 100755
new mode 100644
diff --git a/tutorial/matmul/matmul_naive.cu b/tutorial/matmul/matmul_naive.cu
old mode 100755
new mode 100644
diff --git a/tutorial/matmul/matmul_naive.png b/tutorial/matmul/matmul_naive.png
old mode 100755
new mode 100644
diff --git a/tutorial/matmul/matmul_naive.py b/tutorial/matmul/matmul_naive.py
old mode 100755
new mode 100644
diff --git a/tutorial/matmul/matmul_shared.cu b/tutorial/matmul/matmul_shared.cu
old mode 100755
new mode 100644
diff --git a/tutorial/matmul/matmul_shared.png b/tutorial/matmul/matmul_shared.png
old mode 100755
new mode 100644
diff --git a/tutorial/matmul/matmul_shared.py b/tutorial/matmul/matmul_shared.py
old mode 100755
new mode 100644
diff --git a/tutorial/matrix_multiplication.ipynb b/tutorial/matrix_multiplication.ipynb
old mode 100755
new mode 100644

From 531627ac5649b7e2e9b91441c4a85a93b117c19a Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 24 Mar 2022 21:02:41 +0100
Subject: [PATCH 006/253] Search spaces are now generated much more efficiently
 using python-constraint, also added general Python runner

---
 kernel_tuner/core.py                          |   7 +-
 kernel_tuner/interface.py                     |   9 +-
 kernel_tuner/python.py                        | 195 +++++++++++-------
 kernel_tuner/runners/sequential.py            |   5 +-
 kernel_tuner/runners/simulation.py            |   6 +-
 .../strategies/bayes_opt_GPyTorch_lean.py     | 113 ++++------
 kernel_tuner/strategies/brute_force.py        |  13 +-
 kernel_tuner/strategies/random_sample.py      |  10 +-
 kernel_tuner/util.py                          | 114 +++++++++-
 setup.py                                      |   2 +-
 10 files changed, 297 insertions(+), 177 deletions(-)

diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py
index 1faf5deb0..765d0ee21 100644
--- a/kernel_tuner/core.py
+++ b/kernel_tuner/core.py
@@ -195,7 +195,7 @@ class DeviceInterface(object):
     """Class that offers a High-Level Device Interface to the rest of the Kernel Tuner"""
 
     def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=None, compiler_options=None, iterations=7, observers=None,
-                 parallel_mode=False):
+                 parallel_mode=False, hyperparam_mode=False):
         """ Instantiate the DeviceInterface, based on language in kernel source
 
         :param kernel_source The kernel sources
@@ -212,7 +212,7 @@ def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=No
         :type device: int
 
         :param lang: Specifies the language used for GPU kernels.
-            Currently supported: "CUDA", "OpenCL", or "C"
+            Currently supported: "CUDA", "OpenCL", "C" or "Python"
         :type lang: string
 
         :param compiler_options: The compiler options to use when compiling kernels for this device.
@@ -241,7 +241,8 @@ def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=No
         elif lang == "C":
             dev = CFunctions(compiler=compiler, compiler_options=compiler_options, iterations=iterations)
         elif lang == "Python":
-            dev = PythonFunctions(iterations=iterations, observers=observers, parallel_mode=parallel_mode, show_progressbar=True)
+            dev = PythonFunctions(iterations=iterations, observers=observers, parallel_mode=parallel_mode, hyperparam_mode=hyperparam_mode,
+                                  show_progressbar=True)
         else:
             raise ValueError("Sorry, support for languages other than CUDA, OpenCL, or C is not implemented yet")
 
diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 14f5dfd71..d28c41348 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -33,6 +33,7 @@
 import logging
 import sys
 import numpy
+from constraint import Constraint
 
 import kernel_tuner.util as util
 import kernel_tuner.core as core
@@ -403,7 +404,7 @@ def _get_docstring(opts):
 def tune_kernel(kernel_name, kernel_source, problem_size, arguments, tune_params, grid_div_x=None, grid_div_y=None, grid_div_z=None, restrictions=None,
                 answer=None, atol=1e-6, verify=None, verbose=False, lang=None, device=0, platform=0, smem_args=None, cmem_args=None, texmem_args=None,
                 compiler=None, compiler_options=None, log=None, iterations=7, block_size_names=None, quiet=False, strategy=None, strategy_options=None,
-                cache=None, metrics=None, simulation_mode=False, parallel_mode=False, observers=None):
+                cache=None, metrics=None, simulation_mode=False, parallel_mode=False, hyperparam_mode=False, observers=None):
 
     if log:
         logging.basicConfig(filename=kernel_name + datetime.now().strftime('%Y%m%d-%H:%M:%S') + '.log', level=log)
@@ -418,8 +419,8 @@ def tune_kernel(kernel_name, kernel_source, problem_size, arguments, tune_params
     # check whether block_size_names are used as expected
     util.check_block_size_params_names_list(block_size_names, tune_params)
 
-    # if the restrictions are not callable, make them (increases restrictions check performance significantly)
-    if restrictions is not None and not callable(restrictions):
+    # if the restrictions are not constraints or a callable, the restrictions are strings, so parse them to functions (increases restrictions check performance significantly)
+    if restrictions is not None and not callable(restrictions) and not any(isinstance(r, Constraint) for r in restrictions):
         restrictions = util.parse_restrictions(restrictions)
 
     if iterations < 1:
@@ -469,7 +470,7 @@ def tune_kernel(kernel_name, kernel_source, problem_size, arguments, tune_params
 
     # select the runner for this job based on input
     selected_runner = SimulationRunner if simulation_mode is True else SequentialRunner
-    with selected_runner(kernelsource, kernel_options, device_options, iterations, observers, parallel_mode) as runner:
+    with selected_runner(kernelsource, kernel_options, device_options, iterations, observers, parallel_mode, hyperparam_mode) as runner:
 
         #the user-specified function may or may not have an optional atol argument;
         #we normalize it so that it always accepts atol.
diff --git a/kernel_tuner/python.py b/kernel_tuner/python.py
index 69c6ac33c..00f2b24c1 100644
--- a/kernel_tuner/python.py
+++ b/kernel_tuner/python.py
@@ -33,7 +33,7 @@
 class PythonFunctions(object):
     """Class that groups the code for running and compiling C functions"""
 
-    def __init__(self, iterations=7, observers=None, parallel_mode=False, show_progressbar=False):
+    def __init__(self, iterations=7, observers=None, parallel_mode=False, hyperparam_mode=False, show_progressbar=False):
         """instantiate PythonFunctions object used for interacting with Python code
 
         :param iterations: Number of iterations used while benchmarking a kernel, 7 by default.
@@ -49,7 +49,12 @@ def __init__(self, iterations=7, observers=None, parallel_mode=False, show_progr
         self.env = env
         self.name = platform.processor()
         self.observers = observers or []
-        self.parallel_mode = parallel_mode
+        self.num_unused_cores = 1    # do not use all cores to do other work
+        self.num_cores = max(min(cpu_count() - self.num_unused_cores, self.iterations), 1)    # assumes cpu_count does not change during the life of this class!
+        self.parallel_mode = parallel_mode and self.num_cores > 1
+        self.hyperparam_mode = hyperparam_mode
+
+        self.benchmark = self.benchmark_normal if not self.hyperparam_mode else self.benchmark_hyperparams
 
         self.benchmark_times = []
 
@@ -87,22 +92,67 @@ def compile(self, kernel_instance):
         delete_temp_file(source_file)
         return func
 
-    def benchmark(self, func, args, threads, grid):
-        """runs the kernel repeatedly, returns averaged returned value
+    def benchmark_normal(self, func, args, threads, grid):
+        """runs the kernel repeatedly, returns times
+
+        :param func: A Python function for this specific configuration
+        :type func: ctypes._FuncPtr
+
+        :param args: A list of arguments to the function, order should match the
+            order in the code. The list should be prepared using
+            ready_argument_list().
+        :type args: list(Argument)
+
+        :param threads: Ignored, but left as argument for now to have the same
+            interface as CudaFunctions and OpenCLFunctions.
+        :type threads: any
+
+        :param grid: Ignored, but left as argument for now to have the same
+            interface as CudaFunctions and OpenCLFunctions.
+        :type grid: any
+
+        :returns: All times.
+        :rtype: dict()
+        """
+
+        result = dict()
+        result["times"] = []
+        iterator = range(self.iterations) if not self.show_progressbar or self.parallel_mode else progressbar.progressbar(
+            range(self.iterations), min_value=0, max_value=self.iterations, redirect_stdout=True)
+
+        # new implementation
+        start_time = perf_counter()
+        if self.parallel_mode:
+            logging.debug(f"Running benchmark in parallel on {self.num_cores} processors")
+            manager = Manager()
+            invalid_flag = manager.Value('i', int(False))
+            values = manager.list()
+            runtimes = manager.list()
+            with get_context('spawn').Pool(self.num_cores) as pool:    # spawn alternative is forkserver, creates a reusable server
+                args = func, args, self.params, invalid_flag
+                values, runtimes = zip(*pool.starmap(run_kernel_and_observers, zip(iterator, repeat(args))))
+                values, runtimes = list(values), list(runtimes)
+            result["strategy_time"] = np.mean(runtimes)
+        else:
+            values = list()
+            for _ in range(self.iterations):
+                value = self.run_kernel(func, args, threads, grid)
+                if value < 0.0:
+                    raise Exception("too many resources requested for launch")
+                values.append(value)
+
+        benchmark_time = perf_counter() - start_time
+        self.benchmark_times.append(benchmark_time)
 
-        The C function tuning is a little bit more flexible than direct CUDA
-        or OpenCL kernel tuning. The C function needs to measure time, or some
-        other quality metric you wish to tune on, on its own and should
-        therefore return a single floating-point value.
+        result["times"] = values
+        result["time"] = np.mean(values)
+        # print(f"Mean: {np.mean(values)}, std: {np.std(values)} in {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}\n")
+        return result
 
-        Benchmark runs the C function repeatedly and returns the average of the
-        values returned by the C function. The number of iterations is set
-        during the creation of the CFunctions object. For all measurements the
-        lowest and highest values are discarded and the rest is included in the
-        average. The reason for this is to be robust against initialization
-        artifacts and other exceptional cases.
+    def benchmark_hyperparams(self, func, args, threads, grid):
+        """runs the kernel repeatedly, returns grandmedian for hyperparameter tuning
 
-        :param func: A C function compiled for this specific configuration
+        :param func: A Python function for this specific configuration
         :type func: ctypes._FuncPtr
 
         :param args: A list of arguments to the function, order should match the
@@ -118,7 +168,7 @@ def benchmark(self, func, args, threads, grid):
             interface as CudaFunctions and OpenCLFunctions.
         :type grid: any
 
-        :returns: All execution times.
+        :returns: All execution hyperparameter scores in the same format as times.
         :rtype: dict()
         """
 
@@ -137,18 +187,17 @@ def benchmark(self, func, args, threads, grid):
 
         # new implementation
         start_time = perf_counter()
-        if self.parallel_mode and cpu_count() > 1:
-            num_procs = max(min(cpu_count() - 2, self.iterations), 1)
-            logging.debug(f"Running benchmark in parallel on {num_procs} processors")
+        if self.parallel_mode:
+            logging.debug(f"Running hyperparameter benchmark in parallel on {self.num_cores} processors")
             manager = Manager()
             invalid_flag = manager.Value('i', int(False))
-            MNE_values = manager.list()
+            MWP_values = manager.list()
             runtimes = manager.list()
             warnings_dicts = manager.list()
-            with get_context('spawn').Pool(num_procs) as pool:    # spawn alternative is forkserver, creates a reusable server
+            with get_context('spawn').Pool(self.num_cores) as pool:    # spawn alternative is forkserver, creates a reusable server
                 args = func, args, self.params, invalid_flag
-                MNE_values, runtimes, warnings_dicts = zip(*pool.starmap(run_kernel_and_observers, zip(iterator, repeat(args))))
-                MNE_values, runtimes, warnings_dicts = list(MNE_values), list(runtimes), list(warnings_dicts)
+                MWP_values, runtimes, warnings_dicts = zip(*pool.starmap(run_kernel_and_observers, zip(iterator, repeat(args))))
+                MWP_values, runtimes, warnings_dicts = list(MWP_values), list(runtimes), list(warnings_dicts)
             result["strategy_time"] = np.mean(runtimes)
             warning_dict = warnings_dicts[0]
             for key in warning_dict.keys():
@@ -159,12 +208,13 @@ def benchmark(self, func, args, threads, grid):
 
         benchmark_time = perf_counter() - start_time
         self.benchmark_times.append(benchmark_time)
-        print(f"Time taken: {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}")
 
-        grandmean, times = get_grandmedian_and_times(MNE_values, invalid_value, min_valid_iterations)
+        grandmean, times = get_hyperparam_grandmedian_and_times(MWP_values, invalid_value, min_valid_iterations)
         result["times"] = times
         result["time"] = grandmean
-        print(f"Grandmean over kernels: {grandmean}, mean MNE per iteration: {np.mean(times)}, std MNE per iteration: {np.std(times)}")
+        print(f"Grandmean: {grandmean} in {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}\n")
+        # print(f"Grandmean: {grandmean}, mean MWP per iteration: {np.mean(times)}, std MWP per iteration: {np.std(times)}")
+        # print(f"In {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}")
         return result
 
         start_time = perf_counter()
@@ -221,10 +271,10 @@ def benchmark(self, func, args, threads, grid):
         result["time"] = mean_mean_MRE
         return result
 
-    def run_kernel(self, func, args):
+    def run_kernel(self, func, args, threads, grid):
         """runs the kernel once, returns whatever the kernel returns
 
-        :param func: A C function compiled for this specific configuration
+        :param func: A Python function for this specific configuration
         :type func: ctypes._FuncPtr
 
         :param args: A list of arguments to the function, order should match the
@@ -253,12 +303,12 @@ def run_kernel(self, func, args):
     units = {}
 
 
-def run_kernel_and_observers(iter, args) -> Tuple[list, float, dict]:
-    """ Function to run a kernel directly for parallel processing. Must be outside the class to avoid pickling issues due to large scope. """
+def run_hyperparam_kernel_and_observers(iter, args) -> Tuple[list, float, dict]:
+    """ Function to run a hyperparam kernel directly for parallel processing. Must be outside the class to avoid pickling issues due to large scope. """
     PID = getpid()
-    print(f"Iter {iter+1}, PID {PID}", flush=True)
+    # print(f"Iter {iter+1}, PID {PID}", flush=True)
     func, funcargs, params, invalid_flag = args
-    logging.debug(f"run_kernel as subprocess {iter} (PID {PID})")
+    logging.debug(f"run_kernel iter {iter} (PID {PID})")
     logging.debug("arguments=" + str([str(arg) for arg in funcargs]))
 
     # run the kernel
@@ -270,8 +320,8 @@ def run_kernel_and_observers(iter, args) -> Tuple[list, float, dict]:
     return values, runtime, warning_dict
 
 
-def run_kernel_as_subprocess(iter, args):
-    """ Function to run a kernel as a subprocess for parallel processing. Must be outside the class to avoid pickling issues due to large scope. Significantly slower than run_kernel, but guaranteed to be a different process. Observers are not implemented."""
+def run_hyperparam_kernel_as_subprocess(iter, args):
+    """ Function to run a hyperparam kernel as a subprocess for parallel processing. Must be outside the class to avoid pickling issues due to large scope. Significantly slower than run_kernel, but guaranteed to be a different process. Observers are not implemented."""
     func, args, params = args
     PID = getpid()
     # print(f"Iter {iter}, PID {PID}", flush=True)
@@ -298,47 +348,52 @@ def make_kwargstrings(**kwargs) -> list:
     return time
 
 
-def get_grandmedian_and_times(MNE_values, invalid_value, min_valid_iterations=1):
-    """ Get the grandmean (mean of median MNE per kernel) and mean MNE per iteration """
-    MNE_values = np.array(MNE_values)
-    median_MNEs = np.array([])
-    valid_MNE_times = list()
-    # get the mean MNE per kernel
-    for i in range(len(MNE_values[0])):
-        MNE_kernel_values = MNE_values[:, i]
-        valid_MNE_mask = (MNE_kernel_values < invalid_value) & (MNE_kernel_values >= 0)
-        valid_MNE_kernel_values = MNE_kernel_values[valid_MNE_mask]
-        if len(valid_MNE_kernel_values) >= min_valid_iterations:
+def get_hyperparam_grandmedian_and_times(MWP_values, invalid_value, min_valid_iterations=1):
+    """ Get the grandmean (mean of median MWP per kernel) and mean MWP per iteration """
+    MWP_values = np.array(MWP_values)
+    median_MWPs = np.array([])
+    median_MWPs_vars = np.array([])
+    valid_MWP_times = list()
+    # get the mean MWP per kernel
+    for i in range(len(MWP_values[0])):
+        MWP_kernel_values = MWP_values[:, i]
+        valid_MWP_mask = (MWP_kernel_values < invalid_value) & (MWP_kernel_values >= 0)
+        valid_MWP_kernel_values = MWP_kernel_values[valid_MWP_mask]
+        if len(valid_MWP_kernel_values) >= min_valid_iterations:
             # # filter outliers by keeping only values that are within two times the Median Absolute Deviation
-            # AD = np.abs(valid_MNE_kernel_values - np.median(valid_MNE_kernel_values))
+            # AD = np.abs(valid_MWP_kernel_values - np.median(valid_MWP_kernel_values))
             # MAD = np.median(AD)
-            # selected_MNE_kernel_values = valid_MNE_kernel_values[AD < MAD * 3]
-            # print(f"Removed {len(valid_MNE_kernel_values) - len(selected_MNE_kernel_values)}")
-            # median_MNEs = np.append(median_MNEs, np.median(selected_MNE_kernel_values))
-            # median_MNEs = np.append(median_MNEs, np.mean(valid_MNE_kernel_values))
+            # selected_MWP_kernel_values = valid_MWP_kernel_values[AD < MAD * 3]
+            # print(f"Removed {len(valid_MWP_kernel_values) - len(selected_MWP_kernel_values)}")
+            # median_MWPs = np.append(median_MWPs, np.median(selected_MWP_kernel_values))
+            # median_MWPs = np.append(median_MWPs, np.mean(valid_MWP_kernel_values))
 
             # filter outliers by keeping only values that are within three times the Median Absolute Deviation
-            AD = np.abs(valid_MNE_kernel_values - np.median(valid_MNE_kernel_values))
+            AD = np.abs(valid_MWP_kernel_values - np.median(valid_MWP_kernel_values))
             MAD = np.median(AD)
             MAD_score = AD / MAD if MAD else 0.0
-            selected_MNE_kernel_values = valid_MNE_kernel_values[MAD_score < 3]
-            median_MNEs = np.append(median_MNEs, np.median(selected_MNE_kernel_values))
+            selected_MWP_kernel_values = valid_MWP_kernel_values[MAD_score < 3]
+            median_MWPs = np.append(median_MWPs, np.median(selected_MWP_kernel_values))
+            median_MWPs_vars = np.append(median_MWPs_vars, np.std(selected_MWP_kernel_values))
         else:
-            median_MNEs = np.append(median_MNEs, invalid_value)
-
-    # get the mean MNE per iteration
-    for i in range(len(MNE_values)):
-        MNE_iteration_values = MNE_values[i]
-        valid_MNE_mask = (MNE_iteration_values < invalid_value) & (MNE_iteration_values >= 0)
-        valid_MNE_iteration_values = MNE_iteration_values[valid_MNE_mask]
-        if len(valid_MNE_iteration_values) > 0:
-            valid_MNE_times.append(np.mean(valid_MNE_iteration_values))
+            median_MWPs = np.append(median_MWPs, invalid_value)
+            median_MWPs_vars = np.append(median_MWPs_vars, 1)
+
+    # get the mean MWP per iteration
+    for i in range(len(MWP_values)):
+        MWP_iteration_values = MWP_values[i]
+        valid_MWP_mask = (MWP_iteration_values < invalid_value) & (MWP_iteration_values >= 0)
+        valid_MWP_iteration_values = MWP_iteration_values[valid_MWP_mask]
+        if len(valid_MWP_iteration_values) > 0:
+            valid_MWP_times.append(np.mean(valid_MWP_iteration_values))
         else:
-            valid_MNE_times.append(invalid_value)
-
-    # get the grandmean by taking the mean over the median MNE per iteration, invalid if one of the kernels is invalid
-    print(median_MNEs)
-    grandmean_MNE = np.mean(median_MNEs)
-    if np.isnan(grandmean_MNE) or len(median_MNEs[median_MNEs >= invalid_value]) > 0:
-        grandmean_MNE = invalid_value
-    return grandmean_MNE, valid_MNE_times
+            valid_MWP_times.append(invalid_value)
+
+    # get the grandmean by taking the inverse-variance weighted average over the median MWP per kernel, invalid if one of the kernels is invalid
+    print(median_MWPs)
+    print(median_MWPs / median_MWPs_vars, np.sum(1 / median_MWPs_vars), np.std(median_MWPs / median_MWPs_vars))
+    inverse_variance_weighted_average = np.sum(median_MWPs / median_MWPs_vars) / np.sum(1 / median_MWPs_vars)
+    grandmean_MWP = inverse_variance_weighted_average
+    if np.isnan(grandmean_MWP) or len(median_MWPs[median_MWPs >= invalid_value]) > 0:
+        grandmean_MWP = invalid_value
+    return grandmean_MWP, valid_MWP_times
diff --git a/kernel_tuner/runners/sequential.py b/kernel_tuner/runners/sequential.py
index 20fbfaa7b..65fd13ae7 100644
--- a/kernel_tuner/runners/sequential.py
+++ b/kernel_tuner/runners/sequential.py
@@ -11,7 +11,7 @@
 class SequentialRunner(object):
     """ SequentialRunner is used for tuning with a single process/thread """
 
-    def __init__(self, kernel_source, kernel_options, device_options, iterations, observers, parallel_mode=False):
+    def __init__(self, kernel_source, kernel_options, device_options, iterations, observers, parallel_mode=False, hyperparam_mode=False):
         """ Instantiate the SequentialRunner
 
         :param kernel_source: The kernel source
@@ -30,7 +30,8 @@ def __init__(self, kernel_source, kernel_options, device_options, iterations, ob
         """
 
         #detect language and create high-level device interface
-        self.dev = DeviceInterface(kernel_source, iterations=iterations, observers=observers, parallel_mode=parallel_mode, **device_options).__enter__()
+        self.dev = DeviceInterface(kernel_source, iterations=iterations, observers=observers, parallel_mode=parallel_mode, hyperparam_mode=hyperparam_mode,
+                                   **device_options).__enter__()
 
         self.units = self.dev.units
         self.quiet = device_options.quiet
diff --git a/kernel_tuner/runners/simulation.py b/kernel_tuner/runners/simulation.py
index 9e58634a5..e0317ed4f 100644
--- a/kernel_tuner/runners/simulation.py
+++ b/kernel_tuner/runners/simulation.py
@@ -88,8 +88,8 @@ def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=No
 
         logging.debug('DeviceInterface instantiated, lang=%s', lang)
 
-        if lang not in ('CUDA', 'OpenCL', 'C'):
-            raise ValueError("Sorry, support for languages other than CUDA, OpenCL, or C is not implemented yet")
+        if lang not in ('CUDA', 'OpenCL', 'C', 'Python'):
+            raise ValueError("Sorry, support for languages other than CUDA, OpenCL, C or Python is not implemented yet")
         self.lang = lang
         self.dev = SimulationLangFunction(self.lang, device, iterations, compiler_options)
         self.max_threads = 1024
@@ -173,7 +173,7 @@ def __exit__(self, *exc):
 class SimulationRunner(object):
     """ SimulationRunner is used for tuning with a single process/thread """
 
-    def __init__(self, kernel_source, kernel_options, device_options, iterations, observers, parallel_mode=False):
+    def __init__(self, kernel_source, kernel_options, device_options, iterations, observers, parallel_mode=False, hyperparam_mode=False):
         """ Instantiate the SimulationRunner
 
         :param kernel_source: The kernel source
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
index e4809be7f..f91463a72 100644
--- a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
+++ b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
@@ -82,7 +82,7 @@ def tune(runner, kernel_options, device_options, tuning_options):
     tuning_options["scaling"] = False
 
     # prune the search space using restrictions
-    parameter_space = get_valid_configs(tuning_options, max_threads)
+    parameter_space = util.get_valid_configs(tuning_options, max_threads)
 
     # limit max_fevals to max size of the parameter space
     max_fevals = min(len(parameter_space), max_fevals)
@@ -139,11 +139,11 @@ def __init__(self, parameter_space: list, kernel_options, tuning_options, runner
 
         # get tuning options
         self.initial_sample_method = self.get_hyperparam("initialsamplemethod", "lhs", supported_initial_sample_methods)
-        self.initial_sample_random_offset_factor = self.get_hyperparam("initialsamplerandomoffsetfactor", 0.1, type=float)
-        self.initial_training_iter = self.get_hyperparam("initialtrainingiter", 5, type=int)
-        self.training_iter = self.get_hyperparam("trainingiter", 1, type=int)
+        self.initial_sample_random_offset_factor = self.get_hyperparam("initialsamplerandomoffsetfactor", 0.1, type=float)    # 0.1
+        self.initial_training_iter = self.get_hyperparam("initialtrainingiter", 5, type=int)    # 5
+        self.training_after_iter = self.get_hyperparam("trainingafteriter", 1, type=int)    # 1
         self.cov_kernel_name = self.get_hyperparam("covariancekernel", "matern_scalekernel", supported_cov_kernels)
-        self.cov_kernel_lengthscale = self.get_hyperparam("covariancelengthscale", 0.5, type=float)
+        self.cov_kernel_lengthscale = self.get_hyperparam("covariancelengthscale", 1.5, type=float)
         self.likelihood_name = self.get_hyperparam("likelihood", "Gaussian", supported_likelihoods)
         self.optimizer_name = self.get_hyperparam("optimizer", "LBFGS", supported_optimizers)
         self.optimizer_learningrate = self.get_hyperparam("optimizer_learningrate", self.optimizer_name, type=float, cast=default_optimizer_learningrates)
@@ -153,7 +153,7 @@ def __init__(self, parameter_space: list, kernel_options, tuning_options, runner
         # set acquisition function options
         self.set_acquisition_function(acquisition_function_name)
         if 'explorationfactor' not in af_params:
-            af_params['explorationfactor'] = 0.1
+            af_params['explorationfactor'] = 0.1    # 0.1
         self.af_params = af_params
 
         # set Tensors
@@ -208,6 +208,10 @@ def __init__(self, parameter_space: list, kernel_options, tuning_options, runner
             'lengthscale': np.array([]),
             'noise': np.array([]),
         }
+
+        # initialize the model
+        if not self.runner.simulation_mode:
+            self.import_cached_evaluations()
         self.initialize_model()
 
     @property
@@ -230,7 +234,7 @@ def train_y_err(self):
         """ Get the error on the valid results """
         std = self.results_std[self.valid_configs]
         if self.scaled_output and std.std() > 0.0:
-            std = (std - std.mean()) / std.std()
+            std = (std - std.mean()) / std.std()    # use z-score to get normalized variability
         return std
 
     @property
@@ -243,6 +247,12 @@ def test_x_unscaled(self):
         """ Get the unscaled, not yet visited parameter configurations """
         return self.param_configs[self.unvisited_configs]
 
+    @property
+    def test_y_err(self):
+        """ Get the expected error on the test set """
+        train_y_err = self.train_y_err
+        return torch.full((self.size - len(train_y_err), ), torch.mean(train_y_err))
+
     @property
     def invalid_x(self):
         """ Get the invalid parameter configurations by checking which visited configs are not valid (equivalent to checking which unvisited configs are valid) """
@@ -262,17 +272,17 @@ def true_param_config_indices(self, target_indices: torch.Tensor) -> torch.Tenso
 
     def initialize_model(self, take_initial_sample=True, train_hyperparams=True):
         """ Initialize the surrogate model """
-        if not self.runner.simulation_mode:
-            self.import_cached_evaluations()
-        self.initial_sample_std = self.min_std
+        # self.initial_sample_std = self.min_std
         if take_initial_sample:
             self.initial_sample()
 
         # create the model
         if self.likelihood_name == 'Gaussian':
             self.likelihood = gpytorch.likelihoods.GaussianLikelihood()
+        elif self.likelihood_name == 'GaussianPrior':
+            raise NotImplementedError("Gaussian Prior likelihood has not been implemented yet")
         elif self.likelihood_name == 'FixedNoise':
-            self.likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(noise=self.train_y_err.clamp(min=self.min_std), learn_additional_noise=False)
+            self.likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(noise=self.train_y_err.clamp(min=self.min_std), learn_additional_noise=True)
         self.likelihood = self.likelihood.to(self.device)
         self.model = ExactGPModel(self.train_x, self.train_y, self.likelihood, self.cov_kernel_name, self.cov_kernel_lengthscale)
 
@@ -360,8 +370,8 @@ def initial_sample(self):
         # set the current optimum, initial sample mean and initial sample std
         self.current_optimum = self.opt(self.train_y).item()
         self.initial_sample_mean = self.train_y.mean().item()
-        # self.initial_sample_std = self.train_y.std().item()
-        self.initial_sample_std = self.min_std    # temporary until the predictive posterior has been taken
+        self.initial_sample_std = self.train_y.std().item()
+        # self.initial_sample_std = self.min_std    # temporary until the predictive posterior has been taken
 
         # save a boolean mask of the initial samples
         self.inital_sample_configs = self.valid_configs.detach().clone()
@@ -471,6 +481,8 @@ def closure():
             except gpytorch.utils.errors.NotPSDError:
                 warnings.warn("Matrix not positive definite during training", NotPSDTrainingWarning)
                 return np.nan
+            except RuntimeError as e:
+                warnings.warn(str(e), RuntimeWarning)
 
         loss = None
         for _ in range(training_iter):
@@ -482,6 +494,9 @@ def closure():
             except gpytorch.utils.errors.NanError:
                 warnings.warn("PSD_safe_Cholesky failed due to too many NaN", NaNTrainingWarning)
                 break
+            except TypeError as e:
+                warnings.warn(str(e), RuntimeWarning)
+                break
 
         # set the hyperparams to the new values
         try:
@@ -518,8 +533,8 @@ def optimize(self, max_fevals: int) -> Tuple[tuple, float]:    #NOSONAR
                 predictions_tuple = self.remove_from_predict_list(predictions_tuple, short_param_config_index)
             else:
                 predictions_tuple = self.predict_list()
-                if self.initial_sample_std <= self.min_std:
-                    self.initial_sample_std = min(max(predictions_tuple[1].mean().item(), self.min_std), 10.0)
+                # if self.initial_sample_std <= self.min_std:
+                # self.initial_sample_std = min(max(predictions_tuple[1].mean().item(), self.min_std), 10.0)
             # if there are NaN or all of the predicted std are the same, take from the least evaluated region
             mean_has_NaN = bool(torch.any(torch.isnan(predictions_tuple[0])).item())
             std_has_NaN = bool(torch.any(torch.isnan(predictions_tuple[1])).item())
@@ -536,7 +551,7 @@ def optimize(self, max_fevals: int) -> Tuple[tuple, float]:    #NOSONAR
                 warnings.warn(
                     f"After {self.fevals}/{max_fevals} fevals, {warning_reason}, picking one from the least evaluated region and resetting the surrogate model",
                     ResetModelWarning)
-                self.initialize_model(take_initial_sample=False, train_hyperparams=False)
+                self.initialize_model(take_initial_sample=False, train_hyperparams=True)
             else:
                 # otherwise, optimize the acquisition function to find the next candidate
                 hyperparam = self.contextual_variance(predictions_tuple[0], predictions_tuple[1]) if use_contextual_variance else None
@@ -569,8 +584,8 @@ def optimize(self, max_fevals: int) -> Tuple[tuple, float]:    #NOSONAR
                 last_invalid = False
                 self.model.set_train_data(self.train_x, self.train_y, strict=False)
                 # do not train if there are multiple minima, because it introduces numerical instability or insolvability
-                if self.training_iter > 0:
-                    self.train_hyperparams(training_iter=self.training_iter)
+                if self.training_after_iter > 0 and (self.fevals % self.training_after_iter == 0):
+                    self.train_hyperparams(training_iter=1)    # TODO experiment with other training iter
                 # set the current optimum
                 self.current_optimum = self.opt(self.train_y).item()
             # print(f"Valid: {len(self.train_x)}, unvisited: {len(self.test_x)}, invalid: {len(self.invalid_x)}, last invalid: {last_invalid}")
@@ -603,7 +618,7 @@ def register_result(self, result: float, param_config_index: int):
         if result != self.invalid_value:
             self.valid_configs[param_config_index] = True
             self.results[param_config_index] = result
-            assert last_result['time'] == result
+            # assert last_result['time'] == result TODO remove
             self.results_std[param_config_index] = max(np.std(last_result['times']), self.min_std)
 
         # add the current model parameters to the last entry of the results dict
@@ -634,6 +649,9 @@ def predict_list(self) -> Tuple[torch.Tensor, torch.Tensor]:
             except gpytorch.utils.errors.NotPSDError:
                 warnings.warn("NotPSD error during predictions", NotPSDPredictionWarning)
                 return torch.ones_like(self.test_x), torch.zeros_like(self.test_x)
+            except RuntimeError as e:
+                warnings.warn(str(e), RuntimeWarning)
+                return torch.ones_like(self.test_x), torch.zeros_like(self.test_x)
 
     def get_diff_improvement(self, y_mu, y_std, fplus) -> torch.Tensor:
         """ compute probability of improvement by assuming normality on the difference in improvement """
@@ -656,14 +674,12 @@ def contextual_variance(self, mean: torch.Tensor, std: torch.Tensor):
             improvement_over_current_sample = (abs(self.current_optimum) - self.train_y.mean().item()) / std.mean().item()
             improvement_diff = improvement_over_current_sample - improvement_over_initial_sample
             # the closer the improvement over the current sample is to the improvement over the initial sample, the greater the exploration
-            x = 1 - min(max(1 - improvement_diff, 0.2), 0.0)
-            # x = 1 - min(max(improvement_diff, 1) * 0.2, 0.0)
+            # x = 1 - max(max(1 - improvement_diff, 0.2), 0.0)
+            x = 1 - max(min(improvement_diff, 1) * 0.2, 0.0)
             # the smaller the difference between the initial sample error and current sample error, the greater the exploration
             # x = 1 - min(max(self.initial_sample_std - std.mean().item(), 1.0), 0.8)
             # print(self.initial_sample_std, std.mean().item())
-            # print(x)
             cv = np.log10(x) + 0.1    # at x=0.0, y=0.1; at x=0.2, y=0.003; at x=0.2057, y=0.0.
-            # print(cv)
             return cv
         else:
             raise NotImplementedError("Contextual Variance has not yet been implemented for non-scaled outputs")
@@ -821,57 +837,10 @@ def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[torch.Te
 
         return torch.tensor(parameter_space, dtype=self.dtype).to(self.device), tune_params
 
-    def to_xarray(self):
-        # print(self.tuning_options['tune_params'])
-        # print(az.convert_to_inference_data(self.tuning_options['tune_params']).posterior)
-        with torch.no_grad(), gpytorch.settings.fast_pred_samples(), gpytorch.settings.fast_pred_var():
-            posterior = self.model(self.param_configs_scaled)
-            predictive_posterior = self.likelihood(posterior)
-            # print(posterior.variance)
-            # print(az.convert_to_inference_data(posterior.to_data_independent_dist()))
-            # print(len(posterior.covariance_matrix))
-            # print(len(posterior.covariance_matrix[0]))
-            # exit(0)
-
-            # data = az.load_arviz_data('centered_eight')
-            # az.plot_posterior(data, show=True)
-
-            param_configs = list(tuple(pc) for pc in self.param_configs.tolist())
-            # posterior_dict = dict(zip(param_configs, posterior.get_base_samples()))
-            posterior_dict = {
-                'mu': posterior.mean,
-                'var': posterior.variance
-            }
-            predictive_posterior_dict = {
-                'mu': predictive_posterior.mean,
-                'var': predictive_posterior.variance
-            }
-            print(posterior_dict)
-            # predictive_posterior_dict = dict(zip(str(self.param_configs_scaled.numpy()), predictive_posterior.get_base_samples()))
-            # log_prob_dict = dict(zip(self.param_configs_scaled, predictive_posterior.log_prob()))
-            tune_param_keys = np.array(list(self.tune_params.keys()))[self.nonstatic_params]
-            tune_param_values = np.array(list(self.tune_params.values()), dtype=object)[self.nonstatic_params]
-            coordinates = dict(zip(tune_param_keys, tune_param_values))
-            dimensions = dict(zip(tune_param_keys, ([k] for k in tune_param_keys)))
-            print(coordinates)
-            print(dimensions)
-            data = az.from_dict(posterior_dict, posterior_predictive=predictive_posterior_dict)
-            print(az.summary(data))
-            print(data.posterior)
-            print(data.posterior_predictive)
-            az.plot_trace(data, show=True)
-            exit(0)
-            print(data.posterior_predictive)
-
-            # print(az.convert_to_inference_data(posterior.get_base_samples()))
-        # TODO create InferenceData
-        # print(predictive_posterior.sample())
-        # print(az.from_dict())
-        # print(az.convert_to_inference_data(predictive_posterior))
-        exit(0)
-
     def visualize(self):
         """ Visualize the surrogate model and observations in a plot """
+        if self.fevals < 220:
+            return None
         from matplotlib import pyplot as plt
         with torch.no_grad(), gpytorch.settings.fast_pred_var():
             # Initialize plot
diff --git a/kernel_tuner/strategies/brute_force.py b/kernel_tuner/strategies/brute_force.py
index d72713908..b8c148923 100644
--- a/kernel_tuner/strategies/brute_force.py
+++ b/kernel_tuner/strategies/brute_force.py
@@ -34,12 +34,15 @@ def tune(runner, kernel_options, device_options, tuning_options):
     restrictions = tuning_options.restrictions
     verbose = tuning_options.verbose
 
-    # compute cartesian product of all tunable parameters
-    parameter_space = itertools.product(*tune_params.values())
+    # # compute cartesian product of all tunable parameters
+    # parameter_space = itertools.product(*tune_params.values())
 
-    # check for search space restrictions
-    if restrictions is not None:
-        parameter_space = filter(lambda p: util.check_restrictions(restrictions, p, tune_params.keys(), verbose), parameter_space)
+    # # check for search space restrictions
+    # if restrictions is not None:
+    #     parameter_space = filter(lambda p: util.check_restrictions(restrictions, p, tune_params.keys(), verbose), parameter_space)
+
+    parameter_space = util.get_valid_configs(tuning_options, runner.dev.max_threads)
+    print(f"Parameter space size: {len(parameter_space)}")
 
     results, env = runner.run(parameter_space, kernel_options, tuning_options)
 
diff --git a/kernel_tuner/strategies/random_sample.py b/kernel_tuner/strategies/random_sample.py
index 3b8f20a51..8eef8ded6 100644
--- a/kernel_tuner/strategies/random_sample.py
+++ b/kernel_tuner/strategies/random_sample.py
@@ -5,6 +5,7 @@
 import numpy
 
 from kernel_tuner import util
+from time import perf_counter
 
 
 def tune(runner, kernel_options, device_options, tuning_options):
@@ -35,15 +36,10 @@ def tune(runner, kernel_options, device_options, tuning_options):
 
     fraction = tuning_options.strategy_options.get("fraction", 0.1)
 
-    # compute cartesian product of all tunable parameters
-    parameter_space = itertools.product(*tune_params.values())
-
-    # check for search space restrictions
-    if tuning_options.restrictions is not None:
-        parameter_space = filter(lambda p: util.check_restrictions(tuning_options.restrictions, p, tune_params.keys(), tuning_options.verbose), parameter_space)
+    parameter_space = util.get_valid_configs(tuning_options, runner.dev.max_threads)
 
     # reduce parameter space to a random sample using sample_fraction
-    parameter_space = numpy.array(list(parameter_space))
+    parameter_space = numpy.array(parameter_space)
     size = len(parameter_space)
     fraction = int(numpy.ceil(size * fraction))
     sample_indices = numpy.random.choice(range(size), size=fraction, replace=False)
diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index 332ac6750..0f18c0f0f 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -4,6 +4,7 @@
 from collections import OrderedDict
 import os
 import errno
+from tabnanny import verbose
 import tempfile
 import logging
 import warnings
@@ -11,6 +12,7 @@
 from types import FunctionType
 
 import numpy as np
+from constraint import Problem, Constraint, FunctionConstraint
 try:
     import cupy as cp
 except ImportError:
@@ -122,9 +124,8 @@ def check_block_size_params_names_list(block_size_names, tune_params):
             warnings.warn("None of the tunable parameters specify thread block dimensions!", UserWarning)
 
 
-def check_restrictions(restrictions, element, keys, verbose):
+def check_restrictions(restrictions, params, verbose):
     """ check whether a specific instance meets the search space restrictions """
-    params = OrderedDict(zip(keys, element))
     valid = True
     if callable(restrictions):
         valid = restrictions(params)
@@ -140,14 +141,23 @@ def check_restrictions(restrictions, element, keys, verbose):
     return valid
 
 
+def check_thread_block_dimensions(params, max_threads, block_size_names=None):
+    """ check on maximum thread block dimensions """
+    dims = get_thread_block_dimensions(params, block_size_names)
+    return np.prod(dims) <= max_threads
+
+
 def config_valid(config, tuning_options, max_threads):
     """ combines restrictions and a check on the max thread block dimension to check config validity """
     legal = True
-    if tuning_options.restrictions:
-        legal = check_restrictions(tuning_options.restrictions, config, tuning_options.tune_params.keys(), False)
     params = OrderedDict(zip(tuning_options.tune_params.keys(), config))
-    dims = get_thread_block_dimensions(params, tuning_options.get("block_size_names", None))
-    return legal and np.prod(dims) <= max_threads
+    if tuning_options.restrictions:
+        legal = check_restrictions(tuning_options.restrictions, params, False)
+        if not legal:
+            return False
+    block_size_names = tuning_options.get("block_size_names", None)
+    valid_thread_block_dimensions = check_thread_block_dimensions(params, max_threads, block_size_names)
+    return legal and valid_thread_block_dimensions
 
 
 def delete_temp_file(filename):
@@ -262,10 +272,42 @@ def get_kernel_string(kernel_source, params=None):
 
 def get_valid_configs(tuning_options, max_threads) -> list:
     """ compute valid configurations in a search space based on restrictions and max_threads"""
-    parameter_space = itertools.product(*tuning_options.tune_params.values())
-    if tuning_options.restrictions is not None:
-        parameter_space = filter(lambda p: config_valid(p, tuning_options, max_threads), parameter_space)
-    return list(parameter_space)
+    restrictions = tuning_options.restrictions
+    tune_params = tuning_options.tune_params
+    param_names = list(tune_params.keys())
+
+    # instantiate the parameter space with all the variables
+    parameter_space = Problem()
+    for param_name, param_values in tune_params.items():
+        parameter_space.addVariable(param_name, param_values)
+
+    # add the user-specified restrictions as constraints on the parameter space
+    if isinstance(restrictions, list):
+        for restriction in restrictions:
+            if isinstance(restriction, FunctionConstraint):
+                parameter_space.addConstraint(restriction, param_names)
+            elif isinstance(restriction, Constraint):
+                parameter_space.addConstraint(restriction)
+            else:
+                raise ValueError(f"Unrecognized restriction {restriction}")
+    # if the restrictions are the old monolithic function, apply them directly (only for backwards compatibility, likely slower than well-specified constraints!)
+    elif callable(restrictions):
+        restrictions_wrapper = lambda *args: check_restrictions(restrictions, dict(zip(param_names, args)), False)
+        parameter_space.addConstraint(restrictions_wrapper, param_names)
+
+    # add the default blocksize threads restrictions last, because it is unlikely to reduce the parameter space by much
+    block_size_names = tuning_options.get("block_size_names", default_block_size_names)
+    block_size_names = list(block_size_name for block_size_name in block_size_names if block_size_name in param_names)
+    if len(block_size_names) > 0:
+        parameter_space.addConstraint(MaxProdConstraint(max_threads), block_size_names)
+
+    # construct the parameter space with the constraints applied
+    parameter_space = parameter_space.getSolutions()
+    # form the parameter tuples in the order specified by tune_params.keys()
+    parameter_space_list = list()
+    for params in parameter_space:
+        parameter_space_list.append(tuple(params[param_name] for param_name in param_names))
+    return parameter_space_list
 
 
 def get_number_of_valid_configs(tuning_options, max_threads) -> int:
@@ -691,3 +733,55 @@ def parse_restrictions(restrictions: list):
     code_object = compile(parsed_restrictions, '<string>', 'exec')
     func = FunctionType(code_object.co_consts[0], globals())
     return func
+
+
+class MaxProdConstraint(Constraint):
+    """
+    Constraint enforcing that values of given variables prod up to
+    a given amount
+    Example:
+    >>> problem = Problem()
+    >>> problem.addVariables(["a", "b"], [1, 2])
+    >>> problem.addConstraint(MaxProdConstraint(3))
+    >>> sorted(sorted(x.items()) for x in problem.getSolutions())
+    [[('a', 1), ('b', 1)], [('a', 1), ('b', 2)], [('a', 2), ('b', 1)]]
+    """
+
+    def __init__(self, maxprod):
+        """
+        @param maxprod: Value to be considered as the maximum prod
+        @type  maxprod: number
+        @param multipliers: If given, variable values will be multiplied by
+                            the given factors before being prodmed to be checked
+        """
+        self._maxprod = maxprod
+
+    def preProcess(self, variables, domains, constraints, vconstraints):
+        Constraint.preProcess(self, variables, domains, constraints, vconstraints)
+        maxprod = self._maxprod
+        for variable in variables:
+            domain = domains[variable]
+            for value in domain[:]:
+                if value > maxprod:
+                    domain.remove(value)
+
+    def __call__(self, variables, domains, assignments, forwardcheck=False):
+        maxprod = self._maxprod
+        prod = 1
+        for variable in variables:
+            if variable in assignments:
+                prod *= assignments[variable]
+        if type(prod) is float:
+            prod = round(prod, 10)
+        if prod > maxprod:
+            return False
+        if forwardcheck:
+            for variable in variables:
+                if variable not in assignments:
+                    domain = domains[variable]
+                    for value in domain[:]:
+                        if prod + value > maxprod:
+                            domain.hideValue(value)
+                    if not domain:
+                        return False
+        return True
diff --git a/setup.py b/setup.py
index 113c18d33..ca7ce56c2 100644
--- a/setup.py
+++ b/setup.py
@@ -39,7 +39,7 @@ def readme():
         'Topic :: System :: Distributed Computing',
         'Development Status :: 5 - Production/Stable',
     ],
-    install_requires=['numpy>=1.13.3', 'scipy>=0.18.1', 'jsonschema'],
+    install_requires=['numpy>=1.13.3', 'scipy>=0.18.1', 'jsonschema', 'python-constraint>=1.4.0'],
     extras_require={
         'doc': ['sphinx', 'sphinx_rtd_theme', 'nbsphinx', 'pytest', 'ipython'],
         'cuda': ['pycuda', pynvml],

From 5983e249aa17b6b41606be638d0aafd63604c163 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 25 Mar 2022 15:05:06 +0100
Subject: [PATCH 007/253] Added backwards compatibility with most
 python-constraint Constraints for on-the-fly restriction-checking algorithms

---
 kernel_tuner/strategies/basinhopping.py |  2 +-
 kernel_tuner/strategies/minimize.py     |  2 +-
 kernel_tuner/util.py                    | 38 +++++++++++++++++++++++--
 3 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/strategies/basinhopping.py b/kernel_tuner/strategies/basinhopping.py
index 4a434434a..46c68ea61 100644
--- a/kernel_tuner/strategies/basinhopping.py
+++ b/kernel_tuner/strategies/basinhopping.py
@@ -37,7 +37,7 @@ def tune(runner, kernel_options, device_options, tuning_options):
     method = tuning_options.strategy_options.get("method", "L-BFGS-B")
     T = tuning_options.strategy_options.get("T", 1.0)
 
-    #s cale variables in x to make 'eps' relevant for multiple variables
+    # scale variables in x to make 'eps' relevant for multiple variables
     tuning_options["scaling"] = True
 
     bounds, x0, eps = get_bounds_x0_eps(tuning_options)
diff --git a/kernel_tuner/strategies/minimize.py b/kernel_tuner/strategies/minimize.py
index eb9b1b81b..e475890a8 100644
--- a/kernel_tuner/strategies/minimize.py
+++ b/kernel_tuner/strategies/minimize.py
@@ -81,7 +81,7 @@ def _cost_func(x, kernel_options, tuning_options, runner, results, check_restric
 
     # check if this is a legal (non-restricted) parameter instance
     if check_restrictions and tuning_options.restrictions:
-        legal = util.check_restrictions(tuning_options.restrictions, params, tuning_options.tune_params.keys(), tuning_options.verbose)
+        legal = util.config_valid(params, tuning_options, runner.dev.max_threads)
         if not legal:
             error_result = OrderedDict(zip(tuning_options.tune_params.keys(), params))
             error_result["time"] = error_time
diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index a5a0f0eca..1c690c1fa 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -1,4 +1,5 @@
 """ Module for kernel tuner utility functions """
+from argparse import ArgumentError
 import itertools
 import json
 from collections import OrderedDict
@@ -12,7 +13,7 @@
 from types import FunctionType
 
 import numpy as np
-from constraint import Problem, Constraint, FunctionConstraint
+from constraint import Problem, Constraint, AllDifferentConstraint, AllEqualConstraint, MaxSumConstraint, ExactSumConstraint, MinSumConstraint, InSetConstraint, NotInSetConstraint, SomeInSetConstraint, SomeNotInSetConstraint, FunctionConstraint
 try:
     import cupy as cp
 except ImportError:
@@ -135,8 +136,16 @@ def check_restrictions(restrictions, params, verbose):
     else:
         for restrict in restrictions:
             try:
-                if not eval(replace_param_occurrences(restrict, params)):
+                # if it's a python-constraint, convert to function and execute
+                if isinstance(restrict, Constraint):
+                    restrict = convert_constraint_restriction(restrict)
+                    if not restrict(params.values()):
+                        valid = False
+                        break
+                # if it's a string, fill in the parameters and evaluate
+                elif not eval(replace_param_occurrences(restrict, params)):
                     valid = False
+                    break
             except ZeroDivisionError:
                 pass
     if not valid and verbose:
@@ -144,6 +153,31 @@ def check_restrictions(restrictions, params, verbose):
     return valid
 
 
+def convert_constraint_restriction(restrict: Constraint):
+    """ Convert the python-constraint to a function for backwards compatibility """
+    if isinstance(restrict, FunctionConstraint):
+        f_restrict = lambda p: restrict._func(*p)
+    elif isinstance(restrict, AllDifferentConstraint):
+        f_restrict = lambda p: len(set(p)) == len(p)
+    elif isinstance(restrict, AllEqualConstraint):
+        f_restrict = lambda p: all(x == p[0] for x in p)
+    elif isinstance(restrict, MaxProdConstraint):
+        f_restrict = lambda p: np.prod(p) <= restrict._exactsum
+    elif isinstance(restrict, MaxSumConstraint):
+        f_restrict = lambda p: sum(p) <= restrict._exactsum
+    elif isinstance(restrict, ExactSumConstraint):
+        f_restrict = lambda p: sum(p) == restrict._exactsum
+    elif isinstance(restrict, MinSumConstraint):
+        f_restrict = lambda p: sum(p) >= restrict._exactsum
+    elif isinstance(restrict, (InSetConstraint, NotInSetConstraint, SomeInSetConstraint, SomeNotInSetConstraint)):
+        raise NotImplementedError(
+            f"Restriction of the type {type(restrict)} is explicitely not supported in backwards compatibility mode, because the behaviour is too complex. Please rewrite this constraint to a function to use it with this algorithm."
+        )
+    else:
+        raise TypeError(f"Unrecognized restriction {restrict}")
+    return f_restrict
+
+
 def check_thread_block_dimensions(params, max_threads, block_size_names=None):
     """ check on maximum thread block dimensions """
     dims = get_thread_block_dimensions(params, block_size_names)

From f4c8e0b3117025838d801e48524a96dce0b53b09 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Tue, 5 Apr 2022 18:24:26 +0200
Subject: [PATCH 008/253] Added new minmax initial sampling

---
 .../strategies/bayes_opt_GPyTorch_lean.py     | 84 ++++++++++++++++++-
 kernel_tuner/util.py                          |  6 +-
 2 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
index 2cdc49e08..59e385421 100644
--- a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
+++ b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
@@ -20,7 +20,7 @@
 
 # set supported hyperparameter values
 supported_precisions = ['float', 'double']
-supported_initial_sample_methods = ['lhs', 'index', 'random']
+supported_initial_sample_methods = ['lhs', 'index', 'minmax','random']
 supported_methods = ['ei', 'poi', 'random']
 supported_cov_kernels = ['matern', 'matern_scalekernel']
 supported_likelihoods = ['Gaussian', 'GaussianPrior', 'FixedNoise']
@@ -354,6 +354,8 @@ def initial_sample(self):
                     continue
                 list_param_config_indices.append(param_config_index)
                 self.evaluate_config(param_config_index)
+        elif self.initial_sample_method == 'minmax':
+            list_param_config_indices += self.take_min_max_initial_samples(list_param_config_indices)
 
         # then take index-spaced samples until all samples are valid
         while self.fevals < self.num_initial_samples:
@@ -441,6 +443,86 @@ def get_lhs_samples(self, random_offsets: np.ndarray) -> torch.Tensor:
                     "Perhaps try something other than LHS."))
         return param_configs_indices
 
+    def take_min_max_initial_samples(self, list_param_config_indices: list, samples_per_parameter=1) -> list:
+        """ Take the minimum parameters and the maximum for each parameter to establish the effect of individual parameters """
+        # number of samples required is at least (samples_per_parameter) * (number of parameters) + 1
+
+        # first get the individual parameter values and sort them
+        params_values = list(self.tune_params.values())
+        for param_values in params_values:
+            param_values.sort()
+
+        number_of_params = len(params_values)
+        if self.num_initial_samples - self.fevals < samples_per_parameter * number_of_params + 1:
+            raise ValueError(f"There are not enough initial samples available ({self.num_initial_samples - self.fevals}) to do minmax initial sampling. At least {samples_per_parameter * number_of_params + 1} samples are required.")
+
+        # then take the minimum parameter configuration using BFS, this is used as the base
+        # instead of BFS, you could also search for the minimal sum of indices
+        minimum_index = None
+        param_level = 0
+        param_moving_index = -1
+        while minimum_index is None and self.num_initial_samples - self.fevals:
+            # create the minimum base configuration and find it in the search space
+            selected_param_config = torch.tensor(tuple(param_values[param_level+1] if param_index == param_moving_index else param_values[min(param_level, len(param_values)-1)] for param_index, param_values in enumerate(params_values)), dtype=self.dtype).to(self.device)
+            matching_params = torch.count_nonzero(self.param_configs == selected_param_config, -1)
+            match_mask = (matching_params == number_of_params)
+            found_num_matching_param_configs = match_mask.count_nonzero()
+            temp_index = self.index_counter[match_mask]
+            # check if the configuration exists and is succesfully evaluated
+            if found_num_matching_param_configs == 1 and (temp_index.item() in list_param_config_indices or self.evaluate_config(temp_index.item()) < self.invalid_value):
+                minimum_index = temp_index.item()
+                minimum_config = self.param_configs[minimum_index]
+                if minimum_index not in list_param_config_indices:
+                    list_param_config_indices.append(minimum_index)
+            # if it doesn't exist and evaluate, do a breadth-first search for the minimum configuration
+            else:
+                proceed = False
+                while not proceed:
+                    # first look at the current level
+                    if param_moving_index < len(params_values) - 1:
+                        param_moving_index += 1
+                        # if the param_level + 1 exceeds the number of parameters, try the next parameter
+                        if len(params_values[param_moving_index]) <= param_level + 1:
+                            param_moving_index += 1
+                        else:
+                            proceed = True
+                    # if nothing is found, proceed to the next level
+                    else:
+                        param_level += 1
+                        param_moving_index = -1
+                        proceed = True
+        if minimum_index is None:
+            raise ValueError(f"Could not evaluate the minimum base configuration in {self.num_initial_samples} samples.")
+
+        # next take the maximum for each individual parameter using DFS
+        for param_index, param_values in enumerate(params_values):
+            if len(param_values) <= 1:
+                continue
+            maximum_index = None
+            param_moving_level = len(param_values) - 1
+            while maximum_index is None and self.num_initial_samples - self.fevals > 0:
+                # take the minimum configuration as base
+                selected_param_config = minimum_config.clone()
+                # change only the currently selected parameter and look up the configuration in the search space
+                selected_param_config[param_index] = param_values[param_moving_level]
+                matching_params = torch.count_nonzero(self.param_configs == selected_param_config, -1)
+                match_mask = (matching_params == number_of_params)
+                found_num_matching_param_configs = match_mask.count_nonzero()
+                temp_index = self.index_counter[match_mask]
+                if found_num_matching_param_configs == 1 and (temp_index.item() in list_param_config_indices or self.evaluate_config(temp_index.item()) < self.invalid_value):
+                    maximum_index = temp_index.item()
+                    if maximum_index not in list_param_config_indices:
+                        list_param_config_indices.append(maximum_index)
+                # if it doesn't exist and evaluate, move one parameter value down
+                else:
+                    param_moving_level -= 1
+                    if param_moving_level < 0:
+                        raise ValueError(f"No instance of parameter {param_index} is present in the search space and succesfully evaluated")
+            if maximum_index is None:
+                raise ValueError(f"Could not evaluate the maximum configuration for {param_index+1} out of {len(params_values)} within {self.num_initial_samples} samples.")
+
+        return list_param_config_indices
+
     def get_middle_index_of_least_evaluated_region(self) -> int:
         """ Get the middle index of the region of parameter configurations that is the least visited """
         # This uses the largest distance between visited parameter configurations. That means it does not properly take the parameters into account, only the index of the parameter configurations, whereas LHS does.
diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index 1c690c1fa..ca433bbfd 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -343,7 +343,11 @@ def get_valid_configs(tuning_options, max_threads) -> list:
     # form the parameter tuples in the order specified by tune_params.keys()
     parameter_space_list = list()
     for params in parameter_space:
-        parameter_space_list.append(tuple(params[param_name] for param_name in param_names))
+        param_config = tuple(params[param_name] for param_name in param_names)
+        if param_config not in parameter_space_list:
+            parameter_space_list.append(param_config)
+        else:
+            print(f"Duplicate {param_config}")
     return parameter_space_list
 
 

From b33c6bd05abc9518c1324b4259f8355310a264a2 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 25 Oct 2024 12:44:32 -0700
Subject: [PATCH 009/253] Skip strategies that don't have their dependencies
 installed

---
 test/context.py                    | 23 +++++++++++++++++++----
 test/strategies/__init__.py        |  0
 test/strategies/test_strategies.py | 14 ++++++++++++--
 3 files changed, 31 insertions(+), 6 deletions(-)
 create mode 100644 test/strategies/__init__.py

diff --git a/test/context.py b/test/context.py
index ba5030430..e99591764 100644
--- a/test/context.py
+++ b/test/context.py
@@ -1,7 +1,7 @@
-import sys
-import subprocess
-import shutil
 import os
+import shutil
+import subprocess
+import sys
 
 import pytest
 
@@ -47,7 +47,6 @@
     cupy_present = False
 
 try:
-    import cuda
 
     cuda_present = True
 except Exception:
@@ -63,6 +62,20 @@
 except ImportError:
     pyhip_present = False
 
+try:
+    import botorch
+    import torch
+    bayes_opt_botorch_present = True
+except ImportError:
+    bayes_opt_botorch_present = False
+
+try:
+    import gpytorch
+    import torch
+    bayes_opt_gpytorch_present = True
+except ImportError:
+    bayes_opt_gpytorch_present = False
+
 try:
     from autotuning_methodology.report_experiments import get_strategy_scores
     methodology_present = True
@@ -89,6 +102,8 @@
 skip_if_no_openmp = pytest.mark.skipif(not openmp_present, reason="No OpenMP found")
 skip_if_no_openacc = pytest.mark.skipif(not openacc_present, reason="No nvc++ on PATH")
 skip_if_no_pyhip = pytest.mark.skipif(not pyhip_present, reason="No PyHIP found")
+skip_if_no_bayesopt_gpytorch = pytest.mark.skipif(not bayes_opt_gpytorch_present, reason="Torch and GPyTorch not installed")
+skip_if_no_bayesopt_botorch = pytest.mark.skipif(not bayes_opt_botorch_present, reason="Torch and BOTorch not installed")
 skip_if_no_methodology = pytest.mark.skipif(not methodology_present, reason="Autotuning Methodology not found")
 
 
diff --git a/test/strategies/__init__.py b/test/strategies/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
index 096be38b0..57c43b4f7 100644
--- a/test/strategies/test_strategies.py
+++ b/test/strategies/test_strategies.py
@@ -7,6 +7,8 @@
 from kernel_tuner import util
 from kernel_tuner.interface import strategy_map
 
+from ..context import skip_if_no_bayesopt_botorch, skip_if_no_bayesopt_gpytorch
+
 cache_filename = os.path.dirname(os.path.realpath(__file__)) + "/../test_cache_file.json"
 
 @pytest.fixture
@@ -32,8 +34,16 @@ def vector_add():
 
     return ["vector_add", kernel_string, size, args, tune_params]
 
-
-@pytest.mark.parametrize('strategy', strategy_map)
+# skip some strategies if their dependencies are not installed
+strategies = []
+for s in strategy_map.keys():
+    if 'gpytorch' in s.lower():
+        strategies.append(pytest.param(s, marks=skip_if_no_bayesopt_gpytorch))
+    elif 'botorch' in s.lower():
+        strategies.append(pytest.param(s, marks=skip_if_no_bayesopt_botorch))
+    else:
+        strategies.append(s)
+@pytest.mark.parametrize('strategy', strategies)
 def test_strategies(vector_add, strategy):
 
     options = dict(popsize=5, neighbor='adjacent')

From 208fe7bad498808e1455d925093a5dd154f72c49 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 25 Oct 2024 16:07:16 -0700
Subject: [PATCH 010/253] Tuning new optimization algorithm

---
 kernel_tuner/hyper.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index b94c58986..37235a26b 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -86,13 +86,20 @@ def put_if_not_present(target_dict, key, value):
     return list(result_unique.values()), env
 
 if __name__ == "__main__":  # TODO remove in production
+    # hyperparams = {
+    #     'popsize': [10, 20, 30],
+    #     'maxiter': [50, 100, 150],
+    #     'w': [0.25, 0.5, 0.75],
+    #     'c1': [1.0, 2.0, 3.0],
+    #     'c2': [0.5, 1.0, 1.5]
+    # }
+    # result, env = tune_hyper_params('pso', hyperparams)
     hyperparams = {
-        'popsize': [10, 20, 30],
-        'maxiter': [50, 100, 150],
-        'w': [0.25, 0.5, 0.75],
-        'c1': [1.0, 2.0, 3.0],
-        'c2': [0.5, 1.0, 1.5]
+        'neighbor': ['Hamming', 'adjacent'],
+        'restart': [True, False],
+        'no_improvement': [1, 10, 25, 33, 50, 66, 75, 100, 200],
+        'random_walk': [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99]
     }
-    result, env = tune_hyper_params('pso', hyperparams)
+    result, env = tune_hyper_params('greedy_ils', hyperparams)
     print(result)
     print(env['best_config'])

From 6281a0c012ff8d71da2a6dd2c032db9722805518 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 25 Oct 2024 16:18:30 -0700
Subject: [PATCH 011/253] Added new BO strategies to interface

---
 kernel_tuner/interface.py | 4 ++++
 test/context.py           | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 06b5058fe..a557ae589 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -50,6 +50,10 @@
 from kernel_tuner.strategies import (
     basinhopping,
     bayes_opt,
+    bayes_opt_alt_BOTorch,
+    bayes_opt_GPyTorch,
+    bayes_opt_GPyTorch_lean,
+    bayes_opt_old,
     brute_force,
     diff_evo,
     dual_annealing,
diff --git a/test/context.py b/test/context.py
index e99591764..e7bb7cbfa 100644
--- a/test/context.py
+++ b/test/context.py
@@ -47,7 +47,8 @@
     cupy_present = False
 
 try:
-
+    import cuda
+    print(cuda)
     cuda_present = True
 except Exception:
     cuda_present = False

From 5ab70df39b7fe3ec368ea03b4dcf0f4c3ffc526c Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 25 Oct 2024 16:19:30 -0700
Subject: [PATCH 012/253] Made BO GPyTorch implementations importable

---
 kernel_tuner/strategies/bayes_opt_GPyTorch.py | 148 +++++++-------
 .../strategies/bayes_opt_GPyTorch_lean.py     | 185 ++++++++++--------
 2 files changed, 175 insertions(+), 158 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch.py b/kernel_tuner/strategies/bayes_opt_GPyTorch.py
index 784c7d6c0..39da1c30d 100644
--- a/kernel_tuner/strategies/bayes_opt_GPyTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_GPyTorch.py
@@ -1,9 +1,8 @@
-""" Bayesian Optimization implementation from the thesis by Willemsen """
-from copy import deepcopy
-from random import randint, shuffle
+"""Bayesian Optimization implementation from the thesis by Willemsen."""
 import itertools
-import warnings
 import time
+from copy import deepcopy
+from random import randint, shuffle
 from typing import Tuple
 
 import numpy as np
@@ -11,23 +10,42 @@
 
 # BO imports
 try:
-    import torch
     import gpytorch
-    from sklearn.gaussian_process.kernels import ConstantKernel, RBF, Matern
+    import torch
     from sklearn.exceptions import ConvergenceWarning
+    from sklearn.gaussian_process.kernels import RBF, ConstantKernel, Matern
     from skopt.sampler import Lhs
     bayes_opt_present = True
+
+    class ExactGPModel(gpytorch.models.ExactGP):
+        """Very simple exact Gaussian Process model."""
+
+        def __init__(self, train_x, train_y, likelihood):
+            super(gpytorch.models.ExactGP, self).__init__(train_x, train_y, likelihood)
+            self.mean_module = gpytorch.means.ZeroMean()    # TODO maybe try ConstantMean or LinearMean
+            self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=1.5))    # TODO maybe try ScaleKernel(MaternKernel)
+
+        def forward(self, x):
+            mean_x = self.mean_module(x)
+            covar_x = self.covar_module(x)
+            return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
 except ImportError:
     bayes_opt_present = False
 
-from kernel_tuner.strategies import minimize
+    class ExactGPModel():
+        def __init__(self, train_x, train_y, likelihood):
+            raise ImportError("GPyTorch not imported")
+        def forward(self, x):
+            raise ImportError("GPyTorch not imported")
+
 from kernel_tuner import util
+from kernel_tuner.strategies import minimize
 
 supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast"]
 
 
 def generate_normalized_param_dicts(tune_params: dict, eps: float) -> Tuple[dict, dict]:
-    """ Generates normalization and denormalization dictionaries """
+    """Generates normalization and denormalization dictionaries."""
     original_to_normalized = dict()
     normalized_to_original = dict()
     for param_name in tune_params.keys():
@@ -43,14 +61,14 @@ def generate_normalized_param_dicts(tune_params: dict, eps: float) -> Tuple[dict
 
 
 def normalize_parameter_space(param_space: list, tune_params: dict, normalized: dict) -> list:
-    """ Normalize the parameter space given a normalization dictionary """
+    """Normalize the parameter space given a normalization dictionary."""
     keys = list(tune_params.keys())
     param_space_normalized = list(tuple(normalized[keys[i]][v] for i, v in enumerate(params)) for params in param_space)
     return param_space_normalized
 
 
 def prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict: dict, max_threads: int):
-    """ Pruning of the parameter space to remove dimensions that have a constant parameter """
+    """Pruning of the parameter space to remove dimensions that have a constant parameter."""
     pruned_tune_params_mask = list()
     removed_tune_params = list()
     param_names = list(tune_params.keys())
@@ -73,7 +91,7 @@ def prune_parameter_space(parameter_space, tuning_options, tune_params, normaliz
 
 
 def tune(runner, kernel_options, device_options, tuning_options):
-    """ Find the best performing kernel configuration in the parameter space
+    """Find the best performing kernel configuration in the parameter space.
 
     :params runner: A runner from kernel_tuner.runners
     :type runner: kernel_tuner.runner
@@ -95,6 +113,10 @@ def tune(runner, kernel_options, device_options, tuning_options):
     :rtype: list(dict()), dict()
 
     """
+    if not bayes_opt_present:
+        raise ImportError(
+            "Error: optional dependencies for Bayesian Optimization not installed, please install torch and gpytorch"
+        )
 
     max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
     prune_parameterspace = tuning_options.strategy_options.get("pruneparameterspace", True)
@@ -137,20 +159,6 @@ def tune(runner, kernel_options, device_options, tuning_options):
     return results, runner.dev.get_environment()
 
 
-class ExactGPModel(gpytorch.models.ExactGP):
-    """ Very simple exact Gaussian Process model """
-
-    def __init__(self, train_x, train_y, likelihood):
-        super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
-        self.mean_module = gpytorch.means.ZeroMean()    # TODO maybe try ConstantMean or LinearMean
-        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=1.5))    # TODO maybe try ScaleKernel(MaternKernel)
-
-    def forward(self, x):
-        mean_x = self.mean_module(x)
-        covar_x = self.covar_module(x)
-        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
-
-
 class BayesianOptimization():
 
     def __init__(self, searchspace: list, removed_tune_params: list, kernel_options: dict, tuning_options: dict, normalize_dict: dict, denormalize_dict: dict,
@@ -170,8 +178,8 @@ def get_hyperparam(name: str, default, supported_values=list()):
             return value
 
         # get hyperparameters
-        cov_kernel_name = get_hyperparam("covariancekernel", "matern32", self.supported_cov_kernels)
-        cov_kernel_lengthscale = get_hyperparam("covariancelengthscale", 1.5)
+        get_hyperparam("covariancekernel", "matern32", self.supported_cov_kernels)
+        get_hyperparam("covariancelengthscale", 1.5)
         acquisition_function = get_hyperparam("method", "multi-advanced", self.supported_methods)
         acq = acquisition_function
         acq_params = get_hyperparam("methodparams", {})
@@ -276,19 +284,19 @@ def current_optimum(self, value: float):
         self.__current_optimum = value
 
     def is_better_than(self, a: float, b: float) -> bool:
-        """ Determines which one is better depending on optimization direction """
+        """Determines which one is better depending on optimization direction."""
         return a < b if self.opt_direction == 'min' else a > b
 
     def is_not_visited(self, index: int) -> bool:
-        """ Returns whether a searchspace index has not been visited """
+        """Returns whether a searchspace index has not been visited."""
         return not self.__visited_searchspace_indices[index]
 
     def is_valid(self, observation: float) -> bool:
-        """ Returns whether an observation is valid """
-        return not (observation == None or observation == self.invalid_value or observation == np.NaN)
+        """Returns whether an observation is valid."""
+        return not (observation is None or observation == self.invalid_value or observation == np.NaN)
 
     def get_af_by_name(self, name: str):
-        """ Get the basic acquisition functions by their name """
+        """Get the basic acquisition functions by their name."""
         basic_af_names = ['ei', 'poi', 'lcb']
         if name == 'ei':
             return self.af_expected_improvement
@@ -299,7 +307,7 @@ def get_af_by_name(self, name: str):
         raise ValueError(f"{name} not in {basic_af_names}")
 
     def set_acquisition_function(self, acquisition_function: str):
-        """ Set the acquisition function """
+        """Set the acquisition function."""
         if acquisition_function == 'poi':
             self.__af = self.af_probability_of_improvement
         elif acquisition_function == 'ei':
@@ -320,16 +328,16 @@ def set_acquisition_function(self, acquisition_function: str):
             raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
 
     def set_surrogate_model(self, cov_kernel_name: str, cov_kernel_lengthscale: float):
-        """ Set the surrogate model with a covariance function and lengthscale """
+        """Set the surrogate model with a covariance function and lengthscale."""
         # TODO remove or adapt this
         if cov_kernel_name == "constantrbf":
-            kernel = ConstantKernel(1.0, constant_value_bounds="fixed") * RBF(cov_kernel_lengthscale, length_scale_bounds="fixed")
+            ConstantKernel(1.0, constant_value_bounds="fixed") * RBF(cov_kernel_lengthscale, length_scale_bounds="fixed")
         elif cov_kernel_name == "rbf":
-            kernel = RBF(length_scale=cov_kernel_lengthscale, length_scale_bounds="fixed")
+            RBF(length_scale=cov_kernel_lengthscale, length_scale_bounds="fixed")
         elif cov_kernel_name == "matern32":
-            kernel = Matern(length_scale=cov_kernel_lengthscale, nu=1.5, length_scale_bounds="fixed")
+            Matern(length_scale=cov_kernel_lengthscale, nu=1.5, length_scale_bounds="fixed")
         elif cov_kernel_name == "matern52":
-            kernel = Matern(length_scale=cov_kernel_lengthscale, nu=2.5, length_scale_bounds="fixed")
+            Matern(length_scale=cov_kernel_lengthscale, nu=2.5, length_scale_bounds="fixed")
         else:
             raise ValueError(f"Acquisition function must be one of {self.supported_cov_kernels}, is {cov_kernel_name}")
         likelihood = gpytorch.likelihoods.GaussianLikelihood()
@@ -337,7 +345,7 @@ def set_surrogate_model(self, cov_kernel_name: str, cov_kernel_lengthscale: floa
         # self.__model = GaussianProcessRegressor(kernel=kernel, alpha=1e-10, normalize_y=True)    # maybe change alpha to a higher value such as 1e-5?
 
     def valid_params_observations(self) -> Tuple[list, list]:
-        """ Returns a list of valid observations and their parameter configurations """
+        """Returns a list of valid observations and their parameter configurations."""
         # if you do this every iteration, better keep it as cache and update in update_after_evaluation
         params = list()
         observations = list()
@@ -348,30 +356,30 @@ def valid_params_observations(self) -> Tuple[list, list]:
         return params, observations
 
     def unvisited(self) -> list:
-        """ Returns a list of unvisited parameter configurations - attention: cached version exists! """
+        """Returns a list of unvisited parameter configurations - attention: cached version exists!"""
         params = list(self.searchspace[index] for index, visited in enumerate(self.__visited_searchspace_indices) if visited is False)
         return params
 
     def find_param_config_index(self, param_config: tuple) -> int:
-        """ Find a parameter config index in the search space if it exists """
+        """Find a parameter config index in the search space if it exists."""
         return self.searchspace.index(param_config)
 
     def find_param_config_unvisited_index(self, param_config: tuple) -> int:
-        """ Find a parameter config index in the unvisited cache if it exists """
+        """Find a parameter config index in the unvisited cache if it exists."""
         return self.unvisited_cache.index(param_config)
 
     def normalize_param_config(self, param_config: tuple) -> tuple:
-        """ Normalizes a parameter configuration """
+        """Normalizes a parameter configuration."""
         normalized = tuple(self.normalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
         return normalized
 
     def denormalize_param_config(self, param_config: tuple) -> tuple:
-        """ Denormalizes a parameter configuration """
+        """Denormalizes a parameter configuration."""
         denormalized = tuple(self.denormalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
         return denormalized
 
     def unprune_param_config(self, param_config: tuple) -> tuple:
-        """ In case of pruned dimensions, adds the removed dimensions back in the param config """
+        """In case of pruned dimensions, adds the removed dimensions back in the param config."""
         unpruned = list()
         pruned_count = 0
         for removed in self.removed_tune_params:
@@ -383,7 +391,7 @@ def unprune_param_config(self, param_config: tuple) -> tuple:
         return tuple(unpruned)
 
     def update_after_evaluation(self, observation: float, index: int, param_config: tuple):
-        """ Adjust the visited and valid index records accordingly """
+        """Adjust the visited and valid index records accordingly."""
         validity = self.is_valid(observation)
         self.__visited_num += 1
         self.__observations[index] = observation
@@ -398,11 +406,11 @@ def update_after_evaluation(self, observation: float, index: int, param_config:
                 self.current_optimum = observation
 
     def predict(self, x) -> Tuple[float, float]:
-        """ Returns a mean and standard deviation predicted by the surrogate model for the parameter configuration """
+        """Returns a mean and standard deviation predicted by the surrogate model for the parameter configuration."""
         return self.__model.predict([x], return_std=True)
 
     def predict_list(self, lst: list) -> Tuple[np.ndarray, np.ndarray]:
-        """ Returns a list of means and standard deviations predicted by the surrogate model for the parameter configurations, and separate lists of means and standard deviations """
+        """Returns a list of means and standard deviations predicted by the surrogate model for the parameter configurations, and separate lists of means and standard deviations."""
         with torch.no_grad(), gpytorch.settings.fast_pred_var():
             # TODO use torch.cuda for GPU
             test_x = torch.Tensor(lst)
@@ -412,7 +420,7 @@ def predict_list(self, lst: list) -> Tuple[np.ndarray, np.ndarray]:
             return mu.numpy(), std.numpy()
 
     def evaluate_objective_function(self, param_config: tuple) -> float:
-        """ Evaluates the objective function """
+        """Evaluates the objective function."""
         param_config = self.unprune_param_config(param_config)
         denormalized_param_config = self.denormalize_param_config(param_config)
         if not util.config_valid(denormalized_param_config, self.tuning_options, self.max_threads):
@@ -423,7 +431,7 @@ def evaluate_objective_function(self, param_config: tuple) -> float:
         return val
 
     def add_model_hyperparams_to_result(self, param_config: tuple):
-        """ Add the model parameters (loss and noise) to the results dict at the last result """
+        """Add the model parameters (loss and noise) to the results dict at the last result."""
         # assert that the results index corresponds to the last index
         assert self.find_config_index_in_results(param_config) == len(self.results) - 1
 
@@ -432,7 +440,7 @@ def add_model_hyperparams_to_result(self, param_config: tuple):
             self.results[-1][key] = value
 
     def find_config_index_in_results(self, param_config: tuple):
-        """ Find the index of a parameter configuration in the results. Beware that this can be very slow! """
+        """Find the index of a parameter configuration in the results. Beware that this can be very slow!"""
         found_indices = list()
         for results_index, result_dict in enumerate(self.results):
             keys = list(result_dict.keys())
@@ -446,11 +454,11 @@ def find_config_index_in_results(self, param_config: tuple):
         return found_indices[0]
 
     def dimensions(self) -> list:
-        """ List of parameter values per parameter """
+        """List of parameter values per parameter."""
         return self.tune_params.values()
 
     def draw_random_sample(self) -> Tuple[list, int]:
-        """ Draw a random sample from the unvisited parameter configurations """
+        """Draw a random sample from the unvisited parameter configurations."""
         if len(self.unvisited_cache) < 1:
             raise ValueError("Searchspace exhausted during random sample draw as no valid configurations were found")
         index = randint(0, len(self.unvisited_cache) - 1)    # NOSONAR
@@ -459,7 +467,7 @@ def draw_random_sample(self) -> Tuple[list, int]:
         return param_config, actual_index
 
     def draw_latin_hypercube_samples(self, num_samples: int) -> list:
-        """ Draws an LHS-distributed sample from the search space """
+        """Draws an LHS-distributed sample from the search space."""
         if self.searchspace_size < num_samples:
             raise ValueError("Can't sample more than the size of the search space")
         if self.sampling_crit is None:
@@ -482,7 +490,7 @@ def draw_latin_hypercube_samples(self, num_samples: int) -> list:
         return list(zip(normalized_param_configs, indices))
 
     def train_model_hyperparams(self):
-        """ Train the model and likelihood hyperparameters """
+        """Train the model and likelihood hyperparameters."""
         # set to training modes
         self.__model.train()
         self.__likelihood.train()
@@ -494,8 +502,6 @@ def train_model_hyperparams(self):
         mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.__likelihood, self.__model)
 
         loss = 0
-        lengthscale = 0
-        noise = 0
         for i in range(self.training_iter):
             # Zero gradients from previous iteration
             optimizer.zero_grad()
@@ -521,7 +527,7 @@ def train_model_hyperparams(self):
         # print(f"Loss: {self.hyperparams['loss']}, lengthscale: {self.hyperparams['lengthscale']}, noise: {self.hyperparams['noise']}")
 
     def initial_sample(self):
-        """ Draws an initial sample using random sampling """
+        """Draws an initial sample using random sampling."""
         if self.num_initial_samples <= 0:
             raise ValueError("At least one initial sample is required")
         if self.sampling_method == 'lhs':
@@ -563,7 +569,7 @@ def initial_sample(self):
         self.cv_norm_maximum = self.initial_std
 
     def contextual_variance(self, std: list):
-        """ Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018) """
+        """Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018)."""
         if not self.af_params['explorationfactor'] == 'CV':
             return None
         if self.opt_direction == 'min':
@@ -581,7 +587,7 @@ def contextual_variance(self, std: list):
         return np.mean(std) / self.current_optimum
 
     def __optimize(self, max_fevals):
-        """ Find the next best candidate configuration(s), evaluate those and update the model accordingly """
+        """Find the next best candidate configuration(s), evaluate those and update the model accordingly."""
         while self.fevals < max_fevals:
             if self.__visited_num >= self.searchspace_size:
                 raise ValueError(self.error_message_searchspace_fully_observed)
@@ -598,7 +604,7 @@ def __optimize(self, max_fevals):
         return self.results
 
     def __optimize_multi(self, max_fevals):
-        """ Optimize with a portfolio of multiple acquisition functions. Predictions are always only taken once. Skips AFs if they suggest X/max_evals duplicates in a row, prefers AF with best discounted average. """
+        """Optimize with a portfolio of multiple acquisition functions. Predictions are always only taken once. Skips AFs if they suggest X/max_evals duplicates in a row, prefers AF with best discounted average."""
         if self.opt_direction != 'min':
             raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
         # calculate how many times an AF can suggest a duplicate candidate before the AF is skipped
@@ -709,7 +715,7 @@ def __optimize_multi(self, max_fevals):
         return self.results
 
     def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
-        """ Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once, unless increase_precision is true. Skips AFs if they are consistently worse than the mean of discounted observations, promotes AFs if they are consistently better than this mean. """
+        """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once, unless increase_precision is true. Skips AFs if they are consistently worse than the mean of discounted observations, promotes AFs if they are consistently better than this mean."""
         if self.opt_direction != 'min':
             raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
         aqfs = self.multi_afs
@@ -811,7 +817,7 @@ def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
         return self.results
 
     def __optimize_multi_fast(self, max_fevals):
-        """ Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once. """
+        """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once."""
         while self.fevals < max_fevals:
             aqfs = self.multi_afs
             # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
@@ -834,14 +840,13 @@ def __optimize_multi_fast(self, max_fevals):
         return self.results
 
     def af_random(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function returning a randomly shuffled list for comparison """
+        """Acquisition function returning a randomly shuffled list for comparison."""
         list_random = range(len(self.unvisited_cache))
         shuffle(list_random)
         return list_random
 
     def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function Probability of Improvement (PI) """
-
+        """Acquisition function Probability of Improvement (PI)."""
         # prefetch required data
         x_mu, x_std = predictions
         if hyperparam is None:
@@ -856,8 +861,7 @@ def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> li
         return list_prob_improvement
 
     def af_expected_improvement(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function Expected Improvement (EI) """
-
+        """Acquisition function Expected Improvement (EI)."""
         # prefetch required data
         x_mu, x_std = predictions
         if hyperparam is None:
@@ -874,8 +878,7 @@ def af_expected_improvement(self, predictions=None, hyperparam=None) -> list:
         return list_exp_improvement
 
     def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function Lower Confidence Bound (LCB) """
-
+        """Acquisition function Lower Confidence Bound (LCB)."""
         x_mu, x_std = predictions
         if hyperparam is None:
             hyperparam = self.af_params['explorationfactor']
@@ -886,8 +889,7 @@ def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
         return list_lower_confidence_bound
 
     def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010 """
-
+        """Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010."""
         # prefetch required data
         x_mu, x_std = predictions
         if hyperparam is None:
@@ -905,7 +907,7 @@ def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None)
         return list_lower_confidence_bound
 
     def visualize_after_opt(self):
-        """ Visualize the model after the optimization """
+        """Visualize the model after the optimization."""
         print(self.__model.kernel_.get_params())
         print(self.__model.log_marginal_likelihood())
         import matplotlib.pyplot as plt
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
index 59e385421..cc991dadf 100644
--- a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
+++ b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
@@ -1,22 +1,53 @@
-""" Lean implementation of Bayesian Optimization with GPyTorch """
+"""Lean implementation of Bayesian Optimization with GPyTorch."""
 # python
+import ast  # for casting strings to dict
+import warnings
 from copy import deepcopy
-from typing import Tuple
-from random import randint, shuffle, choice
 from math import ceil
-import warnings
-import ast    # for casting strings to dict
+from random import choice, randint, shuffle
+from typing import Tuple
 
 # external
 import numpy as np
 from numpy.random import default_rng
-import torch
-import gpytorch
-import arviz as az
 
-# internal
-from kernel_tuner.util import get_valid_configs
-from kernel_tuner.strategies import minimize
+from kernel_tuner.runners.runner import Runner
+from kernel_tuner.searchspace import Searchspace
+
+# optional
+try:
+    import gpytorch
+    import torch
+    # import arviz as az
+    bayes_opt_present = True
+
+    from torch import Tensor
+
+    class ExactGPModel(gpytorch.models.ExactGP):
+        def __init__(self, train_x, train_y, likelihood, cov_kernel_name: str, cov_kernel_lengthscale: float):
+            super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
+            self.mean_module = gpytorch.means.ZeroMean()
+            if cov_kernel_name == 'matern':
+                self.covar_module = gpytorch.kernels.MaternKernel(nu=cov_kernel_lengthscale)
+            elif cov_kernel_name == 'matern_scalekernel':
+                self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=cov_kernel_lengthscale))
+
+        def forward(self, x):
+            mean_x = self.mean_module(x)
+            covar_x = self.covar_module(x)
+            return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
+except ImportError:
+    bayes_opt_present = False
+
+    class Tensor():
+        pass
+
+    class ExactGPModel():
+        def __init__(self, train_x, train_y, likelihood):
+            raise ImportError("GPyTorch not imported")
+        def forward(self, x):
+            raise ImportError("GPyTorch not imported")
+
 
 # set supported hyperparameter values
 supported_precisions = ['float', 'double']
@@ -39,8 +70,8 @@ def default_optimizer_learningrates(key):
     return defaults[key]
 
 
-def tune(runner, kernel_options, device_options, tuning_options):
-    """ Find the best performing kernel configuration in the parameter space
+def tune(searchspace: Searchspace, runner: Runner, tuning_options):
+    """Find the best performing kernel configuration in the parameter space.
 
     :params runner: A runner from kernel_tuner.runners
     :type runner: kernel_tuner.runner
@@ -62,6 +93,10 @@ def tune(runner, kernel_options, device_options, tuning_options):
     :rtype: list(dict()), dict()
 
     """
+    if not bayes_opt_present:
+        raise ImportError(
+            "Error: optional dependencies for Bayesian Optimization not installed, please install torch and gpytorch"
+        )
 
     # set CUDA availability
     use_cuda = False
@@ -75,14 +110,13 @@ def tune(runner, kernel_options, device_options, tuning_options):
     optimization_direction = options.get("optimization_direction", 'min')
     num_initial_samples = int(options.get("popsize", 20))
     max_fevals = int(options.get("max_fevals", 220))
-    max_threads = runner.dev.max_threads
 
     # enabling scaling will unscale and snap inputs on evaluation, more efficient to scale all at once and keep unscaled values
     tuning_options["snap"] = False
     tuning_options["scaling"] = False
 
     # prune the search space using restrictions
-    parameter_space = get_valid_configs(tuning_options, max_threads)
+    parameter_space = searchspace.list.copy()
 
     # limit max_fevals to max size of the parameter space
     max_fevals = min(len(parameter_space), max_fevals)
@@ -92,32 +126,16 @@ def tune(runner, kernel_options, device_options, tuning_options):
         )
 
     # execute Bayesian Optimization
-    BO = BayesianOptimization(parameter_space, kernel_options, tuning_options, runner, num_initial_samples, optimization_direction, device)
+    BO = BayesianOptimization(parameter_space, tuning_options, runner, num_initial_samples, optimization_direction, device)
     all_results = BO.optimize(max_fevals)
 
     return all_results, runner.dev.get_environment()
 
 
-class ExactGPModel(gpytorch.models.ExactGP):
-
-    def __init__(self, train_x, train_y, likelihood, cov_kernel_name: str, cov_kernel_lengthscale: float):
-        super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
-        self.mean_module = gpytorch.means.ZeroMean()
-        if cov_kernel_name == 'matern':
-            self.covar_module = gpytorch.kernels.MaternKernel(nu=cov_kernel_lengthscale)
-        elif cov_kernel_name == 'matern_scalekernel':
-            self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=cov_kernel_lengthscale))
-
-    def forward(self, x):
-        mean_x = self.mean_module(x)
-        covar_x = self.covar_module(x)
-        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
-
-
 class BayesianOptimization:
 
-    def __init__(self, parameter_space: list, kernel_options, tuning_options, runner, num_initial_samples: int, optimization_direction: str,
-                 device: torch.device) -> None:
+    def __init__(self, parameter_space: list, tuning_options, runner: Runner, num_initial_samples: int, optimization_direction: str,
+                 device) -> None:
         self.animate = False    # TODO remove
 
         # set defaults
@@ -128,7 +146,6 @@ def __init__(self, parameter_space: list, kernel_options, tuning_options, runner
         self.current_optimal_config = None
 
         # set Kernel Tuner data
-        self.kernel_options = kernel_options
         self.tuning_options = tuning_options
         self.runner = runner
         self.max_threads = runner.dev.max_threads
@@ -157,7 +174,7 @@ def __init__(self, parameter_space: list, kernel_options, tuning_options, runner
         self.af_params = af_params
 
         # set Tensors
-        self.device = device
+        self.device: torch.device = device
         self.out_device = torch.device("cpu")
         self.size = len(parameter_space)
         self.index_counter = torch.arange(self.size)
@@ -216,12 +233,12 @@ def __init__(self, parameter_space: list, kernel_options, tuning_options, runner
 
     @property
     def train_x(self):
-        """ Get the valid parameter configurations """
+        """Get the valid parameter configurations."""
         return self.param_configs_scaled[self.valid_configs].to(self.device)
 
     @property
     def train_y(self):
-        """ Get the valid results """
+        """Get the valid results."""
         outputs = self.results[self.valid_configs]
         if self.scaled_output:
             # z-score, remove mean and make unit variance to scale it to N(0,1)
@@ -231,7 +248,7 @@ def train_y(self):
 
     @property
     def train_y_err(self):
-        """ Get the error on the valid results """
+        """Get the error on the valid results."""
         std = self.results_std[self.valid_configs]
         if self.scaled_output and std.std() > 0.0:
             std = (std - std.mean()) / std.std()    # use z-score to get normalized variability
@@ -239,39 +256,39 @@ def train_y_err(self):
 
     @property
     def test_x(self):
-        """ Get the not yet visited parameter configurations """
+        """Get the not yet visited parameter configurations."""
         return self.param_configs_scaled[self.unvisited_configs].to(self.device)
 
     @property
     def test_x_unscaled(self):
-        """ Get the unscaled, not yet visited parameter configurations """
+        """Get the unscaled, not yet visited parameter configurations."""
         return self.param_configs[self.unvisited_configs]
 
     @property
     def test_y_err(self):
-        """ Get the expected error on the test set """
+        """Get the expected error on the test set."""
         train_y_err = self.train_y_err
         return torch.full((self.size - len(train_y_err), ), torch.mean(train_y_err))
 
     @property
     def invalid_x(self):
-        """ Get the invalid parameter configurations by checking which visited configs are not valid (equivalent to checking which unvisited configs are valid) """
+        """Get the invalid parameter configurations by checking which visited configs are not valid (equivalent to checking which unvisited configs are valid)."""
         invalid_mask = (self.unvisited_configs == self.valid_configs)
         return self.param_configs[invalid_mask]
 
     def true_param_config_index(self, target_index: int) -> int:
-        """ The index required to get the true config param index when dealing with test_x """
+        """The index required to get the true config param index when dealing with test_x."""
         # get the index of the #index-th True (for example the 9th+1 True could be index 13 because there are 4 Falses in between)
         masked_counter = self.index_counter[self.unvisited_configs]
         return masked_counter[target_index]
 
-    def true_param_config_indices(self, target_indices: torch.Tensor) -> torch.Tensor:
-        """ Same as true_param_config_index, but for an array of targets instead. """
+    def true_param_config_indices(self, target_indices: Tensor) -> Tensor:
+        """Same as true_param_config_index, but for an array of targets instead."""
         masked_counter = self.index_counter[self.unvisited_configs]
         return masked_counter.index_select(0, target_indices)
 
     def initialize_model(self, take_initial_sample=True, train_hyperparams=True):
-        """ Initialize the surrogate model """
+        """Initialize the surrogate model."""
         # self.initial_sample_std = self.min_std
         if take_initial_sample:
             self.initial_sample()
@@ -311,7 +328,7 @@ def initialize_model(self, take_initial_sample=True, train_hyperparams=True):
             self.train_hyperparams(0)
 
     def import_cached_evaluations(self):
-        """ Import the previously evaluated configurations into this run """
+        """Import the previously evaluated configurations into this run."""
         # make strings of all the parameter configurations in the search space
         param_config_strings = list()
         for param_config in self.true_param_configs:
@@ -329,7 +346,7 @@ def import_cached_evaluations(self):
         print(f"Imported {len(self.all_results)} previously evaluated configurations.")
 
     def initial_sample(self):
-        """ Take an initial sample of the parameter space """
+        """Take an initial sample of the parameter space."""
         list_param_config_indices = list(self.index_counter[~self.unvisited_configs])
 
         # generate a random offset from a normal distribution to add to the sample indices
@@ -378,8 +395,8 @@ def initial_sample(self):
         # save a boolean mask of the initial samples
         self.inital_sample_configs = self.valid_configs.detach().clone()
 
-    def get_lhs_samples(self, random_offsets: np.ndarray) -> torch.Tensor:
-        """ Get a centered Latin Hypercube Sample with a random offset """
+    def get_lhs_samples(self, random_offsets: np.ndarray) -> Tensor:
+        """Get a centered Latin Hypercube Sample with a random offset."""
         n_samples = self.num_initial_samples - self.fevals
 
         # first get the seperate parameter values to make possibly fictional distributed parameter configurations
@@ -444,7 +461,7 @@ def get_lhs_samples(self, random_offsets: np.ndarray) -> torch.Tensor:
         return param_configs_indices
 
     def take_min_max_initial_samples(self, list_param_config_indices: list, samples_per_parameter=1) -> list:
-        """ Take the minimum parameters and the maximum for each parameter to establish the effect of individual parameters """
+        """Take the minimum parameters and the maximum for each parameter to establish the effect of individual parameters."""
         # number of samples required is at least (samples_per_parameter) * (number of parameters) + 1
 
         # first get the individual parameter values and sort them
@@ -524,7 +541,7 @@ def take_min_max_initial_samples(self, list_param_config_indices: list, samples_
         return list_param_config_indices
 
     def get_middle_index_of_least_evaluated_region(self) -> int:
-        """ Get the middle index of the region of parameter configurations that is the least visited """
+        """Get the middle index of the region of parameter configurations that is the least visited."""
         # This uses the largest distance between visited parameter configurations. That means it does not properly take the parameters into account, only the index of the parameter configurations, whereas LHS does.
         distance_tensor = torch.arange(self.size)
 
@@ -542,7 +559,7 @@ def get_middle_index_of_least_evaluated_region(self) -> int:
         return middle_index
 
     def train_hyperparams(self, training_iter: int):
-        """ Optimize the surrogate model hyperparameters iteratively """
+        """Optimize the surrogate model hyperparameters iteratively."""
         self.model.train()
         self.likelihood.train()
 
@@ -601,7 +618,7 @@ def closure():
         self.likelihood.eval()
 
     def optimize(self, max_fevals: int) -> Tuple[tuple, float]:    #NOSONAR
-        """ Optimize the objective """
+        """Optimize the objective."""
         predictions_tuple = None
         short_param_config_index = None
         last_invalid = False
@@ -625,9 +642,9 @@ def optimize(self, max_fevals: int) -> Tuple[tuple, float]:    #NOSONAR
                 param_config_index = least_evaluated_region_index
                 short_param_config_index = -1
                 if mean_has_NaN:
-                    warning_reason = f"there were NaN in the predicted mean"
+                    warning_reason = "there were NaN in the predicted mean"
                 elif std_has_NaN:
-                    warning_reason = f"there were NaN in the predicted std"
+                    warning_reason = "there were NaN in the predicted std"
                 else:
                     warning_reason = "all STDs were the same"
                 warnings.warn(
@@ -677,10 +694,10 @@ def optimize(self, max_fevals: int) -> Tuple[tuple, float]:    #NOSONAR
         return self.all_results
 
     def objective_function(self, param_config: tuple) -> float:
-        return minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.all_results, check_restrictions=False)
+        return self.runner.run(param_config, self.tuning_options)
 
     def evaluate_config(self, param_config_index: int) -> float:
-        """ Evaluates a parameter configuration, returns the time """
+        """Evaluates a parameter configuration, returns the time."""
         param_config = self.true_param_configs[param_config_index]
         time = self.objective_function(param_config)
         self.register_result(time, param_config_index)
@@ -689,9 +706,9 @@ def evaluate_config(self, param_config_index: int) -> float:
         return time
 
     def register_result(self, result: float, param_config_index: int):
-        """ Registers the result to the Tensors and adds the hyperparameters to the results dict """
+        """Registers the result to the Tensors and adds the hyperparameters to the results dict."""
         # set the unvisited Tensors
-        if self.unvisited_configs[param_config_index] == False:
+        if self.unvisited_configs[param_config_index] is False:
             raise ValueError(f"The param config index {param_config_index} was already set to False!")
         self.unvisited_configs[param_config_index] = False
 
@@ -712,13 +729,13 @@ def register_result(self, result: float, param_config_index: int):
         # TODO check if it is possible to write the results with hyperparameters to the cache if not in simulation mode, maybe with observer?
 
     def update_unique_results(self):
-        """ Updates the unique results dictionary """
+        """Updates the unique results dictionary."""
         record = self.all_results[-1]
         # make a unique string by taking every value in a result, if it already exists, it is overwritten
         self.unique_results.update({",".join([str(v) for k, v in record.items() if k in self.tuning_options.tune_params]): record["time"]})
 
-    def predict_list(self) -> Tuple[torch.Tensor, torch.Tensor]:
-        """ Returns the means and standard deviations predicted by the surrogate model for the unvisited parameter configurations """
+    def predict_list(self) -> Tuple[Tensor, Tensor]:
+        """Returns the means and standard deviations predicted by the surrogate model for the unvisited parameter configurations."""
         with torch.no_grad(), gpytorch.settings.fast_pred_samples(), gpytorch.settings.fast_pred_var():
             try:
                 observed_pred = self.likelihood(self.model(self.test_x))
@@ -735,16 +752,16 @@ def predict_list(self) -> Tuple[torch.Tensor, torch.Tensor]:
                 warnings.warn(str(e), RuntimeWarning)
                 return torch.ones_like(self.test_x), torch.zeros_like(self.test_x)
 
-    def get_diff_improvement(self, y_mu, y_std, fplus) -> torch.Tensor:
-        """ compute probability of improvement by assuming normality on the difference in improvement """
+    def get_diff_improvement(self, y_mu, y_std, fplus) -> Tensor:
+        """Compute probability of improvement by assuming normality on the difference in improvement."""
         diff_improvement = (y_mu - fplus) / y_std    # y_std can be very small, causing diff_improvement to be very large
         diff_improvement = (diff_improvement - diff_improvement.mean()) / max(diff_improvement.std(), self.min_std)    # force to N(0,1) with z-score
         if self.optimization_direction == 'max':
             diff_improvement = -diff_improvement
         return diff_improvement
 
-    def contextual_variance(self, mean: torch.Tensor, std: torch.Tensor):
-        """ Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018) """
+    def contextual_variance(self, mean: Tensor, std: Tensor):
+        """Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018)."""
         if not self.af_params['explorationfactor'] == 'CV':
             raise ValueError(f"Contextual Variance was called, but is not set as the exploration factor ({self.af_params['explorationfactor']})")
         if self.optimization_direction == 'max':
@@ -767,14 +784,13 @@ def contextual_variance(self, mean: torch.Tensor, std: torch.Tensor):
             raise NotImplementedError("Contextual Variance has not yet been implemented for non-scaled outputs")
 
     def af_random(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function returning a randomly shuffled list for comparison """
+        """Acquisition function returning a randomly shuffled list for comparison."""
         list_random = list(range(len(self.unvisited_param_configs)))
         shuffle(list_random)
         return list_random
 
-    def af_probability_of_improvement_tensor(self, predictions: Tuple[torch.Tensor, torch.Tensor], hyperparam=None) -> torch.Tensor:
-        """ Acquisition function Probability of Improvement (PoI) tensor-based """
-
+    def af_probability_of_improvement_tensor(self, predictions: Tuple[Tensor, Tensor], hyperparam=None) -> Tensor:
+        """Acquisition function Probability of Improvement (PoI) tensor-based."""
         # prefetch required data
         y_mu, y_std = predictions
         if hyperparam is None:
@@ -790,9 +806,8 @@ def af_probability_of_improvement_tensor(self, predictions: Tuple[torch.Tensor,
         #     raise FloatingPointError("You need to scale the diff_improvement-values!")
         return cdf
 
-    def af_expected_improvement_tensor(self, predictions: Tuple[torch.Tensor, torch.Tensor], hyperparam=None) -> torch.Tensor:
-        """ Acquisition function Expected Improvement (EI) tensor-based """
-
+    def af_expected_improvement_tensor(self, predictions: Tuple[Tensor, Tensor], hyperparam=None) -> Tensor:
+        """Acquisition function Expected Improvement (EI) tensor-based."""
         # prefetch required data
         y_mu, y_std = predictions
         if hyperparam is None:
@@ -819,7 +834,7 @@ def af_expected_improvement_tensor(self, predictions: Tuple[torch.Tensor, torch.
     """                  """
 
     def apply_scaling_to_inputs(self):
-        """ Scale the inputs using min-max normalization (0-1) and remove constant parameters """
+        """Scale the inputs using min-max normalization (0-1) and remove constant parameters."""
         param_configs_scaled = torch.zeros_like(self.param_configs)
 
         # first get the scaling factors of each parameter
@@ -849,13 +864,13 @@ def apply_scaling_to_inputs(self):
             self.param_configs_scaled[param_config_index] = param_config[unchanging_params_tensor]
         self.nonstatic_params = unchanging_params_tensor
 
-    def find_nearest(self, value, array: torch.Tensor):
-        """ Find the value nearest to the given value in the array """
+    def find_nearest(self, value, array: Tensor):
+        """Find the value nearest to the given value in the array."""
         index = (torch.abs(array - value)).argmin()
         return array[index]
 
     def get_hyperparam(self, name: str, default, supported_values=list(), type=None, cast=None):
-        """ Retrieve the value of a hyperparameter based on the name - beware that cast can be a reference to any function """
+        """Retrieve the value of a hyperparameter based on the name - beware that cast can be a reference to any function."""
         value = self.tuning_options.strategy_options.get(name, default)
 
         # check with predifined value list
@@ -873,12 +888,12 @@ def get_hyperparam(self, name: str, default, supported_values=list(), type=None,
             value = float(value)
         return value
 
-    def remove_from_predict_list(self, p: Tuple[torch.Tensor, torch.Tensor], i: int) -> Tuple[torch.Tensor, torch.Tensor]:
-        """ Remove an index from a tuple of predictions """
+    def remove_from_predict_list(self, p: Tuple[Tensor, Tensor], i: int) -> Tuple[Tensor, Tensor]:
+        """Remove an index from a tuple of predictions."""
         return torch.cat([p[0][:i], p[0][i + 1:]]), torch.cat([p[1][:i], p[1][i + 1:]])
 
     def set_acquisition_function(self, acquisition_function: str):
-        """ Set the acquisition function based on the name """
+        """Set the acquisition function based on the name."""
         if acquisition_function not in supported_methods:
             raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
 
@@ -889,8 +904,8 @@ def set_acquisition_function(self, acquisition_function: str):
         elif acquisition_function == 'random':
             self.acquisition_function = self.af_random
 
-    def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[torch.Tensor, dict]:
-        """ transform non-numerical or mixed-type parameters to numerical Tensor, also return new tune_params """
+    def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[Tensor, dict]:
+        """Transform non-numerical or mixed-type parameters to numerical Tensor, also return new tune_params."""
         parameter_space = deepcopy(parameter_space)
         number_of_params = len(parameter_space[0])
 
@@ -920,7 +935,7 @@ def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[torch.Te
         return torch.tensor(parameter_space, dtype=self.dtype).to(self.device), tune_params
 
     def visualize(self):
-        """ Visualize the surrogate model and observations in a plot """
+        """Visualize the surrogate model and observations in a plot."""
         if self.fevals < 220:
             return None
         from matplotlib import pyplot as plt

From e407a84d3332292c3f5b0e2aa9143bf3a4cf4c11 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 25 Oct 2024 17:30:12 -0700
Subject: [PATCH 013/253] Compatibility with optional dependencies

---
 kernel_tuner/strategies/bayes_opt.py     | 21 ++++--
 kernel_tuner/strategies/bayes_opt_old.py | 96 ++++++++++++------------
 2 files changed, 59 insertions(+), 58 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
index 89318cd04..c384ecb97 100644
--- a/kernel_tuner/strategies/bayes_opt.py
+++ b/kernel_tuner/strategies/bayes_opt.py
@@ -451,7 +451,7 @@ def predict_list(self, lst: list) -> Tuple[list, list, list]:
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             mu, std = self.__model.predict(lst, return_std=True)
-            return mu, std
+            return list(zip(mu, std)), mu, std
 
     def fit_observations_to_model(self):
         """Update the model based on the current list of observations."""
@@ -540,7 +540,7 @@ def initial_sample(self):
             if self.is_valid(observation):
                 collected_samples += 1
         self.fit_observations_to_model()
-        _, std = self.predict_list(self.unvisited_cache)
+        _, _, std = self.predict_list(self.unvisited_cache)
         self.initial_sample_mean = np.mean(self.__valid_observations)
         # Alternatively:
         # self.initial_sample_std = np.std(self.__valid_observations)
@@ -736,11 +736,11 @@ def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
                 if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
                     break
                 if increase_precision is True:
-                    predictions, _, std = self.predict_list(self.unvisited_cache)
+                    predictions = self.predict_list(self.unvisited_cache)
                     hyperparam = self.contextual_variance(std)
                 list_of_acquisition_values = af(predictions, hyperparam)
                 best_af = self.argopt(list_of_acquisition_values)
-                del predictions[best_af]  # to avoid going out of bounds
+                # del predictions[best_af]  # to avoid going out of bounds
                 candidate_params = self.unvisited_cache[best_af]
                 candidate_index = self.find_param_config_index(candidate_params)
                 observation = self.evaluate_objective_function(candidate_params)
@@ -855,13 +855,12 @@ def af_random(self, predictions=None, hyperparam=None) -> list:
     def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> list:
         """Acquisition function Probability of Improvement (PI)."""
         # prefetch required data
-        x_mu, x_std = predictions
         if hyperparam is None:
             hyperparam = self.af_params["explorationfactor"]
         fplus = self.current_optimum - hyperparam
 
         # precompute difference of improvement
-        list_diff_improvement = list(-((fplus - x_mu) / (x_std + 1e-9)) for (x_mu, x_std) in predictions)
+        list_diff_improvement = list(-((fplus - x_mu) / (x_std + 1e-9)) for x_mu, x_std in predictions[0])
 
         # compute probability of improvement with CDF in bulk
         list_prob_improvement = norm.cdf(list_diff_improvement)
@@ -870,10 +869,15 @@ def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> li
     def af_expected_improvement(self, predictions=None, hyperparam=None) -> list:
         """Acquisition function Expected Improvement (EI)."""
         # prefetch required data
-        x_mu, x_std = predictions
         if hyperparam is None:
             hyperparam = self.af_params["explorationfactor"]
         fplus = self.current_optimum - hyperparam
+        if len(predictions) == 3:
+            predictions, x_mu, x_std = predictions
+        elif len(predictions) == 2:
+            x_mu, x_std = predictions
+        else:
+            raise ValueError(f"Invalid predictions size {len(predictions)}")
 
         # precompute difference of improvement, CDF and PDF in bulk
         list_diff_improvement = list((fplus - x_mu) / (x_std + 1e-9) for (x_mu, x_std) in predictions)
@@ -892,6 +896,7 @@ def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
         if hyperparam is None:
             hyperparam = self.af_params["explorationfactor"]
         beta = hyperparam
+        _, x_mu, x_std = predictions
 
         # compute LCB in bulk
         list_lower_confidence_bound = (x_mu - beta * x_std)
@@ -900,7 +905,7 @@ def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
     def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None) -> list:
         """Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010."""
         # prefetch required data
-        x_mu, x_std = predictions
+        _, x_mu, x_std = predictions
         if hyperparam is None:
             hyperparam = self.af_params["explorationfactor"]
 
diff --git a/kernel_tuner/strategies/bayes_opt_old.py b/kernel_tuner/strategies/bayes_opt_old.py
index 6107fad0b..c3381731a 100644
--- a/kernel_tuner/strategies/bayes_opt_old.py
+++ b/kernel_tuner/strategies/bayes_opt_old.py
@@ -1,32 +1,33 @@
-""" Bayesian Optimization implementation from the thesis by Willemsen """
-from copy import deepcopy
-from random import randint, shuffle
+"""Bayesian Optimization implementation from the thesis by Willemsen."""
 import itertools
-import warnings
 import time
+import warnings
+from copy import deepcopy
+from random import randint, shuffle
 
 import numpy as np
 
 # BO imports
 try:
     from typing import Tuple
+
     from scipy.stats import norm
-    from sklearn.gaussian_process import GaussianProcessRegressor
-    from sklearn.gaussian_process.kernels import ConstantKernel, RBF, Matern
     from sklearn.exceptions import ConvergenceWarning
+    from sklearn.gaussian_process import GaussianProcessRegressor
+    from sklearn.gaussian_process.kernels import RBF, ConstantKernel, Matern
     from skopt.sampler import Lhs
     bayes_opt_present = True
 except ImportError:
     bayes_opt_present = False
 
-from kernel_tuner.strategies import minimize
 from kernel_tuner import util
+from kernel_tuner.strategies import minimize
 
 supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast"]
 
 
 def generate_normalized_param_dicts(tune_params: dict, eps: float) -> Tuple[dict, dict]:
-    """ Generates normalization and denormalization dictionaries """
+    """Generates normalization and denormalization dictionaries."""
     original_to_normalized = dict()
     normalized_to_original = dict()
     for param_name in tune_params.keys():
@@ -42,14 +43,14 @@ def generate_normalized_param_dicts(tune_params: dict, eps: float) -> Tuple[dict
 
 
 def normalize_parameter_space(param_space: list, tune_params: dict, normalized: dict) -> list:
-    """ Normalize the parameter space given a normalization dictionary """
+    """Normalize the parameter space given a normalization dictionary."""
     keys = list(tune_params.keys())
     param_space_normalized = list(tuple(normalized[keys[i]][v] for i, v in enumerate(params)) for params in param_space)
     return param_space_normalized
 
 
 def prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict):
-    """ Pruning of the parameter space to remove dimensions that have a constant parameter """
+    """Pruning of the parameter space to remove dimensions that have a constant parameter."""
     pruned_tune_params_mask = list()
     removed_tune_params = list()
     param_names = list(tune_params.keys())
@@ -68,7 +69,7 @@ def prune_parameter_space(parameter_space, tuning_options, tune_params, normaliz
 
 
 def tune(runner, kernel_options, device_options, tuning_options):
-    """ Find the best performing kernel configuration in the parameter space
+    """Find the best performing kernel configuration in the parameter space.
 
     :params runner: A runner from kernel_tuner.runners
     :type runner: kernel_tuner.runner
@@ -90,7 +91,6 @@ def tune(runner, kernel_options, device_options, tuning_options):
     :rtype: list(dict()), dict()
 
     """
-
     max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
     prune_parameterspace = tuning_options.strategy_options.get("pruneparameterspace", True)
     if not bayes_opt_present:
@@ -252,19 +252,19 @@ def current_optimum(self, value: float):
         self.__current_optimum = value
 
     def is_better_than(self, a: float, b: float) -> bool:
-        """ Determines which one is better depending on optimization direction """
+        """Determines which one is better depending on optimization direction."""
         return a < b if self.opt_direction == 'min' else a > b
 
     def is_not_visited(self, index: int) -> bool:
-        """ Returns whether a searchspace index has not been visited """
+        """Returns whether a searchspace index has not been visited."""
         return not self.__visited_searchspace_indices[index]
 
     def is_valid(self, observation: float) -> bool:
-        """ Returns whether an observation is valid """
-        return not (observation == None or observation == self.invalid_value or observation == np.NaN)
+        """Returns whether an observation is valid."""
+        return not (observation is None or observation == self.invalid_value or observation == np.NaN)
 
     def get_af_by_name(self, name: str):
-        """ Get the basic acquisition functions by their name """
+        """Get the basic acquisition functions by their name."""
         basic_af_names = ['ei', 'poi', 'lcb']
         if name == 'ei':
             return self.af_expected_improvement
@@ -275,7 +275,7 @@ def get_af_by_name(self, name: str):
         raise ValueError(f"{name} not in {basic_af_names}")
 
     def set_acquisition_function(self, acquisition_function: str):
-        """ Set the acquisition function """
+        """Set the acquisition function."""
         if acquisition_function == 'poi':
             self.__af = self.af_probability_of_improvement
         elif acquisition_function == 'ei':
@@ -296,7 +296,7 @@ def set_acquisition_function(self, acquisition_function: str):
             raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
 
     def set_surrogate_model(self, cov_kernel_name: str, cov_kernel_lengthscale: float):
-        """ Set the surrogate model with a covariance function and lengthscale """
+        """Set the surrogate model with a covariance function and lengthscale."""
         if cov_kernel_name == "constantrbf":
             kernel = ConstantKernel(1.0, constant_value_bounds="fixed") * RBF(cov_kernel_lengthscale, length_scale_bounds="fixed")
         elif cov_kernel_name == "rbf":
@@ -310,7 +310,7 @@ def set_surrogate_model(self, cov_kernel_name: str, cov_kernel_lengthscale: floa
         self.__model = GaussianProcessRegressor(kernel=kernel, alpha=1e-10, normalize_y=True)    # maybe change alpha to a higher value such as 1e-5?
 
     def valid_params_observations(self) -> Tuple[list, list]:
-        """ Returns a list of valid observations and their parameter configurations """
+        """Returns a list of valid observations and their parameter configurations."""
         # if you do this every iteration, better keep it as cache and update in update_after_evaluation
         params = list()
         observations = list()
@@ -321,30 +321,30 @@ def valid_params_observations(self) -> Tuple[list, list]:
         return params, observations
 
     def unvisited(self) -> list:
-        """ Returns a list of unvisited parameter configurations - attention: cached version exists! """
+        """Returns a list of unvisited parameter configurations - attention: cached version exists!"""
         params = list(self.searchspace[index] for index, visited in enumerate(self.__visited_searchspace_indices) if visited is False)
         return params
 
     def find_param_config_index(self, param_config: tuple) -> int:
-        """ Find a parameter config index in the search space if it exists """
+        """Find a parameter config index in the search space if it exists."""
         return self.searchspace.index(param_config)
 
     def find_param_config_unvisited_index(self, param_config: tuple) -> int:
-        """ Find a parameter config index in the unvisited cache if it exists """
+        """Find a parameter config index in the unvisited cache if it exists."""
         return self.unvisited_cache.index(param_config)
 
     def normalize_param_config(self, param_config: tuple) -> tuple:
-        """ Normalizes a parameter configuration """
+        """Normalizes a parameter configuration."""
         normalized = tuple(self.normalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
         return normalized
 
     def denormalize_param_config(self, param_config: tuple) -> tuple:
-        """ Denormalizes a parameter configuration """
+        """Denormalizes a parameter configuration."""
         denormalized = tuple(self.denormalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
         return denormalized
 
     def unprune_param_config(self, param_config: tuple) -> tuple:
-        """ In case of pruned dimensions, adds the removed dimensions back in the param config """
+        """In case of pruned dimensions, adds the removed dimensions back in the param config."""
         unpruned = list()
         pruned_count = 0
         for removed in self.removed_tune_params:
@@ -356,7 +356,7 @@ def unprune_param_config(self, param_config: tuple) -> tuple:
         return tuple(unpruned)
 
     def update_after_evaluation(self, observation: float, index: int, param_config: tuple):
-        """ Adjust the visited and valid index records accordingly """
+        """Adjust the visited and valid index records accordingly."""
         validity = self.is_valid(observation)
         self.__visited_num += 1
         self.__observations[index] = observation
@@ -371,22 +371,22 @@ def update_after_evaluation(self, observation: float, index: int, param_config:
                 self.current_optimum = observation
 
     def predict(self, x) -> Tuple[float, float]:
-        """ Returns a mean and standard deviation predicted by the surrogate model for the parameter configuration """
+        """Returns a mean and standard deviation predicted by the surrogate model for the parameter configuration."""
         return self.__model.predict([x], return_std=True)
 
     def predict_list(self, lst: list) -> Tuple[list, list, list]:
-        """ Returns a list of means and standard deviations predicted by the surrogate model for the parameter configurations, and separate lists of means and standard deviations """
+        """Returns a list of means and standard deviations predicted by the surrogate model for the parameter configurations, and separate lists of means and standard deviations."""
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             mu, std = self.__model.predict(lst, return_std=True)
             return list(zip(mu, std)), mu, std
 
     def fit_observations_to_model(self):
-        """ Update the model based on the current list of observations """
+        """Update the model based on the current list of observations."""
         self.__model.fit(self.__valid_params, self.__valid_observations)
 
     def evaluate_objective_function(self, param_config: tuple) -> float:
-        """ Evaluates the objective function """
+        """Evaluates the objective function."""
         param_config = self.unprune_param_config(param_config)
         denormalized_param_config = self.denormalize_param_config(param_config)
         if not util.config_valid(denormalized_param_config, self.tuning_options, self.max_threads):
@@ -396,11 +396,11 @@ def evaluate_objective_function(self, param_config: tuple) -> float:
         return val
 
     def dimensions(self) -> list:
-        """ List of parameter values per parameter """
+        """List of parameter values per parameter."""
         return self.tune_params.values()
 
     def draw_random_sample(self) -> Tuple[list, int]:
-        """ Draw a random sample from the unvisited parameter configurations """
+        """Draw a random sample from the unvisited parameter configurations."""
         if len(self.unvisited_cache) < 1:
             raise ValueError("Searchspace exhausted during random sample draw as no valid configurations were found")
         index = randint(0, len(self.unvisited_cache) - 1)    # NOSONAR
@@ -409,7 +409,7 @@ def draw_random_sample(self) -> Tuple[list, int]:
         return param_config, actual_index
 
     def draw_latin_hypercube_samples(self, num_samples: int) -> list:
-        """ Draws an LHS-distributed sample from the search space """
+        """Draws an LHS-distributed sample from the search space."""
         if self.searchspace_size < num_samples:
             raise ValueError("Can't sample more than the size of the search space")
         if self.sampling_crit is None:
@@ -432,7 +432,7 @@ def draw_latin_hypercube_samples(self, num_samples: int) -> list:
         return list(zip(normalized_param_configs, indices))
 
     def initial_sample(self):
-        """ Draws an initial sample using random sampling """
+        """Draws an initial sample using random sampling."""
         if self.num_initial_samples <= 0:
             raise ValueError("At least one initial sample is required")
         if self.sampling_method == 'lhs':
@@ -466,7 +466,7 @@ def initial_sample(self):
         self.cv_norm_maximum = self.initial_std
 
     def contextual_variance(self, std: list):
-        """ Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018) """
+        """Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018)."""
         if not self.af_params['explorationfactor'] == 'CV':
             return None
         if self.opt_direction == 'min':
@@ -484,7 +484,7 @@ def contextual_variance(self, std: list):
         return np.mean(std) / self.current_optimum
 
     def __optimize(self, max_fevals):
-        """ Find the next best candidate configuration(s), evaluate those and update the model accordingly """
+        """Find the next best candidate configuration(s), evaluate those and update the model accordingly."""
         while self.fevals < max_fevals:
             if self.__visited_num >= self.searchspace_size:
                 raise ValueError(self.error_message_searchspace_fully_observed)
@@ -501,7 +501,7 @@ def __optimize(self, max_fevals):
         return self.results
 
     def __optimize_multi(self, max_fevals):
-        """ Optimize with a portfolio of multiple acquisition functions. Predictions are always only taken once. Skips AFs if they suggest X/max_evals duplicates in a row, prefers AF with best discounted average. """
+        """Optimize with a portfolio of multiple acquisition functions. Predictions are always only taken once. Skips AFs if they suggest X/max_evals duplicates in a row, prefers AF with best discounted average."""
         if self.opt_direction != 'min':
             raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
         # calculate how many times an AF can suggest a duplicate candidate before the AF is skipped
@@ -612,7 +612,7 @@ def __optimize_multi(self, max_fevals):
         return self.results
 
     def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
-        """ Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once, unless increase_precision is true. Skips AFs if they are consistently worse than the mean of discounted observations, promotes AFs if they are consistently better than this mean. """
+        """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once, unless increase_precision is true. Skips AFs if they are consistently worse than the mean of discounted observations, promotes AFs if they are consistently better than this mean."""
         if self.opt_direction != 'min':
             raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
         aqfs = self.multi_afs
@@ -713,7 +713,7 @@ def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
         return self.results
 
     def __optimize_multi_fast(self, max_fevals):
-        """ Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once. """
+        """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once."""
         while self.fevals < max_fevals:
             aqfs = self.multi_afs
             # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
@@ -735,14 +735,13 @@ def __optimize_multi_fast(self, max_fevals):
         return self.results
 
     def af_random(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function returning a randomly shuffled list for comparison """
+        """Acquisition function returning a randomly shuffled list for comparison."""
         list_random = range(len(self.unvisited_cache))
         shuffle(list_random)
         return list_random
 
     def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function Probability of Improvement (PI) """
-
+        """Acquisition function Probability of Improvement (PI)."""
         # prefetch required data
         if predictions is None:
             predictions, _, _ = self.predict_list(self.unvisited_cache)
@@ -759,8 +758,7 @@ def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> li
         return list_prob_improvement
 
     def af_expected_improvement(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function Expected Improvement (EI) """
-
+        """Acquisition function Expected Improvement (EI)."""
         # prefetch required data
         if predictions is None:
             predictions, _, _ = self.predict_list(self.unvisited_cache)
@@ -784,8 +782,7 @@ def exp_improvement(index) -> float:
         return list_exp_improvement
 
     def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function Lower Confidence Bound (LCB) """
-
+        """Acquisition function Lower Confidence Bound (LCB)."""
         # prefetch required data
         if predictions is None:
             predictions, _, _ = self.predict_list(self.unvisited_cache)
@@ -798,8 +795,7 @@ def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
         return list_lower_confidence_bound
 
     def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010 """
-
+        """Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010."""
         # prefetch required data
         if predictions is None:
             predictions, _, _ = self.predict_list(self.unvisited_cache)
@@ -818,7 +814,7 @@ def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None)
         return list_lower_confidence_bound
 
     def visualize_after_opt(self):
-        """ Visualize the model after the optimization """
+        """Visualize the model after the optimization."""
         print(self.__model.kernel_.get_params())
         print(self.__model.log_marginal_likelihood())
         import matplotlib.pyplot as plt

From e6c457da8a988d5a2e40b6fb02de115d47e3cd2e Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 29 Oct 2024 12:51:43 -0700
Subject: [PATCH 014/253] Improved time unit conversion

---
 kernel_tuner/core.py       |  2 +-
 kernel_tuner/file_utils.py |  2 +-
 kernel_tuner/interface.py  | 19 +++++++++++++++----
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py
index f139111e7..6dc580850 100644
--- a/kernel_tuner/core.py
+++ b/kernel_tuner/core.py
@@ -598,7 +598,7 @@ def compile_and_benchmark(self, kernel_source, gpu_args, params, kernel_options,
                     if kernel_options.texmem_args is not None:
                         self.dev.copy_texture_memory_args(kernel_options.texmem_args)
 
-                # stop compilation stopwatch and convert to miliseconds
+                # stop compilation stopwatch and convert to milliseconds
                 last_compilation_time = 1000 * (time.perf_counter() - start_compilation)
 
                 # test kernel for correctness
diff --git a/kernel_tuner/file_utils.py b/kernel_tuner/file_utils.py
index 2b75cc023..9231f0e2e 100644
--- a/kernel_tuner/file_utils.py
+++ b/kernel_tuner/file_utils.py
@@ -152,7 +152,7 @@ def get_t4_results(results, tune_params, objective="time"):
 
     # write output_data to a JSON file
     version, _ = output_file_schema("results")
-    output_json = dict(results=output_data, schema_version=version, metadata={'timeunit': 'miliseconds'})
+    output_json = dict(results=output_data, schema_version=version, metadata={'timeunit': 'milliseconds'})
     return output_json
 
 def store_output_file(output_filename: str, results, tune_params, objective="time"):
diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index a557ae589..2bfa06a89 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -613,8 +613,12 @@ def tune_kernel(
     util.append_default_block_size_names(block_size_names)
 
     # if the restrictions are not constraints or a callable, the restrictions are strings, so parse them to functions (increases restrictions check performance significantly)
-    if restrictions is not None and not callable(restrictions) and not any(isinstance(r, Constraint) for r in restrictions):
-        restrictions = util.parse_restrictions(restrictions)
+    if (
+        restrictions is not None
+        and not callable(restrictions)
+        and not any(isinstance(r, Constraint) for r in restrictions)
+    ):
+        restrictions = util.parse_restrictions(restrictions, tune_params)
 
     # sort all the options into separate dicts
     opts = locals()
@@ -854,7 +858,14 @@ def _check_user_input(kernel_name, kernelsource, arguments, block_size_names):
     util.check_block_size_names(block_size_names)
 
 
-def tune_kernel_T1(input_filepath: Path, cache_filepath: Path = None, simulation_mode = False, output_T4 = True, iterations = 7, strategy_options = None):
+def tune_kernel_T1(
+    input_filepath: Path,
+    cache_filepath: Path = None,
+    simulation_mode=False,
+    output_T4=True,
+    iterations=7,
+    strategy_options=None,
+):
     """Call the tune function with a T1 input file."""
     inputs = get_input_file(input_filepath)
     kernelspec: dict = inputs["KernelSpecification"]
@@ -952,7 +963,7 @@ def tune_kernel_T1(input_filepath: Path, cache_filepath: Path = None, simulation
         verbose=False,
         iterations=iterations,
         strategy=strategy,
-        strategy_options=strategy_options
+        strategy_options=strategy_options,
     )
     if output_T4:
         return get_t4_metadata(), get_t4_results(results, tune_params)

From a9f8de4303b9442fb1bb96972bb129c4d43583d3 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 29 Oct 2024 14:39:35 -0700
Subject: [PATCH 015/253] Changed hyperparameter tuning setup

---
 kernel_tuner/hyper.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 37235a26b..08d998dd3 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -86,20 +86,20 @@ def put_if_not_present(target_dict, key, value):
     return list(result_unique.values()), env
 
 if __name__ == "__main__":  # TODO remove in production
-    # hyperparams = {
-    #     'popsize': [10, 20, 30],
-    #     'maxiter': [50, 100, 150],
-    #     'w': [0.25, 0.5, 0.75],
-    #     'c1': [1.0, 2.0, 3.0],
-    #     'c2': [0.5, 1.0, 1.5]
-    # }
-    # result, env = tune_hyper_params('pso', hyperparams)
     hyperparams = {
-        'neighbor': ['Hamming', 'adjacent'],
-        'restart': [True, False],
-        'no_improvement': [1, 10, 25, 33, 50, 66, 75, 100, 200],
-        'random_walk': [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99]
+        'popsize': [10, 20, 30],
+        'maxiter': [50, 100, 150],
+        'w': [0.25, 0.5, 0.75],
+        'c1': [1.0, 2.0, 3.0],
+        'c2': [0.5, 1.0, 1.5]
     }
-    result, env = tune_hyper_params('greedy_ils', hyperparams)
+    result, env = tune_hyper_params('pso', hyperparams)
+    # hyperparams = {
+    #     'neighbor': ['Hamming', 'adjacent'],
+    #     'restart': [True, False],
+    #     'no_improvement': [1, 10, 25, 33, 50, 66, 75, 100, 200],
+    #     'random_walk': [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99]
+    # }
+    # result, env = tune_hyper_params('greedy_ils', hyperparams)
     print(result)
     print(env['best_config'])

From 42319998384b4a0211ede5f2e14aacfd9e7233d2 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 29 Oct 2024 15:28:00 -0700
Subject: [PATCH 016/253] Added the hyperparamtuning experiments file

---
 .gitignore            |   3 +-
 hyperparamtuning.json | 103 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100644 hyperparamtuning.json

diff --git a/.gitignore b/.gitignore
index e38385b00..39d734594 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,7 @@
 poetry.lock
 noxenv.txt
 noxsettings.toml
-hyperparamtuning/
+hyperparamtuning/*
 
 ### Python ###
 *.pyc
@@ -17,6 +17,7 @@ push_to_pypi.sh
 .nfs*
 *.log
 *.json
+!hyperparamtuning.json
 !kernel_tuner/schema/T1/1.0.0/input-schema.json
 !test/test_T1_input.json
 *.csv
diff --git a/hyperparamtuning.json b/hyperparamtuning.json
new file mode 100644
index 000000000..19dba21cb
--- /dev/null
+++ b/hyperparamtuning.json
@@ -0,0 +1,103 @@
+{
+    "version": "1.1.0",
+    "name": "hyperparamtuning",
+    "parent_folder": "/Users/fjwillemsen/University/PhD/OneDrive_Netherlands_eScience_Center/Projects/Bayesian Optimization in Kernel Tuner/Code/kernel_tuner/hyperparamtuning",
+    "experimental_groups_defaults": {
+        "applications": [
+            {
+                "name": "convolution",
+                "folder": "../autotuning_methodology/cached_data_used/kernels",
+                "input_file": "convolution.json"
+            },
+            {
+                "name": "pnpoly",
+                "folder": "../autotuning_methodology/cached_data_used/kernels",
+                "input_file": "pnpoly.json"
+            }
+        ],
+        "gpus": [
+            "RTX_3090",
+            "RTX_2080_Ti"
+        ],
+        "pattern_for_full_search_space_filenames": {
+            "regex": "../autotuning_methodology/cached_data_used/cachefiles/${applications}/${gpus}_T4.json"
+        },
+        "stochastic": true,
+        "repeats": 25,
+        "samples": 1,
+        "minimum_number_of_valid_search_iterations": 20,
+        "ignore_cache": false
+    },
+    "search_strategies": [
+        {
+            "autotuner": "KernelTuner",
+            "name": "genetic_algorithm_popsize=5_maxiter=5_method=uniform_mutation_chance=10",
+            "display_name": "Genetic algorithm",
+            "search_method": "genetic_algorithm",
+            "search_method_hyperparameters": [
+                {
+                    "name": "popsize",
+                    "value": 5
+                },
+                {
+                    "name": "maxiter",
+                    "value": 5
+                },
+                {
+                    "name": "method",
+                    "value": "uniform"
+                },
+                {
+                    "name": "mutation_chance",
+                    "value": 10
+                }
+            ]
+        }
+    ],
+    "statistics_settings": {
+        "minimization": true,
+        "cutoff_percentile": 0.96,
+        "cutoff_percentile_start": 0.5,
+        "cutoff_type": "fevals",
+        "objective_time_keys": [
+            "all"
+        ],
+        "objective_performance_keys": [
+            "time"
+        ]
+    },
+    "visualization_settings": {
+        "plots": [
+            {
+                "scope": "searchspace",
+                "style": "line",
+                "x_axis_value_types": [
+                    "fevals"
+                ],
+                "y_axis_value_types": [
+                    "normalized",
+                    "baseline"
+                ]
+            },
+            {
+                "scope": "searchspace",
+                "style": "line",
+                "x_axis_value_types": [
+                    "time"
+                ],
+                "y_axis_value_types": [
+                    "normalized",
+                    "baseline"
+                ]
+            },
+            {
+                "scope": "aggregate",
+                "style": "line"
+            }
+        ],
+        "resolution": 1000.0,
+        "confidence_level": 0.95,
+        "compare_baselines": false,
+        "compare_split_times": false
+    }
+}
\ No newline at end of file

From 6fe94ca21b10a98c756052b5618e2f109f474466 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 29 Oct 2024 15:42:58 -0700
Subject: [PATCH 017/253] Changed from BAT to HIP paper searchspaces

---
 hyperparamtuning.json | 49 +++++++++++++++++++------------------------
 1 file changed, 22 insertions(+), 27 deletions(-)

diff --git a/hyperparamtuning.json b/hyperparamtuning.json
index 19dba21cb..947eb2904 100644
--- a/hyperparamtuning.json
+++ b/hyperparamtuning.json
@@ -5,19 +5,32 @@
     "experimental_groups_defaults": {
         "applications": [
             {
-                "name": "convolution",
+                "name": "convolution_milo",
                 "folder": "../autotuning_methodology/cached_data_used/kernels",
-                "input_file": "convolution.json"
+                "input_file": "convolution_milo.json"
             },
             {
-                "name": "pnpoly",
+                "name": "dedisp_milo",
                 "folder": "../autotuning_methodology/cached_data_used/kernels",
-                "input_file": "pnpoly.json"
+                "input_file": "dedisp_milo.json"
+            },
+            {
+                "name": "gemm_milo",
+                "folder": "../autotuning_methodology/cached_data_used/kernels",
+                "input_file": "dedisp_milo.json"
+            },
+            {
+                "name": "hotspot_milo",
+                "folder": "../autotuning_methodology/cached_data_used/kernels",
+                "input_file": "dedisp_milo.json"
             }
         ],
         "gpus": [
-            "RTX_3090",
-            "RTX_2080_Ti"
+            "A100",
+            "A4000",
+            "MI50",
+            "MI250X",
+            "W6600"
         ],
         "pattern_for_full_search_space_filenames": {
             "regex": "../autotuning_methodology/cached_data_used/cachefiles/${applications}/${gpus}_T4.json"
@@ -31,27 +44,9 @@
     "search_strategies": [
         {
             "autotuner": "KernelTuner",
-            "name": "genetic_algorithm_popsize=5_maxiter=5_method=uniform_mutation_chance=10",
-            "display_name": "Genetic algorithm",
-            "search_method": "genetic_algorithm",
-            "search_method_hyperparameters": [
-                {
-                    "name": "popsize",
-                    "value": 5
-                },
-                {
-                    "name": "maxiter",
-                    "value": 5
-                },
-                {
-                    "name": "method",
-                    "value": "uniform"
-                },
-                {
-                    "name": "mutation_chance",
-                    "value": 10
-                }
-            ]
+            "name": "brute_force",
+            "display_name": "Brute force",
+            "search_method": "brute_force"
         }
     ],
     "statistics_settings": {

From 05c39cbad6cefdfe64c76083a702fc8d5aebc12d Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 29 Oct 2024 15:59:34 -0700
Subject: [PATCH 018/253] Changed from BAT to HIP paper searchspaces

---
 .gitignore                          |  1 -
 hyperparamtuning.json               | 98 -----------------------------
 kernel_tuner/backends/hypertuner.py | 25 ++++----
 kernel_tuner/hyper.py               |  3 +-
 4 files changed, 13 insertions(+), 114 deletions(-)
 delete mode 100644 hyperparamtuning.json

diff --git a/.gitignore b/.gitignore
index 39d734594..47ffc4024 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,7 +17,6 @@ push_to_pypi.sh
 .nfs*
 *.log
 *.json
-!hyperparamtuning.json
 !kernel_tuner/schema/T1/1.0.0/input-schema.json
 !test/test_T1_input.json
 *.csv
diff --git a/hyperparamtuning.json b/hyperparamtuning.json
deleted file mode 100644
index 947eb2904..000000000
--- a/hyperparamtuning.json
+++ /dev/null
@@ -1,98 +0,0 @@
-{
-    "version": "1.1.0",
-    "name": "hyperparamtuning",
-    "parent_folder": "/Users/fjwillemsen/University/PhD/OneDrive_Netherlands_eScience_Center/Projects/Bayesian Optimization in Kernel Tuner/Code/kernel_tuner/hyperparamtuning",
-    "experimental_groups_defaults": {
-        "applications": [
-            {
-                "name": "convolution_milo",
-                "folder": "../autotuning_methodology/cached_data_used/kernels",
-                "input_file": "convolution_milo.json"
-            },
-            {
-                "name": "dedisp_milo",
-                "folder": "../autotuning_methodology/cached_data_used/kernels",
-                "input_file": "dedisp_milo.json"
-            },
-            {
-                "name": "gemm_milo",
-                "folder": "../autotuning_methodology/cached_data_used/kernels",
-                "input_file": "dedisp_milo.json"
-            },
-            {
-                "name": "hotspot_milo",
-                "folder": "../autotuning_methodology/cached_data_used/kernels",
-                "input_file": "dedisp_milo.json"
-            }
-        ],
-        "gpus": [
-            "A100",
-            "A4000",
-            "MI50",
-            "MI250X",
-            "W6600"
-        ],
-        "pattern_for_full_search_space_filenames": {
-            "regex": "../autotuning_methodology/cached_data_used/cachefiles/${applications}/${gpus}_T4.json"
-        },
-        "stochastic": true,
-        "repeats": 25,
-        "samples": 1,
-        "minimum_number_of_valid_search_iterations": 20,
-        "ignore_cache": false
-    },
-    "search_strategies": [
-        {
-            "autotuner": "KernelTuner",
-            "name": "brute_force",
-            "display_name": "Brute force",
-            "search_method": "brute_force"
-        }
-    ],
-    "statistics_settings": {
-        "minimization": true,
-        "cutoff_percentile": 0.96,
-        "cutoff_percentile_start": 0.5,
-        "cutoff_type": "fevals",
-        "objective_time_keys": [
-            "all"
-        ],
-        "objective_performance_keys": [
-            "time"
-        ]
-    },
-    "visualization_settings": {
-        "plots": [
-            {
-                "scope": "searchspace",
-                "style": "line",
-                "x_axis_value_types": [
-                    "fevals"
-                ],
-                "y_axis_value_types": [
-                    "normalized",
-                    "baseline"
-                ]
-            },
-            {
-                "scope": "searchspace",
-                "style": "line",
-                "x_axis_value_types": [
-                    "time"
-                ],
-                "y_axis_value_types": [
-                    "normalized",
-                    "baseline"
-                ]
-            },
-            {
-                "scope": "aggregate",
-                "style": "line"
-            }
-        ],
-        "resolution": 1000.0,
-        "confidence_level": 0.95,
-        "compare_baselines": false,
-        "compare_split_times": false
-    }
-}
\ No newline at end of file
diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index 65a263ce1..53e5dd6da 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -61,20 +61,17 @@ def compile(self, kernel_instance):
         path.mkdir(exist_ok=True)
 
         # TODO get applications & GPUs args from benchmark
-        gpus = ["RTX_3090", "RTX_2080_Ti"]
-        applications = None
-        # applications = [
-        #     {
-        #         "name": "convolution",
-        #         "folder": "./cached_data_used/kernels",
-        #         "input_file": "convolution.json"
-        #     },
-        #     {
-        #         "name": "pnpoly",
-        #         "folder": "./cached_data_used/kernels",
-        #         "input_file": "pnpoly.json"
-        #     }
-        # ]
+        # gpus = ["RTX_3090", "RTX_2080_Ti"]
+        # applications = None
+
+        gpus = ["A100", "A4000", "MI50", "MI250X", "W6600"]
+        applications = [
+            {
+                "name": "convolution_milo",
+                "folder": "../autotuning_methodology/cached_data_used/kernels",
+                "input_file": "convolution_milo.json"
+            }
+        ]
 
         # strategy settings
         strategy: str = kernel_instance.arguments[0]
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 08d998dd3..8c0fb5d4e 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -60,6 +60,7 @@ def tune_hyper_params(target_strategy: str, hyper_params: dict, *args, **kwargs)
 
     # pass a temporary cache file to avoid duplicate execution
     cachefile = get_random_unique_filename('temp_', '.json')
+    cachefile = Path("hyperparamtuning_milo_bruteforce.json")
     kwargs['cache'] = str(cachefile)
 
     def put_if_not_present(target_dict, key, value):
@@ -68,7 +69,7 @@ def put_if_not_present(target_dict, key, value):
     put_if_not_present(kwargs, "verbose", True)
     put_if_not_present(kwargs, "quiet", False)
     kwargs['simulation_mode'] = False
-    kwargs['strategy'] = 'dual_annealing'
+    kwargs['strategy'] = 'brute_force'
     kwargs['verify'] = None
     arguments = [target_strategy]
 

From b0e457325aaaca66fc3a67c4b2163e8d33bc413e Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 29 Oct 2024 17:40:37 -0700
Subject: [PATCH 019/253] Complex restrictions with tunable parameters provided
 are compiled

---
 kernel_tuner/searchspace.py | 5 +++--
 kernel_tuner/util.py        | 9 +++++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index e36fca54e..cc569abc5 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -66,7 +66,8 @@ def __init__(
         restrictions = [restrictions] if not isinstance(restrictions, list) else restrictions
         if (
             len(restrictions) > 0
-            and any(isinstance(restriction, str) for restriction in restrictions)
+            and (any(isinstance(restriction, str) for restriction in restrictions)
+            or any(isinstance(restriction[0], str) for restriction in restrictions if isinstance(restriction, tuple)))
             and not (framework_l == "pysmt" or framework_l == "bruteforce")
         ):
             self.restrictions = compile_restrictions(
@@ -388,7 +389,7 @@ def __add_restrictions(self, parameter_space: Problem) -> Problem:
                     all_params_required = all(param_name in required_params for param_name in self.param_names)
                     parameter_space.addConstraint(restriction, None if all_params_required else required_params)
                 else:
-                    raise ValueError(f"Unrecognized restriction {restriction}")
+                    raise ValueError(f"Unrecognized restriction type {type(restriction)} ({restriction})")
 
         # if the restrictions are the old monolithic function, apply them directly (only for backwards compatibility, likely slower than well-specified constraints!)
         elif callable(self.restrictions):
diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index 2ac9498e4..e8d194e11 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -1037,8 +1037,11 @@ def to_equality_constraint(
                     # check if we can turn this into the built-in equality comparison constraint
                     finalized_constraint = to_equality_constraint(parsed_restriction, params_used)
             if finalized_constraint is None:
-                # we must turn it into a general function
-                finalized_constraint = f"def r({', '.join(params_used)}): return {parsed_restriction} \n"
+                if parsed_restriction.startswith("def r("):
+                    finalized_constraint = parsed_restriction
+                else:
+                    # we must turn it into a general function
+                    finalized_constraint = f"def r({', '.join(params_used)}): return {parsed_restriction} \n"
             parsed_restrictions.append((finalized_constraint, params_used))
     else:
         # create one monolithic function
@@ -1075,6 +1078,8 @@ def compile_restrictions(
     restrictions: list, tune_params: dict, monolithic=False, format=None, try_to_constraint=True
 ) -> list[tuple[Union[str, Constraint, FunctionType], list[str]]]:
     """Parses restrictions from a list of strings into a list of strings, Functions, or Constraints (if `try_to_constraint`) and parameters used, or a single Function if monolithic is true."""
+    # change tuples consisting of strings and tunable parameters to only strings to compile
+    restrictions = [r[0] if isinstance(r, tuple) and len(r) == 2 and isinstance(r[0], str) and isinstance(r[1], list) else r for r in restrictions]
     # filter the restrictions to get only the strings
     restrictions_str, restrictions_ignore = [], []
     for r in restrictions:

From 0a2748d15ce0609665ad87a048332a3cc0bd4473 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 31 Oct 2024 13:00:49 -0700
Subject: [PATCH 020/253] Made original BO compatible with Searchspaces

---
 kernel_tuner/strategies/bayes_opt.py | 56 ++++++++++++++--------------
 1 file changed, 29 insertions(+), 27 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
index c384ecb97..dd0551740 100644
--- a/kernel_tuner/strategies/bayes_opt.py
+++ b/kernel_tuner/strategies/bayes_opt.py
@@ -93,9 +93,6 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
     """
     max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
-    # limit max_fevals to max size of the parameter space
-    max_fevals = min(searchspace.size, max_fevals)
-
     prune_parameterspace = tuning_options.strategy_options.get("pruneparameterspace", True)
     if not bayes_opt_present:
         raise ImportError(
@@ -571,8 +568,8 @@ def __optimize(self, max_fevals):
         while self.fevals < max_fevals:
             if self.__visited_num >= self.searchspace_size:
                 raise ValueError(self.error_message_searchspace_fully_observed)
-            predictions = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(predictions[1])
+            predictions, _, std = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(std)
             list_of_acquisition_values = self.__af(predictions, hyperparam)
             # afterwards select the best AF value
             best_af = self.argopt(list_of_acquisition_values)
@@ -606,8 +603,8 @@ def __optimize_multi(self, max_fevals):
             time_start = time.perf_counter_ns()
             # the first acquisition function is never skipped, so that should be the best for the endgame (EI)
             aqfs = self.multi_afs
-            predictions = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(predictions[1])
+            predictions, _, std = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(std)
             if self.__visited_num >= self.searchspace_size:
                 raise ValueError(self.error_message_searchspace_fully_observed)
             time_predictions = time.perf_counter_ns()
@@ -728,19 +725,19 @@ def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
                 raise ValueError(self.error_message_searchspace_fully_observed)
             observations_median = np.median(self.__valid_observations)
             if increase_precision is False:
-                predictions = self.predict_list(self.unvisited_cache)
-                hyperparam = self.contextual_variance(predictions[1])
+                predictions, _, std = self.predict_list(self.unvisited_cache)
+                hyperparam = self.contextual_variance(std)
             for af_index, af in enumerate(aqfs):
                 if af_index in skip_af_index:
                     continue
                 if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
                     break
                 if increase_precision is True:
-                    predictions = self.predict_list(self.unvisited_cache)
+                    predictions, _, std = self.predict_list(self.unvisited_cache)
                     hyperparam = self.contextual_variance(std)
                 list_of_acquisition_values = af(predictions, hyperparam)
                 best_af = self.argopt(list_of_acquisition_values)
-                # del predictions[best_af]  # to avoid going out of bounds
+                del predictions[best_af]  # to avoid going out of bounds
                 candidate_params = self.unvisited_cache[best_af]
                 candidate_index = self.find_param_config_index(candidate_params)
                 observation = self.evaluate_objective_function(candidate_params)
@@ -830,8 +827,8 @@ def __optimize_multi_fast(self, max_fevals):
         while self.fevals < max_fevals:
             aqfs = self.multi_afs
             # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
-            predictions = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(predictions[1])
+            predictions, _, std = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(std)
             if self.__visited_num >= self.searchspace_size:
                 raise ValueError(self.error_message_searchspace_fully_observed)
             for af in aqfs:
@@ -855,37 +852,42 @@ def af_random(self, predictions=None, hyperparam=None) -> list:
     def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> list:
         """Acquisition function Probability of Improvement (PI)."""
         # prefetch required data
+        if predictions is None:
+            predictions, _, _ = self.predict_list(self.unvisited_cache)
         if hyperparam is None:
             hyperparam = self.af_params["explorationfactor"]
         fplus = self.current_optimum - hyperparam
 
         # precompute difference of improvement
-        list_diff_improvement = list(-((fplus - x_mu) / (x_std + 1e-9)) for x_mu, x_std in predictions[0])
+        list_diff_improvement = list(-((fplus - x_mu) / (x_std + 1e-9)) for (x_mu, x_std) in predictions)
 
         # compute probability of improvement with CDF in bulk
         list_prob_improvement = norm.cdf(list_diff_improvement)
+
         return list_prob_improvement
 
     def af_expected_improvement(self, predictions=None, hyperparam=None) -> list:
         """Acquisition function Expected Improvement (EI)."""
         # prefetch required data
+        if predictions is None:
+            predictions, _, _ = self.predict_list(self.unvisited_cache)
         if hyperparam is None:
             hyperparam = self.af_params["explorationfactor"]
         fplus = self.current_optimum - hyperparam
-        if len(predictions) == 3:
-            predictions, x_mu, x_std = predictions
-        elif len(predictions) == 2:
-            x_mu, x_std = predictions
-        else:
-            raise ValueError(f"Invalid predictions size {len(predictions)}")
 
         # precompute difference of improvement, CDF and PDF in bulk
         list_diff_improvement = list((fplus - x_mu) / (x_std + 1e-9) for (x_mu, x_std) in predictions)
         list_cdf = norm.cdf(list_diff_improvement)
         list_pdf = norm.pdf(list_diff_improvement)
 
-        # compute expected improvement in bulk
-        list_exp_improvement = -((fplus - x_mu) * list_cdf + x_std * list_pdf)
+        # specify AF calculation
+        def exp_improvement(index) -> float:
+            x_mu, x_std = predictions[index]
+            ei = (fplus - x_mu) * list_cdf[index] + x_std * list_pdf[index]
+            return -ei
+
+        # calculate AF
+        list_exp_improvement = list(map(exp_improvement, range(len(predictions))))
         return list_exp_improvement
 
     def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
@@ -896,16 +898,16 @@ def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
         if hyperparam is None:
             hyperparam = self.af_params["explorationfactor"]
         beta = hyperparam
-        _, x_mu, x_std = predictions
 
         # compute LCB in bulk
-        list_lower_confidence_bound = (x_mu - beta * x_std)
+        list_lower_confidence_bound = list(x_mu - beta * x_std for (x_mu, x_std) in predictions)
         return list_lower_confidence_bound
 
     def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None) -> list:
         """Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010."""
         # prefetch required data
-        _, x_mu, x_std = predictions
+        if predictions is None:
+            predictions, _, _ = self.predict_list(self.unvisited_cache)
         if hyperparam is None:
             hyperparam = self.af_params["explorationfactor"]
 
@@ -917,7 +919,7 @@ def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None)
         beta = np.sqrt(zeta * (2 * np.log((t ** (d / 2.0 + 2)) * (np.pi**2) / (3.0 * delta))))
 
         # compute UCB in bulk
-        list_lower_confidence_bound = (x_mu - beta * x_std)
+        list_lower_confidence_bound = list(x_mu - beta * x_std for (x_mu, x_std) in predictions)
         return list_lower_confidence_bound
 
     def visualize_after_opt(self):
@@ -938,4 +940,4 @@ def visualize_after_opt(self):
         plt.plot(x_axis, mu, label="predictions", linestyle=" ", marker=".")
         plt.plot(x_axis, brute_force_observations, label="actual", linestyle=" ", marker=".")
         plt.legend()
-        plt.show()
+        plt.show()
\ No newline at end of file

From 6354f4d6bb16b37631de898eed1de5ff45f1a1b6 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 31 Oct 2024 13:29:37 -0700
Subject: [PATCH 021/253] Implemented a new acquisition function that takes the
 ratio between prediction and evaluation into account to be more efficient

---
 kernel_tuner/strategies/bayes_opt.py | 45 ++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
index dd0551740..47f82e3a9 100644
--- a/kernel_tuner/strategies/bayes_opt.py
+++ b/kernel_tuner/strategies/bayes_opt.py
@@ -24,7 +24,7 @@
 
 from kernel_tuner import util
 
-supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast"]
+supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast", "multi-ultrafast"]
 
 
 def generate_normalized_param_dicts(tune_params: dict, eps: float) -> Tuple[dict, dict]:
@@ -162,7 +162,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     covariancelengthscale=("The covariance length scale", 1.5),
     method=(
         "The Bayesian Optimization method to use, choose any from " + ", ".join(supported_methods),
-        "multi-advanced",
+        "multi-ultrafast",
     ),
     samplingmethod=(
         "Method used for initial sampling the parameter space, either random or Latin Hypercube Sampling (LHS)",
@@ -199,7 +199,7 @@ def get_hyperparam(name: str, default, supported_values=list()):
         # get hyperparameters
         cov_kernel_name = get_hyperparam("covariancekernel", "matern32", self.supported_cov_kernels)
         cov_kernel_lengthscale = get_hyperparam("covariancelengthscale", 1.5)
-        acquisition_function = get_hyperparam("method", "multi-advanced", self.supported_methods)
+        acquisition_function = get_hyperparam("method", "multi-ultrafast", self.supported_methods)
         acq = acquisition_function
         acq_params = get_hyperparam("methodparams", {})
         multi_af_names = get_hyperparam("multi_af_names", ["ei", "poi", "lcb"])
@@ -342,6 +342,8 @@ def set_acquisition_function(self, acquisition_function: str):
             self.optimize = self.__optimize_multi_advanced
         elif acquisition_function == "multi-fast":
             self.optimize = self.__optimize_multi_fast
+        elif acquisition_function == "multi-ultrafast":
+            self.optimize = self.__optimize_multi_ultrafast
         else:
             raise ValueError(
                 "Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function)
@@ -843,6 +845,43 @@ def __optimize_multi_fast(self, max_fevals):
                 self.update_after_evaluation(observation, candidate_index, candidate_params)
             self.fit_observations_to_model()
 
+    def __optimize_multi_ultrafast(self, max_fevals, predict_eval_ratio=5):
+        """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once, or fewer if predictions take too long.
+        
+        The `predict_eval_ratio` denotes the ratio between the duration of the predictions and the duration of evaluations, as updating the prediction every evaluation is not efficient when evaluation is quick. 
+        Predictions are only updated if the previous evaluation took more than `predict_eval_ratio` * the last prediction duration, or the last prediction is more than `predict_eval_ratio` evaluations ago. 
+        """
+        last_prediction_counter = 0
+        last_prediction_time = 0
+        last_eval_time = 0
+        while self.fevals < max_fevals:
+            aqfs = self.multi_afs
+            # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
+            if last_prediction_time * predict_eval_ratio <= last_eval_time or last_prediction_counter >= predict_eval_ratio:
+                last_prediction_counter = 0
+                pred_start = time.perf_counter()
+                if last_eval_time > 0.0:
+                    self.fit_observations_to_model()
+                predictions, _, std = self.predict_list(self.unvisited_cache)
+                last_prediction_time = time.perf_counter() - pred_start
+            else:
+                last_prediction_counter += 1
+            eval_start = time.perf_counter()
+            hyperparam = self.contextual_variance(std)
+            if self.__visited_num >= self.searchspace_size:
+                raise ValueError(self.error_message_searchspace_fully_observed)
+            for af in aqfs:
+                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
+                    break
+                list_of_acquisition_values = af(predictions, hyperparam)
+                best_af = self.argopt(list_of_acquisition_values)
+                del predictions[best_af]  # to avoid going out of bounds
+                candidate_params = self.unvisited_cache[best_af]
+                candidate_index = self.find_param_config_index(candidate_params)
+                observation = self.evaluate_objective_function(candidate_params)
+                self.update_after_evaluation(observation, candidate_index, candidate_params)
+            last_eval_time = time.perf_counter() - eval_start
+
     def af_random(self, predictions=None, hyperparam=None) -> list:
         """Acquisition function returning a randomly shuffled list for comparison."""
         list_random = range(len(self.unvisited_cache))

From 540151968ac9a8dec235c350bb4d088b3cf7c54c Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 09:04:59 -0800
Subject: [PATCH 022/253] Changed supported Python versions to include 3.13,
 updated dependencies

---
 doc/requirements.txt | 175 +++++++++++++++++++++----------------------
 noxfile.py           |   2 +-
 pyproject.toml       |   7 +-
 3 files changed, 91 insertions(+), 93 deletions(-)

diff --git a/doc/requirements.txt b/doc/requirements.txt
index 766ee148d..b47d8ddaf 100644
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@@ -1,89 +1,86 @@
-alabaster==0.7.16 ; python_version >= "3.9" and python_version < "3.13"
-asttokens==2.4.1 ; python_version >= "3.9" and python_version < "3.13"
-attrs==23.2.0 ; python_version >= "3.9" and python_version < "3.13"
-babel==2.15.0 ; python_version >= "3.9" and python_version < "3.13"
-beautifulsoup4==4.12.3 ; python_version >= "3.9" and python_version < "3.13"
-bleach==6.1.0 ; python_version >= "3.9" and python_version < "3.13"
-certifi==2024.2.2 ; python_version >= "3.9" and python_version < "3.13"
-cffi==1.16.0 ; python_version >= "3.9" and python_version < "3.13" and implementation_name == "pypy"
-charset-normalizer==3.3.2 ; python_version >= "3.9" and python_version < "3.13"
-colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and sys_platform == "win32"
-decorator==5.1.1 ; python_version >= "3.9" and python_version < "3.13"
-defusedxml==0.7.1 ; python_version >= "3.9" and python_version < "3.13"
-docutils==0.20.1 ; python_version >= "3.9" and python_version < "3.13"
-dom-toml==2.0.0 ; python_version >= "3.9" and python_version < "3.13"
-domdf-python-tools==3.8.1 ; python_version >= "3.9" and python_version < "3.13"
-exceptiongroup==1.2.1 ; python_version >= "3.9" and python_version < "3.11"
-executing==2.0.1 ; python_version >= "3.9" and python_version < "3.13"
-fastjsonschema==2.19.1 ; python_version >= "3.9" and python_version < "3.13"
-idna==3.7 ; python_version >= "3.9" and python_version < "3.13"
-imagesize==1.4.1 ; python_version >= "3.9" and python_version < "3.13"
-importlib-metadata==7.1.0 ; python_version >= "3.9" and python_version < "3.10"
-iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "3.13"
-ipython==8.18.1 ; python_version >= "3.9" and python_version < "3.13"
-jedi==0.19.1 ; python_version >= "3.9" and python_version < "3.13"
-jinja2==3.1.4 ; python_version >= "3.9" and python_version < "3.13"
-joblib==1.4.2 ; python_version >= "3.9" and python_version < "3.13"
-jsonschema-specifications==2023.12.1 ; python_version >= "3.9" and python_version < "3.13"
-jsonschema==4.22.0 ; python_version >= "3.9" and python_version < "3.13"
-jupyter-client==8.6.2 ; python_version >= "3.9" and python_version < "3.13"
-jupyter-core==5.7.2 ; python_version >= "3.9" and python_version < "3.13"
-jupyterlab-pygments==0.3.0 ; python_version >= "3.9" and python_version < "3.13"
-markupsafe==2.1.5 ; python_version >= "3.9" and python_version < "3.13"
-matplotlib-inline==0.1.7 ; python_version >= "3.9" and python_version < "3.13"
-mistune==3.0.2 ; python_version >= "3.9" and python_version < "3.13"
-natsort==8.4.0 ; python_version >= "3.9" and python_version < "3.13"
-nbclient==0.10.0 ; python_version >= "3.9" and python_version < "3.13"
-nbconvert==7.16.4 ; python_version >= "3.9" and python_version < "3.13"
-nbformat==5.10.4 ; python_version >= "3.9" and python_version < "3.13"
-nbsphinx==0.9.4 ; python_version >= "3.9" and python_version < "3.13"
-numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.13"
-packaging==24.0 ; python_version >= "3.9" and python_version < "3.13"
-pandas==2.2.2 ; python_version >= "3.9" and python_version < "3.13"
-pandocfilters==1.5.1 ; python_version >= "3.9" and python_version < "3.13"
-parso==0.8.4 ; python_version >= "3.9" and python_version < "3.13"
-pexpect==4.9.0 ; python_version >= "3.9" and python_version < "3.13" and sys_platform != "win32"
-platformdirs==4.2.2 ; python_version >= "3.9" and python_version < "3.13"
-pluggy==1.5.0 ; python_version >= "3.9" and python_version < "3.13"
-prompt-toolkit==3.0.43 ; python_version >= "3.9" and python_version < "3.13"
-ptyprocess==0.7.0 ; python_version >= "3.9" and python_version < "3.13" and sys_platform != "win32"
-pure-eval==0.2.2 ; python_version >= "3.9" and python_version < "3.13"
-pycparser==2.22 ; python_version >= "3.9" and python_version < "3.13" and implementation_name == "pypy"
-pygments==2.18.0 ; python_version >= "3.9" and python_version < "3.13"
-pytest==8.2.1 ; python_version >= "3.9" and python_version < "3.13"
-python-constraint2==2.0.0b5 ; python_version >= "3.9" and python_version < "3.13"
-python-dateutil==2.9.0.post0 ; python_version >= "3.9" and python_version < "3.13"
-pytz==2024.1 ; python_version >= "3.9" and python_version < "3.13"
-pywin32==306 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.9" and python_version < "3.13"
-pyzmq==26.0.3 ; python_version >= "3.9" and python_version < "3.13"
-referencing==0.35.1 ; python_version >= "3.9" and python_version < "3.13"
-requests==2.32.2 ; python_version >= "3.9" and python_version < "3.13"
-rpds-py==0.18.1 ; python_version >= "3.9" and python_version < "3.13"
-scikit-learn==1.5.0 ; python_version >= "3.9" and python_version < "3.13"
-scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.13"
-six==1.16.0 ; python_version >= "3.9" and python_version < "3.13"
-snowballstemmer==2.2.0 ; python_version >= "3.9" and python_version < "3.13"
-soupsieve==2.5 ; python_version >= "3.9" and python_version < "3.13"
-sphinx-pyproject==0.3.0 ; python_version >= "3.9" and python_version < "3.13"
-sphinx-rtd-theme==2.0.0 ; python_version >= "3.9" and python_version < "3.13"
-sphinx==7.3.7 ; python_version >= "3.9" and python_version < "3.13"
-sphinxcontrib-applehelp==1.0.8 ; python_version >= "3.9" and python_version < "3.13"
-sphinxcontrib-devhelp==1.0.6 ; python_version >= "3.9" and python_version < "3.13"
-sphinxcontrib-htmlhelp==2.0.5 ; python_version >= "3.9" and python_version < "3.13"
-sphinxcontrib-jquery==4.1 ; python_version >= "3.9" and python_version < "3.13"
-sphinxcontrib-jsmath==1.0.1 ; python_version >= "3.9" and python_version < "3.13"
-sphinxcontrib-qthelp==1.0.7 ; python_version >= "3.9" and python_version < "3.13"
-sphinxcontrib-serializinghtml==1.1.10 ; python_version >= "3.9" and python_version < "3.13"
-stack-data==0.6.3 ; python_version >= "3.9" and python_version < "3.13"
-threadpoolctl==3.5.0 ; python_version >= "3.9" and python_version < "3.13"
-tinycss2==1.3.0 ; python_version >= "3.9" and python_version < "3.13"
-tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.13"
-tornado==6.4 ; python_version >= "3.9" and python_version < "3.13"
-traitlets==5.14.3 ; python_version >= "3.9" and python_version < "3.13"
-typing-extensions==4.12.0 ; python_version >= "3.9" and python_version < "3.13"
-tzdata==2024.1 ; python_version >= "3.9" and python_version < "3.13"
-urllib3==2.2.1 ; python_version >= "3.9" and python_version < "3.13"
-wcwidth==0.2.13 ; python_version >= "3.9" and python_version < "3.13"
-webencodings==0.5.1 ; python_version >= "3.9" and python_version < "3.13"
-xmltodict==0.13.0 ; python_version >= "3.9" and python_version < "3.13"
-zipp==3.18.2 ; python_version >= "3.9" and python_version < "3.10"
+alabaster==0.7.16 ; python_version >= "3.10" and python_version < "3.14"
+asttokens==2.4.1 ; python_version >= "3.10" and python_version < "3.14"
+attrs==24.2.0 ; python_version >= "3.10" and python_version < "3.14"
+babel==2.16.0 ; python_version >= "3.10" and python_version < "3.14"
+beautifulsoup4==4.12.3 ; python_version >= "3.10" and python_version < "3.14"
+bleach==6.2.0 ; python_version >= "3.10" and python_version < "3.14"
+certifi==2024.8.30 ; python_version >= "3.10" and python_version < "3.14"
+cffi==1.17.1 ; python_version >= "3.10" and python_version < "3.14" and implementation_name == "pypy"
+charset-normalizer==3.4.0 ; python_version >= "3.10" and python_version < "3.14"
+colorama==0.4.6 ; python_version >= "3.10" and python_version < "3.14" and sys_platform == "win32"
+decorator==5.1.1 ; python_version >= "3.10" and python_version < "3.14"
+defusedxml==0.7.1 ; python_version >= "3.10" and python_version < "3.14"
+docutils==0.20.1 ; python_version >= "3.10" and python_version < "3.14"
+dom-toml==2.0.0 ; python_version >= "3.10" and python_version < "3.14"
+domdf-python-tools==3.9.0 ; python_version >= "3.10" and python_version < "3.14"
+exceptiongroup==1.2.2 ; python_version >= "3.10" and python_version < "3.11"
+executing==2.1.0 ; python_version >= "3.10" and python_version < "3.14"
+fastjsonschema==2.20.0 ; python_version >= "3.10" and python_version < "3.14"
+idna==3.10 ; python_version >= "3.10" and python_version < "3.14"
+imagesize==1.4.1 ; python_version >= "3.10" and python_version < "3.14"
+iniconfig==2.0.0 ; python_version >= "3.10" and python_version < "3.14"
+ipython==8.18.1 ; python_version >= "3.10" and python_version < "3.14"
+jedi==0.19.1 ; python_version >= "3.10" and python_version < "3.14"
+jinja2==3.1.4 ; python_version >= "3.10" and python_version < "3.14"
+joblib==1.4.2 ; python_version >= "3.10" and python_version < "3.14"
+jsonschema-specifications==2024.10.1 ; python_version >= "3.10" and python_version < "3.14"
+jsonschema==4.23.0 ; python_version >= "3.10" and python_version < "3.14"
+jupyter-client==8.6.3 ; python_version >= "3.10" and python_version < "3.14"
+jupyter-core==5.7.2 ; python_version >= "3.10" and python_version < "3.14"
+jupyterlab-pygments==0.3.0 ; python_version >= "3.10" and python_version < "3.14"
+markupsafe==2.1.5 ; python_version >= "3.10" and python_version < "3.14"
+matplotlib-inline==0.1.7 ; python_version >= "3.10" and python_version < "3.14"
+mistune==3.0.2 ; python_version >= "3.10" and python_version < "3.14"
+natsort==8.4.0 ; python_version >= "3.10" and python_version < "3.14"
+nbclient==0.10.0 ; python_version >= "3.10" and python_version < "3.14"
+nbconvert==7.16.4 ; python_version >= "3.10" and python_version < "3.14"
+nbformat==5.10.4 ; python_version >= "3.10" and python_version < "3.14"
+nbsphinx==0.9.5 ; python_version >= "3.10" and python_version < "3.14"
+numpy==1.26.4 ; python_version >= "3.10" and python_version < "3.14"
+packaging==24.1 ; python_version >= "3.10" and python_version < "3.14"
+pandas==2.2.3 ; python_version >= "3.10" and python_version < "3.14"
+pandocfilters==1.5.1 ; python_version >= "3.10" and python_version < "3.14"
+parso==0.8.4 ; python_version >= "3.10" and python_version < "3.14"
+pexpect==4.9.0 ; python_version >= "3.10" and python_version < "3.14" and sys_platform != "win32"
+platformdirs==4.3.6 ; python_version >= "3.10" and python_version < "3.14"
+pluggy==1.5.0 ; python_version >= "3.10" and python_version < "3.14"
+prompt-toolkit==3.0.48 ; python_version >= "3.10" and python_version < "3.14"
+ptyprocess==0.7.0 ; python_version >= "3.10" and python_version < "3.14" and sys_platform != "win32"
+pure-eval==0.2.3 ; python_version >= "3.10" and python_version < "3.14"
+pycparser==2.22 ; python_version >= "3.10" and python_version < "3.14" and implementation_name == "pypy"
+pygments==2.18.0 ; python_version >= "3.10" and python_version < "3.14"
+pytest==8.3.3 ; python_version >= "3.10" and python_version < "3.14"
+python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version < "3.14"
+pytz==2024.2 ; python_version >= "3.10" and python_version < "3.14"
+pywin32==308 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.10" and python_version < "3.14"
+pyzmq==26.2.0 ; python_version >= "3.10" and python_version < "3.14"
+referencing==0.35.1 ; python_version >= "3.10" and python_version < "3.14"
+requests==2.32.3 ; python_version >= "3.10" and python_version < "3.14"
+rpds-py==0.20.1 ; python_version >= "3.10" and python_version < "3.14"
+scikit-learn==1.5.2 ; python_version >= "3.10" and python_version < "3.14"
+scipy==1.13.1 ; python_version >= "3.10" and python_version < "3.14"
+six==1.16.0 ; python_version >= "3.10" and python_version < "3.14"
+snowballstemmer==2.2.0 ; python_version >= "3.10" and python_version < "3.14"
+soupsieve==2.6 ; python_version >= "3.10" and python_version < "3.14"
+sphinx-pyproject==0.3.0 ; python_version >= "3.10" and python_version < "3.14"
+sphinx-rtd-theme==2.0.0 ; python_version >= "3.10" and python_version < "3.14"
+sphinx==7.4.7 ; python_version >= "3.10" and python_version < "3.14"
+sphinxcontrib-applehelp==2.0.0 ; python_version >= "3.10" and python_version < "3.14"
+sphinxcontrib-devhelp==2.0.0 ; python_version >= "3.10" and python_version < "3.14"
+sphinxcontrib-htmlhelp==2.1.0 ; python_version >= "3.10" and python_version < "3.14"
+sphinxcontrib-jquery==4.1 ; python_version >= "3.10" and python_version < "3.14"
+sphinxcontrib-jsmath==1.0.1 ; python_version >= "3.10" and python_version < "3.14"
+sphinxcontrib-qthelp==2.0.0 ; python_version >= "3.10" and python_version < "3.14"
+sphinxcontrib-serializinghtml==2.0.0 ; python_version >= "3.10" and python_version < "3.14"
+stack-data==0.6.3 ; python_version >= "3.10" and python_version < "3.14"
+threadpoolctl==3.5.0 ; python_version >= "3.10" and python_version < "3.14"
+tinycss2==1.4.0 ; python_version >= "3.10" and python_version < "3.14"
+tomli==2.0.2 ; python_version >= "3.10" and python_version < "3.14"
+tornado==6.4.1 ; python_version >= "3.10" and python_version < "3.14"
+traitlets==5.14.3 ; python_version >= "3.10" and python_version < "3.14"
+typing-extensions==4.12.2 ; python_version >= "3.10" and python_version < "3.14"
+tzdata==2024.2 ; python_version >= "3.10" and python_version < "3.14"
+urllib3==2.2.3 ; python_version >= "3.10" and python_version < "3.14"
+wcwidth==0.2.13 ; python_version >= "3.10" and python_version < "3.14"
+webencodings==0.5.1 ; python_version >= "3.10" and python_version < "3.14"
+xmltodict==0.14.2 ; python_version >= "3.10" and python_version < "3.14"
diff --git a/noxfile.py b/noxfile.py
index 75c9ea902..016cf1cdd 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -15,7 +15,7 @@
 
 # set the test parameters
 verbose = False
-python_versions_to_test = ["3.9", "3.10", "3.11", "3.12"]
+python_versions_to_test = ["3.9", "3.10", "3.11", "3.12", "3.13"]
 nox.options.stop_on_first_error = True
 nox.options.error_on_missing_interpreters = True
 nox.options.default_venv_backend = 'virtualenv'
diff --git a/pyproject.toml b/pyproject.toml
index 323978437..8eb1fca51 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -58,12 +58,13 @@ kernel_tuner = "kernel_tuner.interface:entry_point"
 
 # ATTENTION: if anything is changed here, run `poetry update`
 [tool.poetry.dependencies]
-python = ">=3.9,<3.13"          # NOTE when changing the supported Python versions, also change the test versions in the noxfile
-numpy = "^1.26.0"              # Python 3.12 requires numpy at least 1.26
+python = ">=3.10,<3.14"         # NOTE when changing the supported Python versions, also change the test versions in the noxfile
+numpy = "^1.26.0"               # Python 3.12 requires numpy at least 1.26
 scipy = ">=1.11.0"
+ax = ">=0.4.3"
 packaging = "*"                 # required by file_utils
 jsonschema = "*"
-python-constraint2 = "^2.0.0b5"
+python-constraint2 = "^2.0.0b8"
 xmltodict = "*"
 pandas = ">=2.0.0"
 scikit-learn = ">=1.0.2"

From 04eacc455c2f71369d8b60a00fd9ac022df10cc4 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 19:10:33 -0800
Subject: [PATCH 023/253] Setup Searchspace to Ax SearchSpace conversion

---
 kernel_tuner/searchspace.py | 37 +++++++++++++++++++++++++++++++++++--
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index cc569abc5..30ea2af03 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -50,9 +50,9 @@ def __init__(
         framework_l = framework.lower()
         restrictions = restrictions if restrictions is not None else []
         self.tune_params = tune_params
-        self.restrictions = restrictions
+        self.restrictions = restrictions.copy()
         # the searchspace can add commonly used constraints (e.g. maxprod(blocks) <= maxthreads)
-        self._modified_restrictions = restrictions
+        self._modified_restrictions = restrictions.copy()
         self.param_names = list(self.tune_params.keys())
         self.params_values = tuple(tuple(param_vals) for param_vals in self.tune_params.values())
         self.params_values_indices = None
@@ -788,3 +788,36 @@ def order_param_configs(
                 f"The number of ordered parameter configurations ({len(ordered_param_configs)}) differs from the original number of parameter configurations ({len(param_configs)})"
             )
         return ordered_param_configs
+    
+    def to_ax_searchspace(self):
+        """Convert this searchspace to an Ax SearchSpace."""
+        from ax import ChoiceParameter, FixedParameter, ParameterType, SearchSpace
+
+        # create searchspace
+        ax_searchspace = SearchSpace([])
+
+        # add the parameters
+        for param_name, param_values in self.tune_params.items():
+            if len(param_values) == 0:
+                continue
+
+            # convert the types
+            assert all(isinstance(param_values[0], type(v)) for v in param_values), f"Parameter values of mixed types are not supported: {param_values}"
+            param_type_mapping = {
+                str: ParameterType.STRING,
+                int: ParameterType.INT,
+                float: ParameterType.FLOAT,
+                bool: ParameterType.BOOL
+            }
+            param_type = param_type_mapping[type(param_values[0])]
+
+            # add the parameter
+            if len(param_values) == 1:
+                ax_searchspace.add_parameter(FixedParameter(param_name, param_type, param_values[0]))
+            else:
+                ax_searchspace.add_parameter(ChoiceParameter(param_name, param_type, param_values))
+
+        # add the constraints
+        raise NotImplementedError("Conversion to Ax SearchSpace has not been fully implemented as Ax Searchspaces can't capture full complexity.")
+
+        return ax_searchspace

From 5f31dfcdd65a17079af5c454ee8734de14440d4e Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 19:20:24 -0800
Subject: [PATCH 024/253] Implemented Ax as a BO strategy

---
 kernel_tuner/strategies/bayes_opt_ax.py | 29 +++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 kernel_tuner/strategies/bayes_opt_ax.py

diff --git a/kernel_tuner/strategies/bayes_opt_ax.py b/kernel_tuner/strategies/bayes_opt_ax.py
new file mode 100644
index 000000000..234c882c4
--- /dev/null
+++ b/kernel_tuner/strategies/bayes_opt_ax.py
@@ -0,0 +1,29 @@
+"""The strategy that uses particle swarm optimization."""
+
+from ax import optimize
+
+from kernel_tuner import util
+from kernel_tuner.searchspace import Searchspace
+from kernel_tuner.strategies.common import (
+    CostFunc,
+)
+
+
+def tune(searchspace: Searchspace, runner, tuning_options):
+    cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True)
+
+    ax_searchspace = searchspace.to_ax_searchspace()
+
+    try:
+        best_parameters, best_values, experiment, model = optimize(
+            parameters=ax_searchspace.parameters,
+            parameter_constraints=ax_searchspace.parameter_constraints,
+            # Booth function
+            evaluation_function=cost_func,
+            minimize=True,
+        )
+    except util.StopCriterionReached as e:
+        if tuning_options.verbose:
+            print(e)
+
+    return cost_func.results

From 2e4f490c6c1a3d7fef7af79ac4e842701ed0cdbf Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 20:34:33 -0800
Subject: [PATCH 025/253] Made BO compatible with StopCriterion

---
 kernel_tuner/strategies/bayes_opt.py | 22 ++++++++++++++--------
 pyproject.toml                       |  2 +-
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
index 47f82e3a9..e4c9c52a2 100644
--- a/kernel_tuner/strategies/bayes_opt.py
+++ b/kernel_tuner/strategies/bayes_opt.py
@@ -1,4 +1,5 @@
 """Bayesian Optimization implementation from the thesis by Willemsen."""
+
 import itertools
 import time
 import warnings
@@ -13,6 +14,7 @@
 # BO imports
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies.common import CostFunc
+from kernel_tuner.util import StopCriterionReached
 
 try:
     from sklearn.gaussian_process import GaussianProcessRegressor
@@ -137,11 +139,12 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         bo = BayesianOptimization(
             parameter_space, removed_tune_params, tuning_options, normalize_dict, denormalize_dict, cost_func
         )
-    except util.StopCriterionReached as e:
-        print(
+    except StopCriterionReached:
+        warnings.warn(
             "Stop criterion reached during initialization, was popsize (default 20) greater than max_fevals or the alotted time?"
         )
-        raise e
+        return cost_func.results
+        # raise e
     try:
         if max_fevals - bo.fevals <= 0:
             raise ValueError("No function evaluations left for optimization after sampling")
@@ -847,9 +850,9 @@ def __optimize_multi_fast(self, max_fevals):
 
     def __optimize_multi_ultrafast(self, max_fevals, predict_eval_ratio=5):
         """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once, or fewer if predictions take too long.
-        
-        The `predict_eval_ratio` denotes the ratio between the duration of the predictions and the duration of evaluations, as updating the prediction every evaluation is not efficient when evaluation is quick. 
-        Predictions are only updated if the previous evaluation took more than `predict_eval_ratio` * the last prediction duration, or the last prediction is more than `predict_eval_ratio` evaluations ago. 
+
+        The `predict_eval_ratio` denotes the ratio between the duration of the predictions and the duration of evaluations, as updating the prediction every evaluation is not efficient when evaluation is quick.
+        Predictions are only updated if the previous evaluation took more than `predict_eval_ratio` * the last prediction duration, or the last prediction is more than `predict_eval_ratio` evaluations ago.
         """
         last_prediction_counter = 0
         last_prediction_time = 0
@@ -857,7 +860,10 @@ def __optimize_multi_ultrafast(self, max_fevals, predict_eval_ratio=5):
         while self.fevals < max_fevals:
             aqfs = self.multi_afs
             # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
-            if last_prediction_time * predict_eval_ratio <= last_eval_time or last_prediction_counter >= predict_eval_ratio:
+            if (
+                last_prediction_time * predict_eval_ratio <= last_eval_time
+                or last_prediction_counter >= predict_eval_ratio
+            ):
                 last_prediction_counter = 0
                 pred_start = time.perf_counter()
                 if last_eval_time > 0.0:
@@ -979,4 +985,4 @@ def visualize_after_opt(self):
         plt.plot(x_axis, mu, label="predictions", linestyle=" ", marker=".")
         plt.plot(x_axis, brute_force_observations, label="actual", linestyle=" ", marker=".")
         plt.legend()
-        plt.show()
\ No newline at end of file
+        plt.show()
diff --git a/pyproject.toml b/pyproject.toml
index 8eb1fca51..6a53b8556 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -61,7 +61,7 @@ kernel_tuner = "kernel_tuner.interface:entry_point"
 python = ">=3.10,<3.14"         # NOTE when changing the supported Python versions, also change the test versions in the noxfile
 numpy = "^1.26.0"               # Python 3.12 requires numpy at least 1.26
 scipy = ">=1.11.0"
-ax = ">=0.4.3"
+ax-platform = ">=0.4.3"
 packaging = "*"                 # required by file_utils
 jsonschema = "*"
 python-constraint2 = "^2.0.0b8"

From 705e724622b69c14b9a131f6b26a8e17d7ab1e9e Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 20:35:24 -0800
Subject: [PATCH 026/253] Minor compatbility change to BO strategies

---
 kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py |  2 +-
 kernel_tuner/strategies/bayes_opt_alt_BOTorch.py   | 14 +++-----------
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
index cc991dadf..d584c0e3b 100644
--- a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
+++ b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
@@ -694,7 +694,7 @@ def optimize(self, max_fevals: int) -> Tuple[tuple, float]:    #NOSONAR
         return self.all_results
 
     def objective_function(self, param_config: tuple) -> float:
-        return self.runner.run(param_config, self.tuning_options)
+        return self.runner.run([param_config], self.tuning_options)
 
     def evaluate_config(self, param_config_index: int) -> float:
         """Evaluates a parameter configuration, returns the time."""
diff --git a/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
index 891db5236..cf733cdde 100644
--- a/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
@@ -1,17 +1,10 @@
-""" BOTorch package from https://github.com/pytorch/botorch """
+"""BOTorch package from https://github.com/pytorch/botorch."""
 from __future__ import print_function
 
 from collections import OrderedDict
-import numpy as np
 
 try:
-    import torch
-    from botorch.models import SingleTaskGP
-    from botorch.fit import fit_gpytorch_model
-    from botorch.utils import standardize
-    from gpytorch.mlls import ExactMarginalLogLikelihood
-    from botorch.acquisition import UpperConfidenceBound
-    from botorch.optim import optimize_acqf
+    pass
 except Exception:
     BayesianOptimization = None
     bayes_opt_present = False
@@ -22,7 +15,7 @@
 
 
 def tune(runner, kernel_options, device_options, tuning_options):
-    """ Find the best performing kernel configuration in the parameter space
+    """Find the best performing kernel configuration in the parameter space.
 
     :params runner: A runner from kernel_tuner.runners
     :type runner: kernel_tuner.runner
@@ -44,7 +37,6 @@ def tune(runner, kernel_options, device_options, tuning_options):
     :rtype: list(dict()), dict()
 
     """
-
     if not bayes_opt_present:
         raise ImportError("Error: optional dependency Bayesian Optimization not installed")
     init_points = tuning_options.strategy_options.get("popsize", 20)

From 6cde57e59aefd83c2edbf312859c1cd3f100c7c2 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 20:37:05 -0800
Subject: [PATCH 027/253] Extended hyperparameter tuning benchmark

---
 kernel_tuner/backends/hypertuner.py     |  7 ++++++-
 kernel_tuner/hyper.py                   | 27 ++++++++++++++++---------
 kernel_tuner/strategies/bayes_opt_ax.py |  2 +-
 3 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index 53e5dd6da..0b1c69adb 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -64,8 +64,13 @@ def compile(self, kernel_instance):
         # gpus = ["RTX_3090", "RTX_2080_Ti"]
         # applications = None
 
-        gpus = ["A100", "A4000", "MI50", "MI250X", "W6600"]
+        gpus = ["A100", "A4000", "MI250X", "W6600"]
         applications = [
+            {
+                "name": "dedispersion_milo",
+                "folder": "../autotuning_methodology/cached_data_used/kernels",
+                "input_file": "dedispersion_milo.json"
+            },
             {
                 "name": "convolution_milo",
                 "folder": "../autotuning_methodology/cached_data_used/kernels",
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 8c0fb5d4e..3d2dfffa7 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -60,7 +60,7 @@ def tune_hyper_params(target_strategy: str, hyper_params: dict, *args, **kwargs)
 
     # pass a temporary cache file to avoid duplicate execution
     cachefile = get_random_unique_filename('temp_', '.json')
-    cachefile = Path("hyperparamtuning_milo_bruteforce.json")
+    cachefile = Path("hyperparamtuning_milo_bruteforce_greedy_ils.json")
     kwargs['cache'] = str(cachefile)
 
     def put_if_not_present(target_dict, key, value):
@@ -78,7 +78,7 @@ def put_if_not_present(target_dict, key, value):
                                     objective='score', objective_higher_is_better=True, iterations=iterations, **kwargs)
     
     # remove the temporary cachefile and return only unique results in order
-    cachefile.unlink()
+    # cachefile.unlink()
     result_unique = dict()
     for r in result:
         config_id = ",".join(str(r[k]) for k in hyper_params.keys())
@@ -87,14 +87,15 @@ def put_if_not_present(target_dict, key, value):
     return list(result_unique.values()), env
 
 if __name__ == "__main__":  # TODO remove in production
-    hyperparams = {
-        'popsize': [10, 20, 30],
-        'maxiter': [50, 100, 150],
-        'w': [0.25, 0.5, 0.75],
-        'c1': [1.0, 2.0, 3.0],
-        'c2': [0.5, 1.0, 1.5]
-    }
-    result, env = tune_hyper_params('pso', hyperparams)
+    # hyperparams = {
+    #     'popsize': [10, 20, 30],
+    #     'maxiter': [50, 100, 150],
+    #     'w': [0.25, 0.5, 0.75],
+    #     'c1': [1.0, 2.0, 3.0],
+    #     'c2': [0.5, 1.0, 1.5]
+    # }
+    # result, env = tune_hyper_params('pso', hyperparams)
+
     # hyperparams = {
     #     'neighbor': ['Hamming', 'adjacent'],
     #     'restart': [True, False],
@@ -102,5 +103,11 @@ def put_if_not_present(target_dict, key, value):
     #     'random_walk': [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99]
     # }
     # result, env = tune_hyper_params('greedy_ils', hyperparams)
+
+    hyperparams = {
+        'method': ['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 'BFGS', 'trust-constr'],
+    }
+    result, env = tune_hyper_params('dual_annealing', hyperparams)
+
     print(result)
     print(env['best_config'])
diff --git a/kernel_tuner/strategies/bayes_opt_ax.py b/kernel_tuner/strategies/bayes_opt_ax.py
index 234c882c4..2bb3ce8fc 100644
--- a/kernel_tuner/strategies/bayes_opt_ax.py
+++ b/kernel_tuner/strategies/bayes_opt_ax.py
@@ -1,4 +1,4 @@
-"""The strategy that uses particle swarm optimization."""
+"""Bayesian Optimization implementation using the Ax platform."""
 
 from ax import optimize
 

From aed5f0d430f5830d6a7fc7bf400fcc3c782142ce Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 20:38:07 -0800
Subject: [PATCH 028/253] Implemented Bayesian Optimization using BOTorch

---
 kernel_tuner/interface.py                    |  6 +-
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 76 ++++++++++++++++++++
 2 files changed, 80 insertions(+), 2 deletions(-)
 create mode 100644 kernel_tuner/strategies/bayes_opt_BOTorch.py

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 2bfa06a89..e9469ec6d 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -51,6 +51,7 @@
     basinhopping,
     bayes_opt,
     bayes_opt_alt_BOTorch,
+    bayes_opt_BOTorch,
     bayes_opt_GPyTorch,
     bayes_opt_GPyTorch_lean,
     bayes_opt_old,
@@ -88,7 +89,8 @@
     "bayes_opt_old": bayes_opt_old,
     "bayes_opt_GPyTorch": bayes_opt_GPyTorch,
     "bayes_opt_GPyTorch_lean": bayes_opt_GPyTorch_lean,
-    "bayes_opt_BOTorch": bayes_opt_alt_BOTorch,
+    "bayes_opt_BOTorch": bayes_opt_BOTorch,
+    "bayes_opt_BOTorch_alt": bayes_opt_alt_BOTorch,
 }
 
 
@@ -618,7 +620,7 @@ def tune_kernel(
         and not callable(restrictions)
         and not any(isinstance(r, Constraint) for r in restrictions)
     ):
-        restrictions = util.parse_restrictions(restrictions, tune_params)
+        restrictions = util.compile_restrictions(restrictions, tune_params)
 
     # sort all the options into separate dicts
     opts = locals()
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
new file mode 100644
index 000000000..d7a88bab5
--- /dev/null
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -0,0 +1,76 @@
+"""Bayesian Optimization implementation using BO Torch."""
+
+import numpy as np
+import torch
+from botorch import fit_gpytorch_model
+from botorch.acquisition import ExpectedImprovement
+from botorch.models import SingleTaskGP
+from botorch.optim import optimize_acqf_discrete
+from gpytorch.mlls import ExactMarginalLogLikelihood
+from torch import Tensor
+
+from kernel_tuner import util
+from kernel_tuner.searchspace import Searchspace
+from kernel_tuner.strategies.common import (
+    CostFunc,
+)
+
+
+def tune(searchspace: Searchspace, runner, tuning_options):
+    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
+    initial_sample_size = tuning_options.strategy_options.get("popsize", 20)
+    cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False)
+
+    # function to optimize
+    def evaluate_function(X):
+        if isinstance(X, (Tensor, list)):
+            results = []
+            if X.dim() == 1:
+                results = [[cost_func(X)]]
+            else:
+                results = [[cost_func(c)] for c in X]
+            return torch.from_numpy(np.array(results))
+        else:
+            raise NotImplementedError(f"Evaluation has not been implemented for type {type(X)}")
+
+    # set up conversion to tensors
+    full_space = torch.from_numpy(searchspace.get_list_numpy().astype(float))
+
+    # get bounds
+    bounds = []
+    for v in searchspace.params_values:
+        bounds.append([min(v), max(v)])
+    bounds = torch.from_numpy(np.array(bounds).transpose())
+
+    try:
+        # take initial sample
+        sample_indices = torch.from_numpy(searchspace.get_random_sample_indices(initial_sample_size))
+        train_X = full_space.index_select(0, sample_indices)
+        train_Y = evaluate_function(train_X)
+
+        # Bayesian optimization loop
+        for _ in range(max_fevals):
+            # Fit a Gaussian Process model
+            gp = SingleTaskGP(train_X, train_Y)
+            mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
+            fit_gpytorch_model(mll)
+            
+            # Define the acquisition function
+            ei = ExpectedImprovement(model=gp, best_f=train_Y.min(), maximize=False)
+            
+            # Optimize acquisition function to find the next evaluation point
+            candidate, _ = optimize_acqf_discrete(
+                ei, 
+                q=1, 
+                choices=full_space
+            )
+            
+            # Evaluate the new candidate and update the dataset
+            new_y = evaluate_function(candidate)
+            train_X = torch.cat([train_X, candidate])
+            train_Y = torch.cat([train_Y, new_y])
+    except util.StopCriterionReached as e:
+        if tuning_options.verbose:
+            print(e)
+
+    return cost_func.results

From 8c0dc497645b58f2640d91cbf735161c48144fdc Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 21:13:01 -0800
Subject: [PATCH 029/253] Automatically time out any PyTest that takes longer
 than 60 seconds

---
 pyproject.toml   | 3 ++-
 test/conftest.py | 7 +++++++
 2 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 test/conftest.py

diff --git a/pyproject.toml b/pyproject.toml
index 6a53b8556..3d8511493 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -114,13 +114,14 @@ markupsafe = "^2.0.1"       # TODO why do we need markupsafe here?
 optional = true
 [tool.poetry.group.test.dependencies]
 pytest = "^8.2.0"
+pytest-timeout = "^2.3.1"
 pytest-cov = "^5.0.0"
 mock = "^5.1.0"
 nox = "^2024.4.15"
 nox-poetry = "^1.0.3"
 ruff = "^0.4.4"
 pep440 = "^0.1.2"
-tomli = "^2.0.1"      # held back by Python <= 3.10, can be replaced by built-in [tomllib](https://docs.python.org/3.11/library/tomllib.html) from Python 3.11 onwards
+tomli = "^2.0.1"          # held back by Python <= 3.10, can be replaced by built-in [tomllib](https://docs.python.org/3.11/library/tomllib.html) from Python 3.11 onwards
 
 # development dependencies are unused for now, as this is already covered by test and docs
 # # ATTENTION: if anything is changed here, run `poetry update`
diff --git a/test/conftest.py b/test/conftest.py
new file mode 100644
index 000000000..1539a6cdf
--- /dev/null
+++ b/test/conftest.py
@@ -0,0 +1,7 @@
+import pytest
+
+
+def pytest_collection_modifyitems(items):
+    for item in items:
+        if item.get_closest_marker('timeout') is None:
+            item.add_marker(pytest.mark.timeout(60))
\ No newline at end of file

From b9b748d8ee22a633e14840ca81972a3bce56b6ac Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 21:15:06 -0800
Subject: [PATCH 030/253] Avoided inadvertent use of cache in
 hyperparametertuning tests

---
 test/strategies/test_strategies.py | 2 ++
 test/test_hyper.py                 | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
index 57c43b4f7..11b231e62 100644
--- a/test/strategies/test_strategies.py
+++ b/test/strategies/test_strategies.py
@@ -37,6 +37,8 @@ def vector_add():
 # skip some strategies if their dependencies are not installed
 strategies = []
 for s in strategy_map.keys():
+    if 'gpytorch' in s.lower() or 'botorch_alt' in s.lower() or 'bayes_opt_old' in s.lower():
+        continue
     if 'gpytorch' in s.lower():
         strategies.append(pytest.param(s, marks=skip_if_no_bayesopt_gpytorch))
     elif 'botorch' in s.lower():
diff --git a/test/test_hyper.py b/test/test_hyper.py
index 7aab219ef..d34294585 100644
--- a/test/test_hyper.py
+++ b/test/test_hyper.py
@@ -15,6 +15,6 @@ def test_hyper(env):
 
     target_strategy = "genetic_algorithm"
 
-    result, env = tune_hyper_params(target_strategy, hyper_params, iterations=1, verbose=True)
+    result, env = tune_hyper_params(target_strategy, hyper_params, iterations=1, verbose=True, cache=None)
     assert len(result) == 2
     assert 'best_config' in env

From 177802628483f3a9dff7dea304d8806721ad09fe Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 21:15:26 -0800
Subject: [PATCH 031/253] Avoided inadvertent use of cache in
 hyperparametertuning tests

---
 kernel_tuner/hyper.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 3d2dfffa7..b84912a8b 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -59,9 +59,10 @@ def tune_hyper_params(target_strategy: str, hyper_params: dict, *args, **kwargs)
         del kwargs['iterations']
 
     # pass a temporary cache file to avoid duplicate execution
-    cachefile = get_random_unique_filename('temp_', '.json')
-    cachefile = Path("hyperparamtuning_milo_bruteforce_greedy_ils.json")
-    kwargs['cache'] = str(cachefile)
+    if 'cache' not in kwargs:
+        cachefile = get_random_unique_filename('temp_', '.json')
+        cachefile = Path("hyperparamtuning_milo_bruteforce_greedy_ils.json")
+        kwargs['cache'] = str(cachefile)
 
     def put_if_not_present(target_dict, key, value):
         target_dict[key] = value if key not in target_dict else target_dict[key]

From 034352fbe7f7f27f8d94a4e6efa5a165d3e10b3a Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 21:15:57 -0800
Subject: [PATCH 032/253] Shallow copy if the restrictions are copiable

---
 kernel_tuner/searchspace.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 30ea2af03..ca30e2563 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -50,9 +50,9 @@ def __init__(
         framework_l = framework.lower()
         restrictions = restrictions if restrictions is not None else []
         self.tune_params = tune_params
-        self.restrictions = restrictions.copy()
+        self.restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
         # the searchspace can add commonly used constraints (e.g. maxprod(blocks) <= maxthreads)
-        self._modified_restrictions = restrictions.copy()
+        self._modified_restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
         self.param_names = list(self.tune_params.keys())
         self.params_values = tuple(tuple(param_vals) for param_vals in self.tune_params.values())
         self.params_values_indices = None

From eba03f83689c70ed3a886cbdaa43a60acaabcfe7 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 5 Nov 2024 15:46:12 -0800
Subject: [PATCH 033/253] Refactored BO BOTorch into class structure

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 130 +++++++++++--------
 1 file changed, 77 insertions(+), 53 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index d7a88bab5..6ef703674 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -1,13 +1,18 @@
 """Bayesian Optimization implementation using BO Torch."""
 
 import numpy as np
-import torch
-from botorch import fit_gpytorch_model
-from botorch.acquisition import ExpectedImprovement
-from botorch.models import SingleTaskGP
-from botorch.optim import optimize_acqf_discrete
-from gpytorch.mlls import ExactMarginalLogLikelihood
-from torch import Tensor
+
+try:
+    import torch
+    from botorch import fit_gpytorch_model
+    from botorch.acquisition import ExpectedImprovement
+    from botorch.models import SingleTaskGP
+    from botorch.optim import optimize_acqf_discrete
+    from gpytorch.mlls import ExactMarginalLogLikelihood
+    from torch import Tensor
+    bayes_opt_present = True
+except ImportError:
+    bayes_opt_present = False
 
 from kernel_tuner import util
 from kernel_tuner.searchspace import Searchspace
@@ -18,59 +23,78 @@
 
 def tune(searchspace: Searchspace, runner, tuning_options):
     max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
-    initial_sample_size = tuning_options.strategy_options.get("popsize", 20)
-    cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False)
+    bo = BayesianOptimization(searchspace, runner, tuning_options)
+    return bo.run(max_fevals)
+
+class BayesianOptimization():
+    """Bayesian Optimization class."""
+
+    def __init__(self, searchspace: Searchspace, runner, tuning_options):
+        self.initial_sample_taken = False
+        self.initial_sample_size = tuning_options.strategy_options.get("popsize", 20)
+        self.tuning_options = tuning_options
+        self.cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False)
+
+        # set up conversion to tensors
+        self.searchspace = searchspace
+        self.searchspace_tensors = torch.from_numpy(searchspace.get_list_numpy().astype(float))
+        self.train_X = torch.empty_like(self.searchspace_tensors)
+        self.train_Y = torch.empty(len(self.train_X))
 
-    # function to optimize
-    def evaluate_function(X):
-        if isinstance(X, (Tensor, list)):
+        # get bounds
+        bounds = []
+        for v in searchspace.params_values:
+            bounds.append([min(v), max(v)])
+        bounds = torch.from_numpy(np.array(bounds).transpose())
+
+    def evaluate_configs(self, X: Tensor):
+        """Evaluate a tensor of one or multiple configurations."""
+        if isinstance(X, Tensor):
             results = []
             if X.dim() == 1:
-                results = [[cost_func(X)]]
+                results = [[self.cost_func(X)]]
             else:
-                results = [[cost_func(c)] for c in X]
+                results = [[self.cost_func(c)] for c in X]
             return torch.from_numpy(np.array(results))
         else:
             raise NotImplementedError(f"Evaluation has not been implemented for type {type(X)}")
+        
+    def initial_sample(self):
+        """Take an initial sample."""
+        sample_indices = torch.from_numpy(self.searchspace.get_random_sample_indices(self.initial_sample_size))
+        self.train_X = self.searchspace_tensors.index_select(0, sample_indices)
+        self.train_Y = self.evaluate_configs(self.train_X)
+        self.initial_sample_taken = True
 
-    # set up conversion to tensors
-    full_space = torch.from_numpy(searchspace.get_list_numpy().astype(float))
-
-    # get bounds
-    bounds = []
-    for v in searchspace.params_values:
-        bounds.append([min(v), max(v)])
-    bounds = torch.from_numpy(np.array(bounds).transpose())
-
-    try:
-        # take initial sample
-        sample_indices = torch.from_numpy(searchspace.get_random_sample_indices(initial_sample_size))
-        train_X = full_space.index_select(0, sample_indices)
-        train_Y = evaluate_function(train_X)
+    def run(self, max_fevals: int):
+        """Run the Bayesian Optimization loop for at most `max_fevals`."""
+        try:
+            if not self.initial_sample_taken:
+                self.initial_sample()
 
-        # Bayesian optimization loop
-        for _ in range(max_fevals):
-            # Fit a Gaussian Process model
-            gp = SingleTaskGP(train_X, train_Y)
-            mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
-            fit_gpytorch_model(mll)
-            
-            # Define the acquisition function
-            ei = ExpectedImprovement(model=gp, best_f=train_Y.min(), maximize=False)
-            
-            # Optimize acquisition function to find the next evaluation point
-            candidate, _ = optimize_acqf_discrete(
-                ei, 
-                q=1, 
-                choices=full_space
-            )
-            
-            # Evaluate the new candidate and update the dataset
-            new_y = evaluate_function(candidate)
-            train_X = torch.cat([train_X, candidate])
-            train_Y = torch.cat([train_Y, new_y])
-    except util.StopCriterionReached as e:
-        if tuning_options.verbose:
-            print(e)
+            # Bayesian optimization loop
+            for _ in range(max_fevals):
+                # Fit a Gaussian Process model
+                gp = SingleTaskGP(self.train_X, self.train_Y)
+                mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
+                fit_gpytorch_model(mll)
+                
+                # Define the acquisition function
+                ei = ExpectedImprovement(model=gp, best_f=self.train_Y.min(), maximize=False)
+                
+                # Optimize acquisition function to find the next evaluation point
+                candidate, _ = optimize_acqf_discrete(
+                    ei, 
+                    q=1, 
+                    choices=self.searchspace_tensors
+                )
+                
+                # Evaluate the new candidate and update the dataset
+                new_y = self.evaluate_configs(candidate)
+                self.train_X = torch.cat([self.train_X, candidate])
+                self.train_Y = torch.cat([self.train_Y, new_y])
+        except util.StopCriterionReached as e:
+            if self.tuning_options.verbose:
+                print(e)
 
-    return cost_func.results
+        return self.cost_func.results 

From c6b243ab952d8c80437053dc61fd3933296aa15b Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 6 Nov 2024 01:34:44 -0800
Subject: [PATCH 034/253] Switched to newer fit function, more efficient model
 initialization by reusing state

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 23 +++++++++++++++-----
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 6ef703674..c63c836e1 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -4,7 +4,7 @@
 
 try:
     import torch
-    from botorch import fit_gpytorch_model
+    from botorch import fit_gpytorch_mll
     from botorch.acquisition import ExpectedImprovement
     from botorch.models import SingleTaskGP
     from botorch.optim import optimize_acqf_discrete
@@ -66,21 +66,29 @@ def initial_sample(self):
         self.train_Y = self.evaluate_configs(self.train_X)
         self.initial_sample_taken = True
 
+    def initialize_model(self, state_dict=None):
+        """Initialize the model, possibly with a state dict for faster fitting."""
+        model = SingleTaskGP(self.train_X, self.train_Y)
+        mll = ExactMarginalLogLikelihood(model.likelihood, model)
+        # SumMarginalLogLikelihood
+        if state_dict is not None:
+            model.load_state_dict(state_dict)
+        return mll, model
+
     def run(self, max_fevals: int):
         """Run the Bayesian Optimization loop for at most `max_fevals`."""
         try:
             if not self.initial_sample_taken:
                 self.initial_sample()
+                mll, model = self.initialize_model()
 
             # Bayesian optimization loop
             for _ in range(max_fevals):
-                # Fit a Gaussian Process model
-                gp = SingleTaskGP(self.train_X, self.train_Y)
-                mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
-                fit_gpytorch_model(mll)
+                # fit a Gaussian Process model
+                fit_gpytorch_mll(mll)
                 
                 # Define the acquisition function
-                ei = ExpectedImprovement(model=gp, best_f=self.train_Y.min(), maximize=False)
+                ei = ExpectedImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
                 
                 # Optimize acquisition function to find the next evaluation point
                 candidate, _ = optimize_acqf_discrete(
@@ -93,6 +101,9 @@ def run(self, max_fevals: int):
                 new_y = self.evaluate_configs(candidate)
                 self.train_X = torch.cat([self.train_X, candidate])
                 self.train_Y = torch.cat([self.train_Y, new_y])
+
+                # reinitialize the models so they are ready for fitting on next iteration
+                mll, model = self.initialize_model(model.state_dict())
         except util.StopCriterionReached as e:
             if self.tuning_options.verbose:
                 print(e)

From 15818401451abf1e92138c3ad8f0e99e1152ccdb Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 6 Nov 2024 11:45:42 -0800
Subject: [PATCH 035/253] Added option to return invalid configurations in
 CostFunc

---
 kernel_tuner/strategies/common.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index 3420c86ea..717d2ca7e 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -53,10 +53,11 @@ def get_options(strategy_options, options):
 
 
 class CostFunc:
-    def __init__(self, searchspace: Searchspace, tuning_options, runner, *, scaling=False, snap=True):
+    def __init__(self, searchspace: Searchspace, tuning_options, runner, *, scaling=False, snap=True, return_invalid=False):
         self.runner = runner
         self.snap = snap
         self.scaling = scaling
+        self.return_invalid = return_invalid
         self.searchspace = searchspace
         self.tuning_options = tuning_options
         if isinstance(self.tuning_options, dict):
@@ -111,8 +112,11 @@ def __call__(self, x, check_restrictions=True):
             self.runner.last_strategy_start_time = perf_counter()
 
         # get numerical return value, taking optimization direction into account
-        return_value = result[self.tuning_options.objective] or sys.float_info.max
-        return_value = return_value if not self.tuning_options.objective_higher_is_better else -return_value
+        if self.return_invalid:
+            return_value = result[self.tuning_options.objective]
+        else:
+            return_value = result[self.tuning_options.objective] or sys.float_info.max
+        return_value = -return_value if self.tuning_options.objective_higher_is_better else return_value
 
         return return_value
 

From 620ee60d30c3ae4dca423d7544b415ebb4ef5e7b Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 6 Nov 2024 12:29:33 -0800
Subject: [PATCH 036/253] Added the handling of invalid configurations,
 training data is directly modified by the evaluation function

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 52 +++++++++++++-------
 1 file changed, 34 insertions(+), 18 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index c63c836e1..9009bed8e 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -33,7 +33,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.initial_sample_taken = False
         self.initial_sample_size = tuning_options.strategy_options.get("popsize", 20)
         self.tuning_options = tuning_options
-        self.cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False)
+        self.cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, return_invalid=True)
 
         # set up conversion to tensors
         self.searchspace = searchspace
@@ -41,29 +41,47 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.train_X = torch.empty_like(self.searchspace_tensors)
         self.train_Y = torch.empty(len(self.train_X))
 
-        # get bounds
-        bounds = []
-        for v in searchspace.params_values:
-            bounds.append([min(v), max(v)])
-        bounds = torch.from_numpy(np.array(bounds).transpose())
+        # # get bounds
+        # bounds = []
+        # for v in searchspace.params_values:
+        #     bounds.append([min(v), max(v)])
+        # bounds = torch.from_numpy(np.array(bounds).transpose())
+
+    def run_config(self, config):
+        """Run a single configuration. Returns the result and whether it is valid."""
+        result = self.cost_func(config)
+        valid = not isinstance(result, util.ErrorConfig)
+        if not valid:
+            result = np.nan
+        return result, valid
 
     def evaluate_configs(self, X: Tensor):
-        """Evaluate a tensor of one or multiple configurations."""
+        """Evaluate a tensor of one or multiple configurations. Modifies train_X and train_Y accordingly."""
         if isinstance(X, Tensor):
-            results = []
+            valid_configs = []
+            valid_results = []
             if X.dim() == 1:
-                results = [[self.cost_func(X)]]
-            else:
-                results = [[self.cost_func(c)] for c in X]
-            return torch.from_numpy(np.array(results))
+                X = [X]
+            for config in X:
+                res, valid = self.run_config(config)
+                if valid:
+                    valid_configs.append([config])
+                    valid_results.append([res])
+                else:
+                    # remove invalid configurations from the full searchspace
+                    index = self.searchspace.get_param_config_index(config)
+                    self.searchspace_tensors = torch.cat((self.searchspace_tensors[:index], self.searchspace_tensors[index+1:]))
+            # add valid results to the training set
+            self.train_X = torch.cat([self.train_X, torch.from_numpy(np.array(valid_configs))])
+            self.train_Y = torch.cat([self.train_Y, torch.from_numpy(np.array(valid_results))])
         else:
             raise NotImplementedError(f"Evaluation has not been implemented for type {type(X)}")
         
     def initial_sample(self):
         """Take an initial sample."""
         sample_indices = torch.from_numpy(self.searchspace.get_random_sample_indices(self.initial_sample_size))
-        self.train_X = self.searchspace_tensors.index_select(0, sample_indices)
-        self.train_Y = self.evaluate_configs(self.train_X)
+        sample_configs = self.searchspace_tensors.index_select(0, sample_indices)
+        self.evaluate_configs(sample_configs)
         self.initial_sample_taken = True
 
     def initialize_model(self, state_dict=None):
@@ -97,10 +115,8 @@ def run(self, max_fevals: int):
                     choices=self.searchspace_tensors
                 )
                 
-                # Evaluate the new candidate and update the dataset
-                new_y = self.evaluate_configs(candidate)
-                self.train_X = torch.cat([self.train_X, candidate])
-                self.train_Y = torch.cat([self.train_Y, new_y])
+                # evaluate the new candidate
+                self.evaluate_configs(candidate)
 
                 # reinitialize the models so they are ready for fitting on next iteration
                 mll, model = self.initialize_model(model.state_dict())

From 009cf01d195e83853503173b565dc3534e5b5740 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 6 Nov 2024 13:16:34 -0800
Subject: [PATCH 037/253] Setup structure for Tensorspace in Searchspace

---
 kernel_tuner/searchspace.py                  | 42 +++++++++++++++++++-
 kernel_tuner/strategies/bayes_opt_BOTorch.py |  3 +-
 2 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index ca30e2563..2b3946875 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -2,7 +2,7 @@
 import re
 from pathlib import Path
 from random import choice, shuffle
-from typing import List
+from typing import List, Union
 
 import numpy as np
 from constraint import (
@@ -17,6 +17,13 @@
     Solver,
 )
 
+try:
+    import torch
+    from torch import Tensor
+    torch_available = True
+except ImportError:
+    torch_available = False
+
 from kernel_tuner.util import check_restrictions as check_instance_restrictions
 from kernel_tuner.util import compile_restrictions, default_block_size_names
 
@@ -50,6 +57,7 @@ def __init__(
         framework_l = framework.lower()
         restrictions = restrictions if restrictions is not None else []
         self.tune_params = tune_params
+        self.tensorspace = None
         self.restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
         # the searchspace can add commonly used constraints (e.g. maxprod(blocks) <= maxthreads)
         self._modified_restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
@@ -573,10 +581,40 @@ def get_param_configs_at_indices(self, indices: List[int]) -> List[tuple]:
         # map(get) is ~40% faster than numpy[indices] (average based on six searchspaces with 10000, 100000 and 1000000 configs and 10 or 100 random indices)
         return list(map(self.list.__getitem__, indices))
 
-    def get_param_config_index(self, param_config: tuple):
+    def get_param_config_index(self, param_config: Union[tuple, Tensor]):
         """Lookup the index for a parameter configuration, returns None if not found."""
+        if torch_available and isinstance(param_config, Tensor):
+            param_config = self.tensor_to_param_config(param_config)
         # constant time O(1) access - much faster than any other method, but needs a shadow dict of the search space
         return self.__dict.get(param_config, None)
+    
+    def initialize_tensorspace(self):
+        """Encode the searchspace as floats in a Tensor. Save the mapping."""
+        self._map_tensor_to_param = []  # TODO
+        self._map_param_to_tensor = []  # TODO
+        numpy_repr = self.get_list_numpy()
+        numpy_repr = np.apply_along_axis(self.param_config_to_tensor, 0, numpy_repr)
+        self.tensorspace = torch.from_numpy(numpy_repr.astype(float))
+    
+    def get_tensorspace(self):
+        """Get the searchspace encoded in a Tensor."""
+        if self.tensorspace is None:
+            self.initialize_tensorspace()
+        return self.tensorspace
+    
+    def param_config_to_tensor(self, param_config: tuple):
+        """Convert from a parameter configuration to a Tensor."""
+        if self.tensorspace is None:
+            self.initialize_tensorspace()
+        # TODO
+        raise NotImplementedError()
+    
+    def tensor_to_param_config(self, tensor: Tensor):
+        """Convert from a Tensor to a parameter configuration."""
+        if self.tensorspace is None:
+            self.initialize_tensorspace()
+        # TODO
+        raise NotImplementedError()
 
     def __prepare_neighbors_index(self):
         """Prepare by calculating the indices for the individual parameters."""
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 9009bed8e..4a86598fe 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -37,7 +37,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
 
         # set up conversion to tensors
         self.searchspace = searchspace
-        self.searchspace_tensors = torch.from_numpy(searchspace.get_list_numpy().astype(float))
+        self.searchspace_tensors = searchspace.get_tensorspace()
         self.train_X = torch.empty_like(self.searchspace_tensors)
         self.train_Y = torch.empty(len(self.train_X))
 
@@ -63,6 +63,7 @@ def evaluate_configs(self, X: Tensor):
             if X.dim() == 1:
                 X = [X]
             for config in X:
+                assert isinstance(config, Tensor), f"Config must be a Tensor, but is of type {type(config)} ({config})"
                 res, valid = self.run_config(config)
                 if valid:
                     valid_configs.append([config])

From 33983f7c21b1f5bc39aec67d14bc426d295798c0 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 01:08:22 -0800
Subject: [PATCH 038/253] Implemented mappings and conversions to and from
 tensor to parameter configuration

---
 kernel_tuner/searchspace.py | 40 ++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 2b3946875..36001c835 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -1,4 +1,5 @@
 import ast
+import numbers
 import re
 from pathlib import Path
 from random import choice, shuffle
@@ -58,6 +59,9 @@ def __init__(
         restrictions = restrictions if restrictions is not None else []
         self.tune_params = tune_params
         self.tensorspace = None
+        self.tensor_categorical_dimensions = []
+        self._map_tensor_to_param = []
+        self._map_param_to_tensor = []
         self.restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
         # the searchspace can add commonly used constraints (e.g. maxprod(blocks) <= maxthreads)
         self._modified_restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
@@ -590,10 +594,21 @@ def get_param_config_index(self, param_config: Union[tuple, Tensor]):
     
     def initialize_tensorspace(self):
         """Encode the searchspace as floats in a Tensor. Save the mapping."""
-        self._map_tensor_to_param = []  # TODO
-        self._map_param_to_tensor = []  # TODO
+        assert self.tensorspace is None, "Tensorspace is already initialized"
+
+        # generate the mappings to and from tensor values
+        for index, param_values in enumerate(self.params_values):
+            if all(isinstance(v, numbers.Real) for v in param_values):
+                tensor_values = np.array(param_values).astype(float)
+            else:
+                self.tensor_categorical_dimensions.append(index)
+                tensor_values = np.arange(len(param_values))
+            self._map_param_to_tensor.append(dict(zip(param_values, tensor_values)))
+            self._map_tensor_to_param.append(dict(zip(tensor_values, param_values)))
+
+        # apply the mappings on the full searchspace
         numpy_repr = self.get_list_numpy()
-        numpy_repr = np.apply_along_axis(self.param_config_to_tensor, 0, numpy_repr)
+        numpy_repr = np.apply_along_axis(self.param_config_to_tensor, 1, numpy_repr)
         self.tensorspace = torch.from_numpy(numpy_repr.astype(float))
     
     def get_tensorspace(self):
@@ -604,17 +619,24 @@ def get_tensorspace(self):
     
     def param_config_to_tensor(self, param_config: tuple):
         """Convert from a parameter configuration to a Tensor."""
-        if self.tensorspace is None:
+        if len(self._map_param_to_tensor) == 0:
             self.initialize_tensorspace()
-        # TODO
-        raise NotImplementedError()
+        array = []
+        for i, param in enumerate(param_config):
+            array.append(self._map_param_to_tensor[i][param])
+        # TODO write tests
+        return torch.from_numpy(np.array(array))
     
     def tensor_to_param_config(self, tensor: Tensor):
         """Convert from a Tensor to a parameter configuration."""
-        if self.tensorspace is None:
+        assert tensor.dim() == 1, f"Parameter configuration tensor must be 1-dimensional, is {tensor.dim()} ({tensor})"
+        if len(self._map_tensor_to_param) == 0:
             self.initialize_tensorspace()
-        # TODO
-        raise NotImplementedError()
+        config = []
+        for i, param in enumerate(tensor):
+            config.append(self._map_tensor_to_param[i][param])
+        # TODO write tests
+        return tuple(config)
 
     def __prepare_neighbors_index(self):
         """Prepare by calculating the indices for the individual parameters."""

From f3fc81b19a7271518ae3909aa6463b8d794075f7 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 09:40:41 -0800
Subject: [PATCH 039/253] Improved efficiency of acquisition function by
 removing evaluated configurations

---
 kernel_tuner/hyper.py                        |  2 +-
 kernel_tuner/searchspace.py                  |  2 +-
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 15 +++++++++------
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index b84912a8b..9c052d033 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -61,7 +61,7 @@ def tune_hyper_params(target_strategy: str, hyper_params: dict, *args, **kwargs)
     # pass a temporary cache file to avoid duplicate execution
     if 'cache' not in kwargs:
         cachefile = get_random_unique_filename('temp_', '.json')
-        cachefile = Path("hyperparamtuning_milo_bruteforce_greedy_ils.json")
+        cachefile = Path("hyperparamtuning_milo_bruteforce_dual_annealing.json")
         kwargs['cache'] = str(cachefile)
 
     def put_if_not_present(target_dict, key, value):
diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 36001c835..69738bc12 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -634,7 +634,7 @@ def tensor_to_param_config(self, tensor: Tensor):
             self.initialize_tensorspace()
         config = []
         for i, param in enumerate(tensor):
-            config.append(self._map_tensor_to_param[i][param])
+            config.append(self._map_tensor_to_param[i][float(param)])
         # TODO write tests
         return tuple(config)
 
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 4a86598fe..dd7c3e956 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -47,7 +47,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         #     bounds.append([min(v), max(v)])
         # bounds = torch.from_numpy(np.array(bounds).transpose())
 
-    def run_config(self, config):
+    def run_config(self, config: tuple):
         """Run a single configuration. Returns the result and whether it is valid."""
         result = self.cost_func(config)
         valid = not isinstance(result, util.ErrorConfig)
@@ -64,14 +64,16 @@ def evaluate_configs(self, X: Tensor):
                 X = [X]
             for config in X:
                 assert isinstance(config, Tensor), f"Config must be a Tensor, but is of type {type(config)} ({config})"
-                res, valid = self.run_config(config)
+                param_config = self.searchspace.tensor_to_param_config(config)
+                res, valid = self.run_config(param_config)
                 if valid:
                     valid_configs.append([config])
                     valid_results.append([res])
-                else:
-                    # remove invalid configurations from the full searchspace
-                    index = self.searchspace.get_param_config_index(config)
-                    self.searchspace_tensors = torch.cat((self.searchspace_tensors[:index], self.searchspace_tensors[index+1:]))
+                
+                # remove evaluated configurations from the full searchspace
+                index = self.searchspace.get_param_config_index(param_config)
+                self.searchspace_tensors = torch.cat((self.searchspace_tensors[:index], self.searchspace_tensors[index+1:]))
+
             # add valid results to the training set
             self.train_X = torch.cat([self.train_X, torch.from_numpy(np.array(valid_configs))])
             self.train_Y = torch.cat([self.train_Y, torch.from_numpy(np.array(valid_results))])
@@ -110,6 +112,7 @@ def run(self, max_fevals: int):
                 ei = ExpectedImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
                 
                 # Optimize acquisition function to find the next evaluation point
+                # TODO look into how to handle categorical parameters with MixedSingleTaskGP
                 candidate, _ = optimize_acqf_discrete(
                     ei, 
                     q=1, 

From a5a04716064c2c8217003132271ccebb79ed9888 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 09:41:36 -0800
Subject: [PATCH 040/253] Removed Ax, added BOTorch as dependency

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 3d8511493..9d09dbcc5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -58,10 +58,10 @@ kernel_tuner = "kernel_tuner.interface:entry_point"
 
 # ATTENTION: if anything is changed here, run `poetry update`
 [tool.poetry.dependencies]
-python = ">=3.10,<3.14"         # NOTE when changing the supported Python versions, also change the test versions in the noxfile
+python = ">=3.10,<3.14"         # TODO from >=3.10, use | instead of Union[] # NOTE when changing the supported Python versions, also change the test versions in the noxfile
 numpy = "^1.26.0"               # Python 3.12 requires numpy at least 1.26
 scipy = ">=1.11.0"
-ax-platform = ">=0.4.3"
+botorch = ">=0.12.0"
 packaging = "*"                 # required by file_utils
 jsonschema = "*"
 python-constraint2 = "^2.0.0b8"

From 9429539f7321ef0f4487973b5f17a41fd0829129 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 09:42:20 -0800
Subject: [PATCH 041/253] Convenience script for benchmarking BO

---
 .gitignore |   1 +
 tune_bo.py | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 160 insertions(+)
 create mode 100644 tune_bo.py

diff --git a/.gitignore b/.gitignore
index 47ffc4024..eb59e44cb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@ poetry.lock
 noxenv.txt
 noxsettings.toml
 hyperparamtuning/*
+*.prof
 
 ### Python ###
 *.pyc
diff --git a/tune_bo.py b/tune_bo.py
new file mode 100644
index 000000000..81f1fe999
--- /dev/null
+++ b/tune_bo.py
@@ -0,0 +1,159 @@
+#!/usr/bin/env python
+from collections import OrderedDict
+from pathlib import Path
+
+import numpy
+
+import kernel_tuner
+
+# file_path_results = "../last_run/_tune_configuration-results.json"
+# file_path_metadata = "../last_run/_tune_configuration-metadata.json"
+
+
+def ops(w, h, fw, fh):
+    return (w * h * fw * fh * 2) / 1e9
+
+
+unit = "GFLOP"
+w = h = 4096
+fw = fh = 15
+inputs = [w, h, fw, fh]
+total_flops = ops(w, h, fw, fh)
+
+
+# def tune(inputs, lang, strategy):
+def tune(
+    device_name: str,
+    strategy="bayes_opt_BOTorch",
+    strategy_options={ 'max_fevals': 150 },
+    verbose=True,
+    quiet=False,
+    simulation_mode=True,
+    lang="CUDA",
+    profiling=True,
+):  
+    directory = Path(__file__).parent / "../autotuning_methodology/cached_data_used/"
+    assert directory.exists()
+    if lang == "CUDA":
+        kernel_file = directory / "kernels/convolution_milo.cu"
+    elif lang == "HIP":
+        kernel_file = directory / "kernels/convolution_milo.cu.hip"
+    else:
+        raise ValueError(f"Invalid {lang=}")
+
+    with kernel_file.open() as fp:
+        kernel_string = fp.read()
+
+    # setup tunable parameters
+    tune_params = OrderedDict()
+
+    # tune_params["pwr_limit"] = get_pwr_limit(pwr_limit, 0)
+
+    image_width, image_height, filter_width, filter_height = inputs
+
+    tune_params["block_size_x"] = [16 * i for i in range(1, 17)]
+    tune_params["block_size_y"] = [2**i for i in range(5)]
+    tune_params["tile_size_x"] = [i for i in range(1, 5)]
+    tune_params["tile_size_y"] = [i for i in range(1, 5)]
+    tune_params["read_only"] = [0, 1]  # toggle using the read-only cache
+
+    # do dry run
+    # tune_params["nvml_gr_clock"] = [2100]
+    # tune_params["block_size_x"] = [16]
+    # tune_params["block_size_y"] = [1]
+    # tune_params["tile_size_x"] = [1, 2, 4]
+    # tune_params["tile_size_y"] = [1]
+    # tune_params["read_only"] = [1]    #toggle using the read-only cache
+
+    tune_params["use_padding"] = [0, 1]  # toggle the insertion of padding in shared memory
+    tune_params["use_shmem"] = [0, 1]
+    tune_params["use_cmem"] = [1]
+    tune_params["filter_height"] = [filter_height]
+    tune_params["filter_width"] = [filter_width]
+
+    # limit the search to only use padding when its effective
+    restrict = [
+        "use_padding==0 or block_size_x % 32 != 0",
+        "block_size_x*block_size_y<=1024",
+        "use_padding==0 or use_shmem != 0",
+        "use_shmem == 0 or (((block_size_x*tile_size_x+(filter_width-1)))*((block_size_y*tile_size_y+(filter_height-1)))) < 12*1024",
+    ]
+
+    # print(restrict)
+
+    problem_size = (image_width, image_height)
+    size = numpy.prod(problem_size)
+    largest_fh = filter_height
+    largest_fw = filter_width
+    input_size = (problem_size[0] + largest_fw - 1) * (problem_size[1] + largest_fh - 1)
+
+    output_image = numpy.zeros(size).astype(numpy.float32)
+    input_image = numpy.random.randn(input_size).astype(numpy.float32)
+    filter_weights = numpy.random.randn(largest_fh * largest_fw).astype(numpy.float32)
+
+    cmem_args = {"d_filter": filter_weights}
+    args = [output_image, input_image, filter_weights]
+
+    grid_div_x = ["block_size_x", "tile_size_x"]
+    grid_div_y = ["block_size_y", "tile_size_y"]
+
+    total_flops = ops(*inputs)
+    metrics = OrderedDict()
+    metrics["GFLOP/s"] = lambda p: total_flops / (p["time"] / 1000.0)
+
+    def run():
+        return kernel_tuner.tune_kernel(
+            "convolution_kernel",
+            kernel_string,
+            problem_size,
+            args,
+            tune_params,
+            grid_div_y=grid_div_y,
+            grid_div_x=grid_div_x,
+            cmem_args=cmem_args,
+            restrictions=restrict,
+            cache=directory / f"cachefiles/convolution_milo/{device_name}.json",
+            metrics=metrics,
+            lang=lang,
+            iterations=32,
+            device=0,
+            verbose=verbose,
+            quiet=quiet,
+            strategy=strategy,
+            strategy_options=strategy_options,
+            simulation_mode=simulation_mode,
+        )
+
+    # start tuning
+    if profiling:
+        import cProfile
+
+        with cProfile.Profile() as pr:
+            results, env = run()
+            if profiling:
+                pr.dump_stats('bo_prof.prof')
+    else:
+        results, env = run()
+
+    
+    # store_output_file(file_path_results, results, tune_params)
+    # store_metadata_file(file_path_metadata)
+    # print(results)
+    # print(env)
+    return results, env
+
+
+if __name__ == "__main__":
+    # language = sys.argv[1]
+    # device_name = sys.argv[2]
+    language = "CUDA"
+    device_name = "A100"
+
+    # if len(sys.argv) != 2:
+    #     print("Usage: ./convolution.py [language ('HIP' or 'CUDA')] [device name]")
+    #     exit(1)
+
+    if language not in ("HIP", "CUDA"):
+        raise ValueError(f"{language} not valid, specify HIP or CUDA")
+
+    tune(device_name=device_name, lang=language)

From 176b8f566423d6142dba190985e82c8b25af8497 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 10:34:20 -0800
Subject: [PATCH 042/253] Added objective, tuning direction and hyperparameter
 tuning language selection support

---
 kernel_tuner/interface.py                     |  8 +++-
 .../schema/T1/1.0.0/input-schema.json         |  3 +-
 .../schema/T4/1.0.0/results-schema.json       |  6 ++-
 kernel_tuner/strategies/common.py             | 44 +++++++++++--------
 4 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index e9469ec6d..f48d105dc 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -863,6 +863,8 @@ def _check_user_input(kernel_name, kernelsource, arguments, block_size_names):
 def tune_kernel_T1(
     input_filepath: Path,
     cache_filepath: Path = None,
+    objective="time",
+    objective_higher_is_better=False,
     simulation_mode=False,
     output_T4=True,
     iterations=7,
@@ -945,7 +947,7 @@ def tune_kernel_T1(
             raise NotImplementedError(f"Conversion for this type of argument has not yet been implemented: {arg}")
 
     # tune with the converted inputs
-    # TODO add objective to tune_kernel and get_t4_results calls once available in T1
+    # TODO get_t4_results calls once available in T1
     results, env = tune_kernel(
         kernel_name,
         kernel_source,
@@ -966,9 +968,11 @@ def tune_kernel_T1(
         iterations=iterations,
         strategy=strategy,
         strategy_options=strategy_options,
+        objective=objective,
+        objective_higher_is_better=objective_higher_is_better,
     )
     if output_T4:
-        return get_t4_metadata(), get_t4_results(results, tune_params)
+        return get_t4_metadata(), get_t4_results(results, tune_params, objective=objective)
     return results, env
 
 
diff --git a/kernel_tuner/schema/T1/1.0.0/input-schema.json b/kernel_tuner/schema/T1/1.0.0/input-schema.json
index bb53ee594..598a4b3d1 100644
--- a/kernel_tuner/schema/T1/1.0.0/input-schema.json
+++ b/kernel_tuner/schema/T1/1.0.0/input-schema.json
@@ -189,7 +189,8 @@
                     "enum": [
                         "OpenCL",
                         "CUDA",
-                        "Vulkan"
+                        "Vulkan",
+                        "Hypertuner"
                     ]
                 },
                 "CompilerOptions": {
diff --git a/kernel_tuner/schema/T4/1.0.0/results-schema.json b/kernel_tuner/schema/T4/1.0.0/results-schema.json
index 298f2662c..511042016 100644
--- a/kernel_tuner/schema/T4/1.0.0/results-schema.json
+++ b/kernel_tuner/schema/T4/1.0.0/results-schema.json
@@ -59,7 +59,11 @@
                                     "type": "string"
                                 },
                                 "value": {
-                                    "type": "number"
+                                    "type": [
+                                        "number",
+                                        "string",
+                                        "array"
+                                    ]
                                 },
                                 "unit": {
                                     "type": "string"
diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index 717d2ca7e..ed142d43c 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -30,7 +30,9 @@
 
 def get_strategy_docstring(name, strategy_options):
     """Generate docstring for a 'tune' method of a strategy."""
-    return _docstring_template.replace("$NAME$", name).replace("$STRAT_OPT$", make_strategy_options_doc(strategy_options))
+    return _docstring_template.replace("$NAME$", name).replace(
+        "$STRAT_OPT$", make_strategy_options_doc(strategy_options)
+    )
 
 
 def make_strategy_options_doc(strategy_options):
@@ -53,7 +55,9 @@ def get_options(strategy_options, options):
 
 
 class CostFunc:
-    def __init__(self, searchspace: Searchspace, tuning_options, runner, *, scaling=False, snap=True, return_invalid=False):
+    def __init__(
+        self, searchspace: Searchspace, tuning_options, runner, *, scaling=False, snap=True, return_invalid=False
+    ):
         self.runner = runner
         self.snap = snap
         self.scaling = scaling
@@ -61,7 +65,9 @@ def __init__(self, searchspace: Searchspace, tuning_options, runner, *, scaling=
         self.searchspace = searchspace
         self.tuning_options = tuning_options
         if isinstance(self.tuning_options, dict):
-            self.tuning_options['max_fevals'] = min(tuning_options['max_fevals'] if 'max_fevals' in tuning_options else np.inf, searchspace.size)
+            self.tuning_options["max_fevals"] = min(
+                tuning_options["max_fevals"] if "max_fevals" in tuning_options else np.inf, searchspace.size
+            )
         self.results = []
 
     def __call__(self, x, check_restrictions=True):
@@ -69,8 +75,8 @@ def __call__(self, x, check_restrictions=True):
         self.runner.last_strategy_time = 1000 * (perf_counter() - self.runner.last_strategy_start_time)
 
         # error value to return for numeric optimizers that need a numerical value
-        logging.debug('_cost_func called')
-        logging.debug('x: ' + str(x))
+        logging.debug("_cost_func called")
+        logging.debug("x: " + str(x))
 
         # check if max_fevals is reached or time limit is exceeded
         util.check_stop_criterion(self.tuning_options)
@@ -83,7 +89,7 @@ def __call__(self, x, check_restrictions=True):
                 params = snap_to_nearest_config(x, self.searchspace.tune_params)
         else:
             params = x
-        logging.debug('params ' + str(params))
+        logging.debug("params " + str(params))
 
         legal = True
         result = {}
@@ -152,10 +158,10 @@ def get_bounds_x0_eps(self):
                     eps = min(eps, np.amin(np.gradient(vals)))
 
         self.tuning_options["eps"] = eps
-        logging.debug('get_bounds_x0_eps called')
-        logging.debug('bounds ' + str(bounds))
-        logging.debug('x0 ' + str(x0))
-        logging.debug('eps ' + str(eps))
+        logging.debug("get_bounds_x0_eps called")
+        logging.debug("bounds " + str(bounds))
+        logging.debug("x0 " + str(x0))
+        logging.debug("eps " + str(eps))
 
         return bounds, x0, eps
 
@@ -173,7 +179,7 @@ def setup_method_arguments(method, bounds):
     kwargs = {}
     # pass bounds to methods that support it
     if method in ["L-BFGS-B", "TNC", "SLSQP"]:
-        kwargs['bounds'] = bounds
+        kwargs["bounds"] = bounds
     return kwargs
 
 
@@ -186,21 +192,21 @@ def setup_method_options(method, tuning_options):
         maxiter = tuning_options.strategy_options.maxiter
     else:
         maxiter = 100
-    kwargs['maxiter'] = maxiter
+    kwargs["maxiter"] = maxiter
     if method in ["Nelder-Mead", "Powell"]:
-        kwargs['maxfev'] = maxiter
+        kwargs["maxfev"] = maxiter
     elif method == "L-BFGS-B":
-        kwargs['maxfun'] = maxiter
+        kwargs["maxfun"] = maxiter
 
     # pass eps to methods that support it
     if method in ["CG", "BFGS", "L-BFGS-B", "TNC", "SLSQP"]:
-        kwargs['eps'] = tuning_options.eps
+        kwargs["eps"] = tuning_options.eps
     elif method == "COBYLA":
-        kwargs['rhobeg'] = tuning_options.eps
+        kwargs["rhobeg"] = tuning_options.eps
 
     # not all methods support 'disp' option
-    if method not in ['TNC']:
-        kwargs['disp'] = tuning_options.verbose
+    if method not in ["TNC"]:
+        kwargs["disp"] = tuning_options.verbose
 
     return kwargs
 
@@ -247,5 +253,5 @@ def scale_from_params(params, tune_params, eps):
     """Helper func to do the inverse of the 'unscale' function."""
     x = np.zeros(len(params))
     for i, v in enumerate(tune_params.values()):
-        x[i] = 0.5 * eps + v.index(params[i])*eps
+        x[i] = 0.5 * eps + v.index(params[i]) * eps
     return x

From 196af62d19b2a4540ed3d8edac623a4d793ce1be Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 18:16:44 -0800
Subject: [PATCH 043/253] Completed implementation of mixed-type handling and
 handling of invalid and  evaluated configurations

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index dd7c3e956..6f80126e3 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -38,8 +38,8 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         # set up conversion to tensors
         self.searchspace = searchspace
         self.searchspace_tensors = searchspace.get_tensorspace()
-        self.train_X = torch.empty_like(self.searchspace_tensors)
-        self.train_Y = torch.empty(len(self.train_X))
+        self.train_X = torch.empty(0)
+        self.train_Y = torch.empty(0)
 
         # # get bounds
         # bounds = []
@@ -50,10 +50,10 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
     def run_config(self, config: tuple):
         """Run a single configuration. Returns the result and whether it is valid."""
         result = self.cost_func(config)
-        valid = not isinstance(result, util.ErrorConfig)
+        valid = not isinstance(result, util.ErrorConfig) and not np.isnan(result)
         if not valid:
             result = np.nan
-        return result, valid
+        return [result], valid
 
     def evaluate_configs(self, X: Tensor):
         """Evaluate a tensor of one or multiple configurations. Modifies train_X and train_Y accordingly."""
@@ -67,16 +67,17 @@ def evaluate_configs(self, X: Tensor):
                 param_config = self.searchspace.tensor_to_param_config(config)
                 res, valid = self.run_config(param_config)
                 if valid:
-                    valid_configs.append([config])
-                    valid_results.append([res])
+                    valid_configs.append(config)
+                    valid_results.append(res)
                 
                 # remove evaluated configurations from the full searchspace
                 index = self.searchspace.get_param_config_index(param_config)
                 self.searchspace_tensors = torch.cat((self.searchspace_tensors[:index], self.searchspace_tensors[index+1:]))
 
             # add valid results to the training set
-            self.train_X = torch.cat([self.train_X, torch.from_numpy(np.array(valid_configs))])
-            self.train_Y = torch.cat([self.train_Y, torch.from_numpy(np.array(valid_results))])
+            if len(valid_configs) > 0 and len(valid_results) > 0:
+                self.train_X = torch.cat([self.train_X, torch.from_numpy(np.array(valid_configs))])
+                self.train_Y = torch.cat([self.train_Y, torch.from_numpy(np.array(valid_results))])
         else:
             raise NotImplementedError(f"Evaluation has not been implemented for type {type(X)}")
         

From 55a5c1a221d24aedce4d351ba37c2fb15b81ac5d Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 18:19:50 -0800
Subject: [PATCH 044/253] Added docstrings, improved formatting

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 6f80126e3..4f2613ca4 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -22,6 +22,7 @@
 
 
 def tune(searchspace: Searchspace, runner, tuning_options):
+    """The entry function for tuning a searchspace using this algorithm."""
     max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
     bo = BayesianOptimization(searchspace, runner, tuning_options)
     return bo.run(max_fevals)
@@ -30,6 +31,7 @@ class BayesianOptimization():
     """Bayesian Optimization class."""
 
     def __init__(self, searchspace: Searchspace, runner, tuning_options):
+        """Initialization of the Bayesian Optimization class. Does not evaluate configurations."""
         self.initial_sample_taken = False
         self.initial_sample_size = tuning_options.strategy_options.get("popsize", 20)
         self.tuning_options = tuning_options
@@ -41,12 +43,6 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.train_X = torch.empty(0)
         self.train_Y = torch.empty(0)
 
-        # # get bounds
-        # bounds = []
-        # for v in searchspace.params_values:
-        #     bounds.append([min(v), max(v)])
-        # bounds = torch.from_numpy(np.array(bounds).transpose())
-
     def run_config(self, config: tuple):
         """Run a single configuration. Returns the result and whether it is valid."""
         result = self.cost_func(config)
@@ -72,7 +68,8 @@ def evaluate_configs(self, X: Tensor):
                 
                 # remove evaluated configurations from the full searchspace
                 index = self.searchspace.get_param_config_index(param_config)
-                self.searchspace_tensors = torch.cat((self.searchspace_tensors[:index], self.searchspace_tensors[index+1:]))
+                self.searchspace_tensors = torch.cat((self.searchspace_tensors[:index], 
+                                                      self.searchspace_tensors[index+1:]))
 
             # add valid results to the training set
             if len(valid_configs) > 0 and len(valid_results) > 0:

From d64f783c04f26e8d239214d862ee7b60adfde678 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 20:09:34 -0800
Subject: [PATCH 045/253] Extended strategies test to test for ability to
 handle non-numeric and mixed parameter values

---
 test/strategies/test_strategies.py | 10 +++-
 test/test_cache_file.json          | 94 +++++++++++++++++++++++++-----
 2 files changed, 87 insertions(+), 17 deletions(-)

diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
index 11b231e62..4e4fbb8c1 100644
--- a/test/strategies/test_strategies.py
+++ b/test/strategies/test_strategies.py
@@ -31,6 +31,9 @@ def vector_add():
     args = [c, a, b, n]
     tune_params = dict()
     tune_params["block_size_x"] = [128 + 64 * i for i in range(15)]
+    tune_params["test_string"] = ["alg_1", "alg_2"]
+    tune_params["test_bool"] = [True, False]
+    tune_params["test_mixed"] = ["test", 1, True, 2.45]
 
     return ["vector_add", kernel_string, size, args, tune_params]
 
@@ -58,7 +61,9 @@ def test_strategies(vector_add, strategy):
         filter_options = options
     filter_options["max_fevals"] = 10
 
-    results, _ = kernel_tuner.tune_kernel(*vector_add, strategy=strategy, strategy_options=filter_options,
+    restrictions = ["test_string == 'alg_2'", "test_bool == True", "test_mixed == 2.45"]
+
+    results, _ = kernel_tuner.tune_kernel(*vector_add, restrictions=restrictions, strategy=strategy, strategy_options=filter_options,
                                          verbose=False, cache=cache_filename, simulation_mode=True)
 
     assert len(results) > 0
@@ -76,6 +81,9 @@ def test_strategies(vector_add, strategy):
     # check whether the returned dictionaries contain exactly the expected keys and the appropriate type
     expected_items = {
         'block_size_x': int,
+        'test_string': str,
+        'test_bool': bool,
+        'test_mixed': float,
         'time': (float, int),
         'times': list,
         'compile_time': (float, int),
diff --git a/test/test_cache_file.json b/test/test_cache_file.json
index 3299441c5..5e0c0e054 100644
--- a/test/test_cache_file.json
+++ b/test/test_cache_file.json
@@ -2,7 +2,10 @@
     "device_name": "NVIDIA RTX A4000",
     "kernel_name": "vector_add",
     "tune_params_keys": [
-        "block_size_x"
+        "block_size_x",
+        "test_string",
+        "test_bool",
+        "test_mixed"
     ],
     "tune_params": {
         "block_size_x": [
@@ -21,11 +24,28 @@
             896,
             960,
             1024
+        ],
+        "test_string": [
+            "alg_1",
+            "alg_2"
+        ],
+        "test_bool": [
+            true,
+            false
+        ],
+        "test_mixed": [
+            "test",
+            1,
+            true,
+            2.45
         ]
     },
     "cache": {
-        "128": {
+        "128,alg_2,True,2.45": {
             "block_size_x": 128,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.04073600071881499,
             "times": [
                 0.1268800050020218,
@@ -43,8 +63,11 @@
             "framework_time": 0.8587837219238281,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "192": {
+        "192,alg_2,True,2.45": {
             "block_size_x": 192,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.04095085710287094,
             "times": [
                 0.12908799946308136,
@@ -62,8 +85,11 @@
             "framework_time": 1.6656816005706787,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "256": {
+        "256,alg_2,True,2.45": {
             "block_size_x": 256,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.04189257137477398,
             "times": [
                 0.13180799782276154,
@@ -81,8 +107,11 @@
             "framework_time": 1.6054585576057434,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "320": {
+        "320,alg_2,True,2.45": {
             "block_size_x": 320,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.04208914376795292,
             "times": [
                 0.1358720064163208,
@@ -100,8 +129,11 @@
             "framework_time": 1.4494173228740692,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "384": {
+        "384,alg_2,True,2.45": {
             "block_size_x": 384,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.04174171467976911,
             "times": [
                 0.13251200318336487,
@@ -119,8 +151,11 @@
             "framework_time": 1.682564616203308,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "448": {
+        "448,alg_2,True,2.45": {
             "block_size_x": 448,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.03249828570655414,
             "times": [
                 0.0647680014371872,
@@ -138,8 +173,11 @@
             "framework_time": 1.5890561044216156,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "512": {
+        "512,alg_2,True,2.45": {
             "block_size_x": 512,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.04139885599059718,
             "times": [
                 0.13023999333381653,
@@ -157,8 +195,11 @@
             "framework_time": 1.853298395872116,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "576": {
+        "576,alg_2,True,2.45": {
             "block_size_x": 576,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.04189257137477398,
             "times": [
                 0.12995199859142303,
@@ -176,8 +217,11 @@
             "framework_time": 1.8403716385364532,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "640": {
+        "640,alg_2,True,2.45": {
             "block_size_x": 640,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.0411702852163996,
             "times": [
                 0.12796799838542938,
@@ -195,8 +239,11 @@
             "framework_time": 1.8264725804328918,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "704": {
+        "704,alg_2,True,2.45": {
             "block_size_x": 704,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.04184228580977235,
             "times": [
                 0.1343040019273758,
@@ -214,8 +261,11 @@
             "framework_time": 1.6709677875041962,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "768": {
+        "768,alg_2,True,2.45": {
             "block_size_x": 768,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.03175771422684193,
             "times": [
                 0.06230400130152702,
@@ -233,8 +283,11 @@
             "framework_time": 1.7531625926494598,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "832": {
+        "832,alg_2,True,2.45": {
             "block_size_x": 832,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.040941715240478516,
             "times": [
                 0.12998400628566742,
@@ -252,8 +305,11 @@
             "framework_time": 2.1368376910686493,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "896": {
+        "896,alg_2,True,2.45": {
             "block_size_x": 896,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.04177371359297207,
             "times": [
                 0.12931199371814728,
@@ -271,8 +327,11 @@
             "framework_time": 2.03637033700943,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "960": {
+        "960,alg_2,True,2.45": {
             "block_size_x": 960,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.042189714631864,
             "times": [
                 0.1335040032863617,
@@ -290,8 +349,11 @@
             "framework_time": 1.7383433878421783,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "1024": {
+        "1024,alg_2,True,2.45": {
             "block_size_x": 1024,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.04114742816558906,
             "times": [
                 0.13087999820709229,

From e95ab30ca2d5bdd885616a0f3ee1ffb7b05dc475 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 20:10:39 -0800
Subject: [PATCH 046/253] Mixed-type parameters are not converted to numeric
 constraints

---
 kernel_tuner/util.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index e8d194e11..dac5d6de4 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -889,7 +889,7 @@ def to_numeric_constraint(
         if len(comparators_found) != 1:
             return None
         comparator = comparators_found[0]
-
+    
         # split the string on the comparison and remove leading and trailing whitespace
         left, right = tuple(s.strip() for s in restriction.split(comparator))
 
@@ -1032,7 +1032,8 @@ def to_equality_constraint(
                 ):
                     parsed_restriction = parsed_restriction[1:-1]
                 # check if we can turn this into the built-in numeric comparison constraint
-                finalized_constraint = to_numeric_constraint(parsed_restriction, params_used)
+                if all(all(isinstance(v, (int, float)) and type(v) is not type(True) for v in tune_params[param]) for param in params_used):
+                    finalized_constraint = to_numeric_constraint(parsed_restriction, params_used)
                 if finalized_constraint is None:
                     # check if we can turn this into the built-in equality comparison constraint
                     finalized_constraint = to_equality_constraint(parsed_restriction, params_used)

From 10a6a5c1557a3bfefa2218657136e05f944a6fea Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 20:11:58 -0800
Subject: [PATCH 047/253] CostFunc can now encode and decode non-numeric
 configurations for strategies that require only numerics

---
 kernel_tuner/strategies/common.py | 53 +++++++++++++++++++++++++++++--
 1 file changed, 51 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index ed142d43c..9c2623132 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -1,4 +1,5 @@
 import logging
+import numbers
 import sys
 from time import perf_counter
 
@@ -56,11 +57,24 @@ def get_options(strategy_options, options):
 
 class CostFunc:
     def __init__(
-        self, searchspace: Searchspace, tuning_options, runner, *, scaling=False, snap=True, return_invalid=False
+        self, searchspace: Searchspace, tuning_options, runner, *, 
+        scaling=False, snap=True, encode_non_numeric=False, return_invalid=False
     ):
+        """An abstract method to handle evaluation of configurations.
+
+        Args:
+            searchspace: the Searchspace to evaluate on.
+            tuning_options: various tuning options.
+            runner: the runner to use.
+            scaling: whether to internally scale parameter values. Defaults to False.
+            snap: whether to snap given configurations to their closests equivalent in the space. Defaults to True.
+            encode_non_numeric: whether to externally encode non-numeric parameter values. Defaults to False.
+            return_invalid: whether to return the util.ErrorConfig of an invalid configuration. Defaults to False.
+        """        
         self.runner = runner
         self.snap = snap
         self.scaling = scaling
+        self.encode_non_numeric = encode_non_numeric
         self.return_invalid = return_invalid
         self.searchspace = searchspace
         self.tuning_options = tuning_options
@@ -70,9 +84,24 @@ def __init__(
             )
         self.results = []
 
+        # if enabled, encode non-numeric parameter values as a numeric value
+        if self.encode_non_numeric:
+            self._map_param_to_encoded = {}
+            self._map_encoded_to_param = {}
+            self.encoded_params_values = []
+            for i, param_values in enumerate(self.searchspace.params_values):
+                encoded_values = param_values
+                if not all(isinstance(v, numbers.Real) for v in param_values):
+                    encoded_values = np.arange(len(param_values))
+                    self._map_param_to_encoded[i] = dict(zip(param_values, encoded_values))
+                    self._map_encoded_to_param[i] = dict(zip(encoded_values, param_values))
+                self.encoded_params_values.append(encoded_values)
+
     def __call__(self, x, check_restrictions=True):
         """Cost function used by almost all strategies."""
         self.runner.last_strategy_time = 1000 * (perf_counter() - self.runner.last_strategy_start_time)
+        if self.encode_non_numeric:
+            x = self.encoded_to_params(x)
 
         # error value to return for numeric optimizers that need a numerical value
         logging.debug("_cost_func called")
@@ -168,10 +197,30 @@ def get_bounds_x0_eps(self):
     def get_bounds(self):
         """Create a bounds array from the tunable parameters."""
         bounds = []
-        for values in self.searchspace.tune_params.values():
+        for values in self.encoded_params_values if self.encode_non_numeric else  self.searchspace.params_values:
             sorted_values = np.sort(values)
             bounds.append((sorted_values[0], sorted_values[-1]))
         return bounds
+    
+    def encoded_to_params(self, config):
+        """Convert from an encoded configuration to the real parameters."""
+        if not self.encode_non_numeric:
+            raise ValueError("'encode_non_numeric' must be set to true to use this function.")
+        params = []
+        for i, v in enumerate(config):
+            params.append(self._map_encoded_to_param[i][v] if i in self._map_encoded_to_param else v)
+        assert len(params) == len(config)            
+        return params
+    
+    def params_to_encoded(self, config):
+        """Convert from a parameter configuration to the encoded configuration."""
+        if not self.encode_non_numeric:
+            raise ValueError("'encode_non_numeric' must be set to true to use this function.")
+        encoded = []
+        for i, v in enumerate(config):
+            encoded.append(self._map_param_to_encoded[i][v] if i in self._map_param_to_encoded else v)
+        assert len(encoded) == len(config)            
+        return encoded
 
 
 def setup_method_arguments(method, bounds):

From 6ae3ba65d010a7aeb10b38eb1007b25ebdbc6760 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 20:16:33 -0800
Subject: [PATCH 048/253] Fixed logging statements, improved formatting

---
 kernel_tuner/strategies/common.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index 9c2623132..28b36c84f 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -1,3 +1,5 @@
+"""Module for functionality that is commonly used throughout Kernel Tuner."""
+
 import logging
 import numbers
 import sys
@@ -56,6 +58,8 @@ def get_options(strategy_options, options):
 
 
 class CostFunc:
+    """Class encapsulating the CostFunc method."""
+
     def __init__(
         self, searchspace: Searchspace, tuning_options, runner, *, 
         scaling=False, snap=True, encode_non_numeric=False, return_invalid=False
@@ -105,7 +109,7 @@ def __call__(self, x, check_restrictions=True):
 
         # error value to return for numeric optimizers that need a numerical value
         logging.debug("_cost_func called")
-        logging.debug("x: " + str(x))
+        logging.debug("x: %s", str(x))
 
         # check if max_fevals is reached or time limit is exceeded
         util.check_stop_criterion(self.tuning_options)
@@ -118,7 +122,7 @@ def __call__(self, x, check_restrictions=True):
                 params = snap_to_nearest_config(x, self.searchspace.tune_params)
         else:
             params = x
-        logging.debug("params " + str(params))
+        logging.debug("params %s", str(params))
 
         legal = True
         result = {}
@@ -188,9 +192,9 @@ def get_bounds_x0_eps(self):
 
         self.tuning_options["eps"] = eps
         logging.debug("get_bounds_x0_eps called")
-        logging.debug("bounds " + str(bounds))
-        logging.debug("x0 " + str(x0))
-        logging.debug("eps " + str(eps))
+        logging.debug("bounds %s", str(bounds))
+        logging.debug("x0 %s", str(x0))
+        logging.debug("eps %s", str(eps))
 
         return bounds, x0, eps
 

From 4873a20c59b60325affc56af1fcc093787754356 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 20:18:34 -0800
Subject: [PATCH 049/253] Improved the performance of get_bounds

---
 kernel_tuner/strategies/common.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index 28b36c84f..5f64618d5 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -201,9 +201,8 @@ def get_bounds_x0_eps(self):
     def get_bounds(self):
         """Create a bounds array from the tunable parameters."""
         bounds = []
-        for values in self.encoded_params_values if self.encode_non_numeric else  self.searchspace.params_values:
-            sorted_values = np.sort(values)
-            bounds.append((sorted_values[0], sorted_values[-1]))
+        for values in self.encoded_params_values if self.encode_non_numeric else self.searchspace.params_values:
+            bounds.append((min(values), max(values)))
         return bounds
     
     def encoded_to_params(self, config):

From bae7e9678711e499561f00030346689cfc55db7f Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 20:20:02 -0800
Subject: [PATCH 050/253] Applied non-numeric encoding in differential
 evolution to handle non-numeric parameter values

---
 kernel_tuner/strategies/diff_evo.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index 5ad2b9474..62e966f33 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -6,7 +6,8 @@
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc
 
-supported_methods = ["best1bin", "best1exp", "rand1exp", "randtobest1exp", "best2exp", "rand2exp", "randtobest1bin", "best2bin", "rand2bin", "rand1bin"]
+supported_methods = ["best1bin", "best1exp", "rand1exp", "randtobest1exp", "best2exp", "rand2exp",
+                     "randtobest1bin", "best2bin", "rand2bin", "rand1bin"]
 
 _options = dict(method=(f"Creation method for new population, any of {supported_methods}", "best1bin"),
                        popsize=("Population size", 20),
@@ -18,17 +19,18 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
     method, popsize, maxiter = common.get_options(tuning_options.strategy_options, _options)
 
-    # build a bounds array as needed for the optimizer
-    cost_func = CostFunc(searchspace, tuning_options, runner)
+    # build a bounds array as needed for the optimizer, and encode because it can't handle non-numeric values
+    cost_func = CostFunc(searchspace, tuning_options, runner, encode_non_numeric=True)
     bounds = cost_func.get_bounds()
 
     # ensure particles start from legal points
     population = list(list(p) for p in searchspace.get_random_sample(popsize))
+    population_enc = [cost_func.params_to_encoded(c) for c in population]
 
     # call the differential evolution optimizer
     opt_result = None
     try:
-        opt_result = differential_evolution(cost_func, bounds, maxiter=maxiter, popsize=popsize, init=population,
+        opt_result = differential_evolution(cost_func, bounds, maxiter=maxiter, popsize=popsize, init=population_enc,
                                         polish=False, strategy=method, disp=tuning_options.verbose)
     except util.StopCriterionReached as e:
         if tuning_options.verbose:

From 7eb7ef7b86c50f26d5a65beadd69baf6492ad020 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 20:50:53 -0800
Subject: [PATCH 051/253] Implemented automatic conversion to multiple types
 for encoded tensor parameter lookup

---
 kernel_tuner/searchspace.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 69738bc12..c18a7518c 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -623,7 +623,17 @@ def param_config_to_tensor(self, param_config: tuple):
             self.initialize_tensorspace()
         array = []
         for i, param in enumerate(param_config):
-            array.append(self._map_param_to_tensor[i][param])
+            mapping = self._map_param_to_tensor[i]
+            conversions = [None, str, float, int, bool]
+            for c in conversions:
+                try:
+                    c_param = param if c is None else c(param)
+                    array.append(mapping[c_param])
+                    break
+                except (KeyError, ValueError) as e:
+                    if c == conversions[-1]:
+                        raise KeyError(f"No variant of {param} could be found in {mapping}") from e
+
         # TODO write tests
         return torch.from_numpy(np.array(array))
     

From 91d3ce4f8b87d7bff1b963297d4fc9c666fb1243 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 21:13:49 -0800
Subject: [PATCH 052/253] Added tests for Searchspace tensor encoding and
 conversion

---
 kernel_tuner/searchspace.py       |  3 ---
 kernel_tuner/strategies/common.py |  2 +-
 test/test_searchspace.py          | 15 +++++++++++++++
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index c18a7518c..6b3e54e21 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -633,8 +633,6 @@ def param_config_to_tensor(self, param_config: tuple):
                 except (KeyError, ValueError) as e:
                     if c == conversions[-1]:
                         raise KeyError(f"No variant of {param} could be found in {mapping}") from e
-
-        # TODO write tests
         return torch.from_numpy(np.array(array))
     
     def tensor_to_param_config(self, tensor: Tensor):
@@ -645,7 +643,6 @@ def tensor_to_param_config(self, tensor: Tensor):
         config = []
         for i, param in enumerate(tensor):
             config.append(self._map_tensor_to_param[i][float(param)])
-        # TODO write tests
         return tuple(config)
 
     def __prepare_neighbors_index(self):
diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index 5f64618d5..7901f97a0 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -1,4 +1,4 @@
-"""Module for functionality that is commonly used throughout Kernel Tuner."""
+"""Module for functionality that is commonly used throughout the strategies."""
 
 import logging
 import numbers
diff --git a/test/test_searchspace.py b/test/test_searchspace.py
index 8672c1d03..48f049750 100644
--- a/test/test_searchspace.py
+++ b/test/test_searchspace.py
@@ -177,6 +177,21 @@ def test_param_index_lookup():
     assert simple_searchspace.get_param_indices(last) == (3, 1, 1)
 
 
+def test_get_tensorspace():
+    """Test the generation of a tensor space."""
+    tensorspace = simple_searchspace.get_tensorspace()
+    assert tensorspace.shape == simple_searchspace.get_list_numpy().shape
+
+
+def test_conversion_tensor_param_config():
+    """Test the conversion from a parameter configuration to a tensor and tensor to parameter configuration."""
+    for config in simple_searchspace.list:
+        tensor = simple_searchspace.param_config_to_tensor(config)
+        config_2 = simple_searchspace.tensor_to_param_config(tensor)
+        assert config == config_2
+        assert tensor.equal(simple_searchspace.param_config_to_tensor(config_2))
+
+
 def test_random_sample():
     """Test whether the random sample indices exists and are unique, and if it throws an error for too many samples."""
     random_sample_indices = searchspace.get_random_sample_indices(100)

From 80d514e65b9038364723a6d89e05fe93a9ea6b81 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 21:19:27 -0800
Subject: [PATCH 053/253] Seperated strategies and runners test cache file

---
 .gitignore                           |   1 +
 test/strategies/test_cache_file.json | 375 +++++++++++++++++++++++++++
 test/strategies/test_strategies.py   |   2 +-
 test/test_cache_file.json            |  94 ++-----
 4 files changed, 393 insertions(+), 79 deletions(-)
 create mode 100644 test/strategies/test_cache_file.json

diff --git a/.gitignore b/.gitignore
index eb59e44cb..ce4873209 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,6 +20,7 @@ push_to_pypi.sh
 *.json
 !kernel_tuner/schema/T1/1.0.0/input-schema.json
 !test/test_T1_input.json
+!test_cache_file.json
 *.csv
 .cache
 *.ipynb_checkpoints
diff --git a/test/strategies/test_cache_file.json b/test/strategies/test_cache_file.json
new file mode 100644
index 000000000..5e0c0e054
--- /dev/null
+++ b/test/strategies/test_cache_file.json
@@ -0,0 +1,375 @@
+{
+    "device_name": "NVIDIA RTX A4000",
+    "kernel_name": "vector_add",
+    "tune_params_keys": [
+        "block_size_x",
+        "test_string",
+        "test_bool",
+        "test_mixed"
+    ],
+    "tune_params": {
+        "block_size_x": [
+            128,
+            192,
+            256,
+            320,
+            384,
+            448,
+            512,
+            576,
+            640,
+            704,
+            768,
+            832,
+            896,
+            960,
+            1024
+        ],
+        "test_string": [
+            "alg_1",
+            "alg_2"
+        ],
+        "test_bool": [
+            true,
+            false
+        ],
+        "test_mixed": [
+            "test",
+            1,
+            true,
+            2.45
+        ]
+    },
+    "cache": {
+        "128,alg_2,True,2.45": {
+            "block_size_x": 128,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.04073600071881499,
+            "times": [
+                0.1268800050020218,
+                0.031072000041604042,
+                0.027295999228954315,
+                0.025472000241279602,
+                0.025119999423623085,
+                0.025248000398278236,
+                0.024064000695943832
+            ],
+            "compile_time": 440.9545585513115,
+            "verification_time": 0,
+            "benchmark_time": 1.091592013835907,
+            "strategy_time": 0,
+            "framework_time": 0.8587837219238281,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "192,alg_2,True,2.45": {
+            "block_size_x": 192,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.04095085710287094,
+            "times": [
+                0.12908799946308136,
+                0.03046399913728237,
+                0.027744000777602196,
+                0.025151999667286873,
+                0.024960000067949295,
+                0.024992000311613083,
+                0.02425600029528141
+            ],
+            "compile_time": 436.15153804421425,
+            "verification_time": 0,
+            "benchmark_time": 1.0972395539283752,
+            "strategy_time": 0,
+            "framework_time": 1.6656816005706787,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "256,alg_2,True,2.45": {
+            "block_size_x": 256,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.04189257137477398,
+            "times": [
+                0.13180799782276154,
+                0.031136000528931618,
+                0.028095999732613564,
+                0.027008000761270523,
+                0.025087999179959297,
+                0.02505600079894066,
+                0.02505600079894066
+            ],
+            "compile_time": 436.5839697420597,
+            "verification_time": 0,
+            "benchmark_time": 1.0691732168197632,
+            "strategy_time": 0,
+            "framework_time": 1.6054585576057434,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "320,alg_2,True,2.45": {
+            "block_size_x": 320,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.04208914376795292,
+            "times": [
+                0.1358720064163208,
+                0.030688000842928886,
+                0.02768000029027462,
+                0.02582399919629097,
+                0.025087999179959297,
+                0.025312000885605812,
+                0.024159999564290047
+            ],
+            "compile_time": 438.9761835336685,
+            "verification_time": 0,
+            "benchmark_time": 1.0976120829582214,
+            "strategy_time": 0,
+            "framework_time": 1.4494173228740692,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "384,alg_2,True,2.45": {
+            "block_size_x": 384,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.04174171467976911,
+            "times": [
+                0.13251200318336487,
+                0.03167999908328056,
+                0.027871999889612198,
+                0.025312000885605812,
+                0.024671999737620354,
+                0.02505600079894066,
+                0.025087999179959297
+            ],
+            "compile_time": 440.71199372410774,
+            "verification_time": 0,
+            "benchmark_time": 1.0499358177185059,
+            "strategy_time": 0,
+            "framework_time": 1.682564616203308,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "448,alg_2,True,2.45": {
+            "block_size_x": 448,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.03249828570655414,
+            "times": [
+                0.0647680014371872,
+                0.03167999908328056,
+                0.028255999088287354,
+                0.025280000641942024,
+                0.027103999629616737,
+                0.02550400048494339,
+                0.02489599958062172
+            ],
+            "compile_time": 449.13655519485474,
+            "verification_time": 0,
+            "benchmark_time": 1.1196956038475037,
+            "strategy_time": 0,
+            "framework_time": 1.5890561044216156,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "512,alg_2,True,2.45": {
+            "block_size_x": 512,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.04139885599059718,
+            "times": [
+                0.13023999333381653,
+                0.031136000528931618,
+                0.02831999957561493,
+                0.02595200017094612,
+                0.024607999250292778,
+                0.025151999667286873,
+                0.024383999407291412
+            ],
+            "compile_time": 440.5844733119011,
+            "verification_time": 0,
+            "benchmark_time": 1.09076127409935,
+            "strategy_time": 0,
+            "framework_time": 1.853298395872116,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "576,alg_2,True,2.45": {
+            "block_size_x": 576,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.04189257137477398,
+            "times": [
+                0.12995199859142303,
+                0.03200000151991844,
+                0.028511999174952507,
+                0.026623999699950218,
+                0.025760000571608543,
+                0.02537599951028824,
+                0.02502400055527687
+            ],
+            "compile_time": 442.16764718294144,
+            "verification_time": 0,
+            "benchmark_time": 1.1038780212402344,
+            "strategy_time": 0,
+            "framework_time": 1.8403716385364532,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "640,alg_2,True,2.45": {
+            "block_size_x": 640,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.0411702852163996,
+            "times": [
+                0.12796799838542938,
+                0.03081599995493889,
+                0.02969600073993206,
+                0.025439999997615814,
+                0.02409599907696247,
+                0.02582399919629097,
+                0.024351999163627625
+            ],
+            "compile_time": 437.98910081386566,
+            "verification_time": 0,
+            "benchmark_time": 1.0496266186237335,
+            "strategy_time": 0,
+            "framework_time": 1.8264725804328918,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "704,alg_2,True,2.45": {
+            "block_size_x": 704,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.04184228580977235,
+            "times": [
+                0.1343040019273758,
+                0.03094400092959404,
+                0.02908799983561039,
+                0.025151999667286873,
+                0.02486399933695793,
+                0.024447999894618988,
+                0.02409599907696247
+            ],
+            "compile_time": 443.51235404610634,
+            "verification_time": 0,
+            "benchmark_time": 1.1033527553081512,
+            "strategy_time": 0,
+            "framework_time": 1.6709677875041962,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "768,alg_2,True,2.45": {
+            "block_size_x": 768,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.03175771422684193,
+            "times": [
+                0.06230400130152702,
+                0.0315839983522892,
+                0.02831999957561493,
+                0.02672000043094158,
+                0.023679999634623528,
+                0.023903999477624893,
+                0.02579200081527233
+            ],
+            "compile_time": 450.4409395158291,
+            "verification_time": 0,
+            "benchmark_time": 1.101326197385788,
+            "strategy_time": 0,
+            "framework_time": 1.7531625926494598,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "832,alg_2,True,2.45": {
+            "block_size_x": 832,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.040941715240478516,
+            "times": [
+                0.12998400628566742,
+                0.03094400092959404,
+                0.027103999629616737,
+                0.024768000468611717,
+                0.025439999997615814,
+                0.023903999477624893,
+                0.024447999894618988
+            ],
+            "compile_time": 439.9200603365898,
+            "verification_time": 0,
+            "benchmark_time": 1.0421127080917358,
+            "strategy_time": 0,
+            "framework_time": 2.1368376910686493,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "896,alg_2,True,2.45": {
+            "block_size_x": 896,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.04177371359297207,
+            "times": [
+                0.12931199371814728,
+                0.03731200098991394,
+                0.02812799997627735,
+                0.02502400055527687,
+                0.02412799932062626,
+                0.024768000468611717,
+                0.023744000121951103
+            ],
+            "compile_time": 439.23527002334595,
+            "verification_time": 0,
+            "benchmark_time": 1.0946877300739288,
+            "strategy_time": 0,
+            "framework_time": 2.03637033700943,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "960,alg_2,True,2.45": {
+            "block_size_x": 960,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.042189714631864,
+            "times": [
+                0.1335040032863617,
+                0.031039999797940254,
+                0.02876799926161766,
+                0.02579200081527233,
+                0.025119999423623085,
+                0.02566399984061718,
+                0.025439999997615814
+            ],
+            "compile_time": 441.7596235871315,
+            "verification_time": 0,
+            "benchmark_time": 1.1166557669639587,
+            "strategy_time": 0,
+            "framework_time": 1.7383433878421783,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "1024,alg_2,True,2.45": {
+            "block_size_x": 1024,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.04114742816558906,
+            "times": [
+                0.13087999820709229,
+                0.03049599938094616,
+                0.027936000376939774,
+                0.02486399933695793,
+                0.0244159996509552,
+                0.024320000782608986,
+                0.025119999423623085
+            ],
+            "compile_time": 442.8337663412094,
+            "verification_time": 0,
+            "benchmark_time": 1.0683201253414154,
+            "strategy_time": 0,
+            "framework_time": 1.9918642938137054,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        }
+    }
+}
\ No newline at end of file
diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
index 4e4fbb8c1..b7b2851dd 100644
--- a/test/strategies/test_strategies.py
+++ b/test/strategies/test_strategies.py
@@ -9,7 +9,7 @@
 
 from ..context import skip_if_no_bayesopt_botorch, skip_if_no_bayesopt_gpytorch
 
-cache_filename = os.path.dirname(os.path.realpath(__file__)) + "/../test_cache_file.json"
+cache_filename = os.path.dirname(os.path.realpath(__file__)) + "/test_cache_file.json"
 
 @pytest.fixture
 def vector_add():
diff --git a/test/test_cache_file.json b/test/test_cache_file.json
index 5e0c0e054..3299441c5 100644
--- a/test/test_cache_file.json
+++ b/test/test_cache_file.json
@@ -2,10 +2,7 @@
     "device_name": "NVIDIA RTX A4000",
     "kernel_name": "vector_add",
     "tune_params_keys": [
-        "block_size_x",
-        "test_string",
-        "test_bool",
-        "test_mixed"
+        "block_size_x"
     ],
     "tune_params": {
         "block_size_x": [
@@ -24,28 +21,11 @@
             896,
             960,
             1024
-        ],
-        "test_string": [
-            "alg_1",
-            "alg_2"
-        ],
-        "test_bool": [
-            true,
-            false
-        ],
-        "test_mixed": [
-            "test",
-            1,
-            true,
-            2.45
         ]
     },
     "cache": {
-        "128,alg_2,True,2.45": {
+        "128": {
             "block_size_x": 128,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.04073600071881499,
             "times": [
                 0.1268800050020218,
@@ -63,11 +43,8 @@
             "framework_time": 0.8587837219238281,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "192,alg_2,True,2.45": {
+        "192": {
             "block_size_x": 192,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.04095085710287094,
             "times": [
                 0.12908799946308136,
@@ -85,11 +62,8 @@
             "framework_time": 1.6656816005706787,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "256,alg_2,True,2.45": {
+        "256": {
             "block_size_x": 256,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.04189257137477398,
             "times": [
                 0.13180799782276154,
@@ -107,11 +81,8 @@
             "framework_time": 1.6054585576057434,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "320,alg_2,True,2.45": {
+        "320": {
             "block_size_x": 320,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.04208914376795292,
             "times": [
                 0.1358720064163208,
@@ -129,11 +100,8 @@
             "framework_time": 1.4494173228740692,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "384,alg_2,True,2.45": {
+        "384": {
             "block_size_x": 384,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.04174171467976911,
             "times": [
                 0.13251200318336487,
@@ -151,11 +119,8 @@
             "framework_time": 1.682564616203308,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "448,alg_2,True,2.45": {
+        "448": {
             "block_size_x": 448,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.03249828570655414,
             "times": [
                 0.0647680014371872,
@@ -173,11 +138,8 @@
             "framework_time": 1.5890561044216156,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "512,alg_2,True,2.45": {
+        "512": {
             "block_size_x": 512,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.04139885599059718,
             "times": [
                 0.13023999333381653,
@@ -195,11 +157,8 @@
             "framework_time": 1.853298395872116,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "576,alg_2,True,2.45": {
+        "576": {
             "block_size_x": 576,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.04189257137477398,
             "times": [
                 0.12995199859142303,
@@ -217,11 +176,8 @@
             "framework_time": 1.8403716385364532,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "640,alg_2,True,2.45": {
+        "640": {
             "block_size_x": 640,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.0411702852163996,
             "times": [
                 0.12796799838542938,
@@ -239,11 +195,8 @@
             "framework_time": 1.8264725804328918,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "704,alg_2,True,2.45": {
+        "704": {
             "block_size_x": 704,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.04184228580977235,
             "times": [
                 0.1343040019273758,
@@ -261,11 +214,8 @@
             "framework_time": 1.6709677875041962,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "768,alg_2,True,2.45": {
+        "768": {
             "block_size_x": 768,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.03175771422684193,
             "times": [
                 0.06230400130152702,
@@ -283,11 +233,8 @@
             "framework_time": 1.7531625926494598,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "832,alg_2,True,2.45": {
+        "832": {
             "block_size_x": 832,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.040941715240478516,
             "times": [
                 0.12998400628566742,
@@ -305,11 +252,8 @@
             "framework_time": 2.1368376910686493,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "896,alg_2,True,2.45": {
+        "896": {
             "block_size_x": 896,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.04177371359297207,
             "times": [
                 0.12931199371814728,
@@ -327,11 +271,8 @@
             "framework_time": 2.03637033700943,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "960,alg_2,True,2.45": {
+        "960": {
             "block_size_x": 960,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.042189714631864,
             "times": [
                 0.1335040032863617,
@@ -349,11 +290,8 @@
             "framework_time": 1.7383433878421783,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "1024,alg_2,True,2.45": {
+        "1024": {
             "block_size_x": 1024,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.04114742816558906,
             "times": [
                 0.13087999820709229,

From a489252d26f9987cccb0c02a5b21963a47acba93 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 21:31:25 -0800
Subject: [PATCH 054/253] Implemented handling of categorical parameters

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 4f2613ca4..d0c56476e 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -6,7 +6,7 @@
     import torch
     from botorch import fit_gpytorch_mll
     from botorch.acquisition import ExpectedImprovement
-    from botorch.models import SingleTaskGP
+    from botorch.models import MixedSingleTaskGP, SingleTaskGP
     from botorch.optim import optimize_acqf_discrete
     from gpytorch.mlls import ExactMarginalLogLikelihood
     from torch import Tensor
@@ -87,7 +87,10 @@ def initial_sample(self):
 
     def initialize_model(self, state_dict=None):
         """Initialize the model, possibly with a state dict for faster fitting."""
-        model = SingleTaskGP(self.train_X, self.train_Y)
+        if len(self.searchspace.tensor_categorical_dimensions) == 0:
+            model = SingleTaskGP(self.train_X, self.train_Y)
+        else:
+            model = MixedSingleTaskGP(self.train_X, self.train_Y, self.searchspace.tensor_categorical_dimensions)
         mll = ExactMarginalLogLikelihood(model.likelihood, model)
         # SumMarginalLogLikelihood
         if state_dict is not None:
@@ -110,7 +113,6 @@ def run(self, max_fevals: int):
                 ei = ExpectedImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
                 
                 # Optimize acquisition function to find the next evaluation point
-                # TODO look into how to handle categorical parameters with MixedSingleTaskGP
                 candidate, _ = optimize_acqf_discrete(
                     ei, 
                     q=1, 

From 68aee140f336672cc723617c4f03ff70ac3b6c1f Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 21:49:53 -0800
Subject: [PATCH 055/253] Implemented variational GP and likelihood

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 30 +++++++++++++-------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index d0c56476e..38ea837e5 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -6,9 +6,9 @@
     import torch
     from botorch import fit_gpytorch_mll
     from botorch.acquisition import ExpectedImprovement
-    from botorch.models import MixedSingleTaskGP, SingleTaskGP
+    from botorch.models import MixedSingleTaskGP, SingleTaskGP, SingleTaskVariationalGP
     from botorch.optim import optimize_acqf_discrete
-    from gpytorch.mlls import ExactMarginalLogLikelihood
+    from gpytorch.mlls import ExactMarginalLogLikelihood, VariationalELBO
     from torch import Tensor
     bayes_opt_present = True
 except ImportError:
@@ -85,16 +85,26 @@ def initial_sample(self):
         self.evaluate_configs(sample_configs)
         self.initial_sample_taken = True
 
-    def initialize_model(self, state_dict=None):
-        """Initialize the model, possibly with a state dict for faster fitting."""
-        if len(self.searchspace.tensor_categorical_dimensions) == 0:
-            model = SingleTaskGP(self.train_X, self.train_Y)
+    def initialize_model(self, state_dict=None, exact=True):
+        """Initialize the model and likelihood, possibly with a state dict for faster fitting."""
+        # initialize the model
+        if exact:
+            if len(self.searchspace.tensor_categorical_dimensions) == 0:
+                model = SingleTaskGP(self.train_X, self.train_Y)
+            else:
+                model = MixedSingleTaskGP(self.train_X, self.train_Y, self.searchspace.tensor_categorical_dimensions)
         else:
-            model = MixedSingleTaskGP(self.train_X, self.train_Y, self.searchspace.tensor_categorical_dimensions)
-        mll = ExactMarginalLogLikelihood(model.likelihood, model)
-        # SumMarginalLogLikelihood
-        if state_dict is not None:
+            model = SingleTaskVariationalGP(self.train_X, self.train_Y)
+
+        # load the previous state
+        if exact and state_dict is not None:
             model.load_state_dict(state_dict)
+
+        # initialize the likelihood
+        if exact:
+            mll = ExactMarginalLogLikelihood(model.likelihood, model)
+        else:
+            mll = VariationalELBO(model.likelihood, model.model, num_data=self.train_Y.size(0))
         return mll, model
 
     def run(self, max_fevals: int):

From b9c012dc29d4983fe5c330efe598f06837a61e74 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 8 Nov 2024 15:55:56 -0800
Subject: [PATCH 056/253] Using LogExpectedImprovement to avoid stability
 issues

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 38ea837e5..68028d72c 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -5,7 +5,7 @@
 try:
     import torch
     from botorch import fit_gpytorch_mll
-    from botorch.acquisition import ExpectedImprovement
+    from botorch.acquisition import LogExpectedImprovement
     from botorch.models import MixedSingleTaskGP, SingleTaskGP, SingleTaskVariationalGP
     from botorch.optim import optimize_acqf_discrete
     from gpytorch.mlls import ExactMarginalLogLikelihood, VariationalELBO
@@ -120,7 +120,7 @@ def run(self, max_fevals: int):
                 fit_gpytorch_mll(mll)
                 
                 # Define the acquisition function
-                ei = ExpectedImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
+                ei = LogExpectedImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
                 
                 # Optimize acquisition function to find the next evaluation point
                 candidate, _ = optimize_acqf_discrete(

From 41ce663aa9425b43b1861c6ad4c0a4bff9140e57 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 8 Nov 2024 23:13:41 -0800
Subject: [PATCH 057/253] Implemented tensor space bounds in searchspace

---
 kernel_tuner/searchspace.py | 36 +++++++++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 6b3e54e21..7e9315d06 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -58,8 +58,10 @@ def __init__(
         framework_l = framework.lower()
         restrictions = restrictions if restrictions is not None else []
         self.tune_params = tune_params
-        self.tensorspace = None
-        self.tensor_categorical_dimensions = []
+        self._tensorspace = None
+        self._tensorspace_bounds = None
+        self._tensorspace_bounds_indices = []
+        self._tensorspace_categorical_dimensions = []
         self._map_tensor_to_param = []
         self._map_param_to_tensor = []
         self.restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
@@ -594,28 +596,42 @@ def get_param_config_index(self, param_config: Union[tuple, Tensor]):
     
     def initialize_tensorspace(self):
         """Encode the searchspace as floats in a Tensor. Save the mapping."""
-        assert self.tensorspace is None, "Tensorspace is already initialized"
+        assert self._tensorspace is None, "Tensorspace is already initialized"
+        bounds = []
 
         # generate the mappings to and from tensor values
         for index, param_values in enumerate(self.params_values):
+            # convert numericals to float, or encode categorical
             if all(isinstance(v, numbers.Real) for v in param_values):
                 tensor_values = np.array(param_values).astype(float)
             else:
-                self.tensor_categorical_dimensions.append(index)
+                self._tensorspace_categorical_dimensions.append(index)
                 tensor_values = np.arange(len(param_values))
+
             self._map_param_to_tensor.append(dict(zip(param_values, tensor_values)))
             self._map_tensor_to_param.append(dict(zip(tensor_values, param_values)))
+            bounds.append((tensor_values.min(), tensor_values.max()))
+            if tensor_values.min() < tensor_values.max():
+                self._tensorspace_bounds_indices.append(index)
 
         # apply the mappings on the full searchspace
         numpy_repr = self.get_list_numpy()
         numpy_repr = np.apply_along_axis(self.param_config_to_tensor, 1, numpy_repr)
-        self.tensorspace = torch.from_numpy(numpy_repr.astype(float))
+        self._tensorspace = torch.from_numpy(numpy_repr.astype(float))
+
+        # set the bounds in the correct format (one array for the min, one for the max)
+        bounds = torch.from_numpy(np.array(bounds))
+        self._tensorspace_bounds = torch.cat([bounds[:,0], bounds[:,1]]).reshape((2, bounds.shape[0]))
     
     def get_tensorspace(self):
         """Get the searchspace encoded in a Tensor."""
-        if self.tensorspace is None:
+        if self._tensorspace is None:
             self.initialize_tensorspace()
-        return self.tensorspace
+        return self._tensorspace
+    
+    def get_tensorspace_categorical_dimensions(self):
+        """Get the a list of the categorical dimensions in the tensorspace."""
+        return self._tensorspace_categorical_dimensions
     
     def param_config_to_tensor(self, param_config: tuple):
         """Convert from a parameter configuration to a Tensor."""
@@ -644,6 +660,12 @@ def tensor_to_param_config(self, tensor: Tensor):
         for i, param in enumerate(tensor):
             config.append(self._map_tensor_to_param[i][float(param)])
         return tuple(config)
+    
+    def get_tensorspace_bounds(self):
+        """Get the bounds to the tensorspace parameters, returned as a 2 x d dimensional tensor, and the indices of the parameters."""
+        if self._tensorspace is None:
+            self.initialize_tensorspace()
+        return self._tensorspace_bounds, self._tensorspace_bounds_indices
 
     def __prepare_neighbors_index(self):
         """Prepare by calculating the indices for the individual parameters."""

From 07ef1d49dd91e2c84aefe1ba95c9d08ef63f0e0c Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 8 Nov 2024 23:15:14 -0800
Subject: [PATCH 058/253] Implemented normalization for input features

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 68028d72c..8026c4c13 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -7,6 +7,7 @@
     from botorch import fit_gpytorch_mll
     from botorch.acquisition import LogExpectedImprovement
     from botorch.models import MixedSingleTaskGP, SingleTaskGP, SingleTaskVariationalGP
+    from botorch.models.transforms import Normalize, Standardize
     from botorch.optim import optimize_acqf_discrete
     from gpytorch.mlls import ExactMarginalLogLikelihood, VariationalELBO
     from torch import Tensor
@@ -87,14 +88,21 @@ def initial_sample(self):
 
     def initialize_model(self, state_dict=None, exact=True):
         """Initialize the model and likelihood, possibly with a state dict for faster fitting."""
+        train_X = self.train_X
+        train_Y = self.train_Y
+        # transforms = dict(input_transform=Normalize(train_X.dim()), outcome_transform=Standardize(train_Y.dim()))
+        bounds, bounds_indices = self.searchspace.get_tensorspace_bounds()
+        transforms = dict(input_transform=Normalize(d=train_X.shape[-1], indices=bounds_indices, bounds=bounds))
+
         # initialize the model
         if exact:
-            if len(self.searchspace.tensor_categorical_dimensions) == 0:
-                model = SingleTaskGP(self.train_X, self.train_Y)
+            catdims = self.searchspace.get_tensorspace_categorical_dimensions()
+            if len(catdims) == 0:
+                model = SingleTaskGP(train_X, train_Y, **transforms)
             else:
-                model = MixedSingleTaskGP(self.train_X, self.train_Y, self.searchspace.tensor_categorical_dimensions)
+                model = MixedSingleTaskGP(train_X, train_Y, cat_dims=catdims, **transforms)
         else:
-            model = SingleTaskVariationalGP(self.train_X, self.train_Y)
+            model = SingleTaskVariationalGP(train_X, train_Y, **transforms)
 
         # load the previous state
         if exact and state_dict is not None:
@@ -104,7 +112,7 @@ def initialize_model(self, state_dict=None, exact=True):
         if exact:
             mll = ExactMarginalLogLikelihood(model.likelihood, model)
         else:
-            mll = VariationalELBO(model.likelihood, model.model, num_data=self.train_Y.size(0))
+            mll = VariationalELBO(model.likelihood, model.model, num_data=train_Y.size(0))
         return mll, model
 
     def run(self, max_fevals: int):

From 721d072414aa1cc3eb4c1b3a5a8481dcfaac9883 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Sat, 9 Nov 2024 00:55:47 -0800
Subject: [PATCH 059/253] Tensorspace is reduced by removing inconsequential
 parameters

---
 kernel_tuner/searchspace.py | 36 +++++++++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 7e9315d06..8b9ac0299 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -62,8 +62,9 @@ def __init__(
         self._tensorspace_bounds = None
         self._tensorspace_bounds_indices = []
         self._tensorspace_categorical_dimensions = []
-        self._map_tensor_to_param = []
-        self._map_param_to_tensor = []
+        self._tensorspace_param_config_structure = []
+        self._map_tensor_to_param = {}
+        self._map_param_to_tensor = {}
         self.restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
         # the searchspace can add commonly used constraints (e.g. maxprod(blocks) <= maxthreads)
         self._modified_restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
@@ -601,6 +602,14 @@ def initialize_tensorspace(self):
 
         # generate the mappings to and from tensor values
         for index, param_values in enumerate(self.params_values):
+            # filter out parameters that do not matter, more efficient and avoids bounds problem
+            if len(param_values) < 2 or all(p == param_values[0] for p in param_values):
+                # keep track of skipped parameters, add them back in conversion functions
+                self._tensorspace_param_config_structure.append(param_values[0])
+                continue
+            else:
+                self._tensorspace_param_config_structure.append(None)
+
             # convert numericals to float, or encode categorical
             if all(isinstance(v, numbers.Real) for v in param_values):
                 tensor_values = np.array(param_values).astype(float)
@@ -608,12 +617,18 @@ def initialize_tensorspace(self):
                 self._tensorspace_categorical_dimensions.append(index)
                 tensor_values = np.arange(len(param_values))
 
-            self._map_param_to_tensor.append(dict(zip(param_values, tensor_values)))
-            self._map_tensor_to_param.append(dict(zip(tensor_values, param_values)))
+            # write the mappings to the object
+            self._map_param_to_tensor[index] = (dict(zip(param_values, tensor_values)))
+            self._map_tensor_to_param[index] = (dict(zip(tensor_values, param_values)))
             bounds.append((tensor_values.min(), tensor_values.max()))
             if tensor_values.min() < tensor_values.max():
                 self._tensorspace_bounds_indices.append(index)
 
+        # do some checks
+        assert len(self.params_values) == len(self._tensorspace_param_config_structure)
+        assert len(self._map_param_to_tensor) == len(self._map_tensor_to_param) == len(bounds)
+        assert len(self._tensorspace_bounds_indices) <= len(bounds)
+
         # apply the mappings on the full searchspace
         numpy_repr = self.get_list_numpy()
         numpy_repr = np.apply_along_axis(self.param_config_to_tensor, 1, numpy_repr)
@@ -639,6 +654,8 @@ def param_config_to_tensor(self, param_config: tuple):
             self.initialize_tensorspace()
         array = []
         for i, param in enumerate(param_config):
+            if self._tensorspace_param_config_structure[i] is not None:
+                continue    # skip over parameters not in the tensorspace
             mapping = self._map_param_to_tensor[i]
             conversions = [None, str, float, int, bool]
             for c in conversions:
@@ -656,9 +673,14 @@ def tensor_to_param_config(self, tensor: Tensor):
         assert tensor.dim() == 1, f"Parameter configuration tensor must be 1-dimensional, is {tensor.dim()} ({tensor})"
         if len(self._map_tensor_to_param) == 0:
             self.initialize_tensorspace()
-        config = []
-        for i, param in enumerate(tensor):
-            config.append(self._map_tensor_to_param[i][float(param)])
+        config = self._tensorspace_param_config_structure.copy()
+        skip_counter = 0
+        for i, param in enumerate(config):
+            if param is not None:
+                skip_counter += 1
+            else:
+                value = float(tensor[i-skip_counter])
+                config[i] = self._map_tensor_to_param[i][value]
         return tuple(config)
     
     def get_tensorspace_bounds(self):

From 2434b3b93bed8d49e88342d6e227ece243c8bd1d Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Sat, 9 Nov 2024 01:11:18 -0800
Subject: [PATCH 060/253] Extended strategies tests  to include single
 parameter value

---
 test/strategies/test_cache_file.json | 49 +++++++++++++++++++---------
 test/strategies/test_strategies.py   |  2 ++
 2 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/test/strategies/test_cache_file.json b/test/strategies/test_cache_file.json
index 5e0c0e054..6073d4b62 100644
--- a/test/strategies/test_cache_file.json
+++ b/test/strategies/test_cache_file.json
@@ -4,6 +4,7 @@
     "tune_params_keys": [
         "block_size_x",
         "test_string",
+        "test_single",
         "test_bool",
         "test_mixed"
     ],
@@ -29,6 +30,9 @@
             "alg_1",
             "alg_2"
         ],
+        "test_single": [
+            15
+        ],
         "test_bool": [
             true,
             false
@@ -41,9 +45,10 @@
         ]
     },
     "cache": {
-        "128,alg_2,True,2.45": {
+        "128,alg_2,15,True,2.45": {
             "block_size_x": 128,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.04073600071881499,
@@ -63,9 +68,10 @@
             "framework_time": 0.8587837219238281,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "192,alg_2,True,2.45": {
+        "192,alg_2,15,True,2.45": {
             "block_size_x": 192,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.04095085710287094,
@@ -85,9 +91,10 @@
             "framework_time": 1.6656816005706787,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "256,alg_2,True,2.45": {
+        "256,alg_2,15,True,2.45": {
             "block_size_x": 256,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.04189257137477398,
@@ -107,9 +114,10 @@
             "framework_time": 1.6054585576057434,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "320,alg_2,True,2.45": {
+        "320,alg_2,15,True,2.45": {
             "block_size_x": 320,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.04208914376795292,
@@ -129,9 +137,10 @@
             "framework_time": 1.4494173228740692,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "384,alg_2,True,2.45": {
+        "384,alg_2,15,True,2.45": {
             "block_size_x": 384,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.04174171467976911,
@@ -151,9 +160,10 @@
             "framework_time": 1.682564616203308,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "448,alg_2,True,2.45": {
+        "448,alg_2,15,True,2.45": {
             "block_size_x": 448,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.03249828570655414,
@@ -173,9 +183,10 @@
             "framework_time": 1.5890561044216156,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "512,alg_2,True,2.45": {
+        "512,alg_2,15,True,2.45": {
             "block_size_x": 512,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.04139885599059718,
@@ -195,9 +206,10 @@
             "framework_time": 1.853298395872116,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "576,alg_2,True,2.45": {
+        "576,alg_2,15,True,2.45": {
             "block_size_x": 576,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.04189257137477398,
@@ -217,9 +229,10 @@
             "framework_time": 1.8403716385364532,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "640,alg_2,True,2.45": {
+        "640,alg_2,15,True,2.45": {
             "block_size_x": 640,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.0411702852163996,
@@ -239,9 +252,10 @@
             "framework_time": 1.8264725804328918,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "704,alg_2,True,2.45": {
+        "704,alg_2,15,True,2.45": {
             "block_size_x": 704,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.04184228580977235,
@@ -261,9 +275,10 @@
             "framework_time": 1.6709677875041962,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "768,alg_2,True,2.45": {
+        "768,alg_2,15,True,2.45": {
             "block_size_x": 768,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.03175771422684193,
@@ -283,9 +298,10 @@
             "framework_time": 1.7531625926494598,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "832,alg_2,True,2.45": {
+        "832,alg_2,15,True,2.45": {
             "block_size_x": 832,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.040941715240478516,
@@ -305,9 +321,10 @@
             "framework_time": 2.1368376910686493,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "896,alg_2,True,2.45": {
+        "896,alg_2,15,True,2.45": {
             "block_size_x": 896,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.04177371359297207,
@@ -327,9 +344,10 @@
             "framework_time": 2.03637033700943,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "960,alg_2,True,2.45": {
+        "960,alg_2,15,True,2.45": {
             "block_size_x": 960,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.042189714631864,
@@ -349,9 +367,10 @@
             "framework_time": 1.7383433878421783,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "1024,alg_2,True,2.45": {
+        "1024,alg_2,15,True,2.45": {
             "block_size_x": 1024,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.04114742816558906,
diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
index b7b2851dd..9c0e9faca 100644
--- a/test/strategies/test_strategies.py
+++ b/test/strategies/test_strategies.py
@@ -32,6 +32,7 @@ def vector_add():
     tune_params = dict()
     tune_params["block_size_x"] = [128 + 64 * i for i in range(15)]
     tune_params["test_string"] = ["alg_1", "alg_2"]
+    tune_params["test_single"] = [15]
     tune_params["test_bool"] = [True, False]
     tune_params["test_mixed"] = ["test", 1, True, 2.45]
 
@@ -82,6 +83,7 @@ def test_strategies(vector_add, strategy):
     expected_items = {
         'block_size_x': int,
         'test_string': str,
+        'test_single': int,
         'test_bool': bool,
         'test_mixed': float,
         'time': (float, int),

From 1679751be384a7c0c82a85fc1af15d770a5b0711 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Sat, 9 Nov 2024 01:12:24 -0800
Subject: [PATCH 061/253] Fixed an indexing error for tensorspace bounds

---
 kernel_tuner/searchspace.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 8b9ac0299..3085688c1 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -598,6 +598,7 @@ def get_param_config_index(self, param_config: Union[tuple, Tensor]):
     def initialize_tensorspace(self):
         """Encode the searchspace as floats in a Tensor. Save the mapping."""
         assert self._tensorspace is None, "Tensorspace is already initialized"
+        skipped_count = 0
         bounds = []
 
         # generate the mappings to and from tensor values
@@ -606,6 +607,7 @@ def initialize_tensorspace(self):
             if len(param_values) < 2 or all(p == param_values[0] for p in param_values):
                 # keep track of skipped parameters, add them back in conversion functions
                 self._tensorspace_param_config_structure.append(param_values[0])
+                skipped_count += 1
                 continue
             else:
                 self._tensorspace_param_config_structure.append(None)
@@ -614,7 +616,7 @@ def initialize_tensorspace(self):
             if all(isinstance(v, numbers.Real) for v in param_values):
                 tensor_values = np.array(param_values).astype(float)
             else:
-                self._tensorspace_categorical_dimensions.append(index)
+                self._tensorspace_categorical_dimensions.append(index-skipped_count)
                 tensor_values = np.arange(len(param_values))
 
             # write the mappings to the object
@@ -622,7 +624,7 @@ def initialize_tensorspace(self):
             self._map_tensor_to_param[index] = (dict(zip(tensor_values, param_values)))
             bounds.append((tensor_values.min(), tensor_values.max()))
             if tensor_values.min() < tensor_values.max():
-                self._tensorspace_bounds_indices.append(index)
+                self._tensorspace_bounds_indices.append(index-skipped_count)
 
         # do some checks
         assert len(self.params_values) == len(self._tensorspace_param_config_structure)

From 2b816a641168dda4add7731fc0bb31f5ba599cdd Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Sat, 9 Nov 2024 01:20:57 -0800
Subject: [PATCH 062/253] Extended searchspace tests to include single
 parameter value

---
 test/test_searchspace.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/test/test_searchspace.py b/test/test_searchspace.py
index 48f049750..eaf546387 100644
--- a/test/test_searchspace.py
+++ b/test/test_searchspace.py
@@ -27,6 +27,10 @@
 simple_searchspace = Searchspace(simple_tune_params, restrict, max_threads)
 simple_searchspace_bruteforce = Searchspace(simple_tune_params, restrict, max_threads, framework="bruteforce")
 
+simple_tune_params_single = simple_tune_params.copy()
+simple_tune_params_single["s"] = [True]
+simple_searchspace_single = Searchspace(simple_tune_params_single, restrict, max_threads)
+
 # 3.1 million combinations, of which 10600 pass the restrictions
 num_layers = 42
 tune_params = dict()
@@ -185,11 +189,12 @@ def test_get_tensorspace():
 
 def test_conversion_tensor_param_config():
     """Test the conversion from a parameter configuration to a tensor and tensor to parameter configuration."""
-    for config in simple_searchspace.list:
-        tensor = simple_searchspace.param_config_to_tensor(config)
-        config_2 = simple_searchspace.tensor_to_param_config(tensor)
+    for config in simple_searchspace_single.list:
+        tensor = simple_searchspace_single.param_config_to_tensor(config)
+        config_2 = simple_searchspace_single.tensor_to_param_config(tensor)
         assert config == config_2
-        assert tensor.equal(simple_searchspace.param_config_to_tensor(config_2))
+        assert tensor.equal(simple_searchspace_single.param_config_to_tensor(config_2))
+        assert len(tensor) == len(config) - 1
 
 
 def test_random_sample():

From c417585b05bfde8415bd1c0d4ad70b282750d60b Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Sat, 9 Nov 2024 21:55:10 -0800
Subject: [PATCH 063/253] Implemented additional acquisition functions, reduced
 number of reinitializations

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 31 +++++++++++++++-----
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 8026c4c13..3aff658ab 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -1,11 +1,19 @@
 """Bayesian Optimization implementation using BO Torch."""
 
+from math import ceil
+
 import numpy as np
 
 try:
     import torch
     from botorch import fit_gpytorch_mll
-    from botorch.acquisition import LogExpectedImprovement
+    from botorch.acquisition import (
+        LogExpectedImprovement,
+        ProbabilityOfImprovement,
+        qExpectedUtilityOfBestOption,
+        qLogExpectedImprovement,
+        qLowerBoundMaxValueEntropy,
+    )
     from botorch.models import MixedSingleTaskGP, SingleTaskGP, SingleTaskVariationalGP
     from botorch.models.transforms import Normalize, Standardize
     from botorch.optim import optimize_acqf_discrete
@@ -115,25 +123,31 @@ def initialize_model(self, state_dict=None, exact=True):
             mll = VariationalELBO(model.likelihood, model.model, num_data=train_Y.size(0))
         return mll, model
 
-    def run(self, max_fevals: int):
+    def run(self, max_fevals: int, feval_per_loop=1):
         """Run the Bayesian Optimization loop for at most `max_fevals`."""
         try:
             if not self.initial_sample_taken:
                 self.initial_sample()
-                mll, model = self.initialize_model()
+            mll, model = self.initialize_model()
 
             # Bayesian optimization loop
-            for _ in range(max_fevals):
+            max_loops = ceil(max_fevals/feval_per_loop)
+            for f in range(max_loops):
                 # fit a Gaussian Process model
                 fit_gpytorch_mll(mll)
                 
                 # Define the acquisition function
-                ei = LogExpectedImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
+                acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
+                # acqf = NoisyExpectedImprovement(model=model, , maximize=False)
+                # acqf = ProbabilityOfImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
+                # acqf = qLowerBoundMaxValueEntropy(model=model, candidate_set=self.searchspace_tensors, maximize=False)
+                # acqf = qLogExpectedImprovement(model=model, best_f=self.train_Y.min())
+                # acqf = qExpectedUtilityOfBestOption(pref_model=model)
                 
                 # Optimize acquisition function to find the next evaluation point
                 candidate, _ = optimize_acqf_discrete(
-                    ei, 
-                    q=1, 
+                    acqf, 
+                    q=feval_per_loop, 
                     choices=self.searchspace_tensors
                 )
                 
@@ -141,7 +155,8 @@ def run(self, max_fevals: int):
                 self.evaluate_configs(candidate)
 
                 # reinitialize the models so they are ready for fitting on next iteration
-                mll, model = self.initialize_model(model.state_dict())
+                if f < max_loops - 1:
+                    mll, model = self.initialize_model(model.state_dict())
         except util.StopCriterionReached as e:
             if self.tuning_options.verbose:
                 print(e)

From 3d53b29af0d7b58e76b5d7431a6a8a20cdddd0c2 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Sat, 9 Nov 2024 22:49:00 -0800
Subject: [PATCH 064/253] Implemented division of tensorspace into chunks for
 faster optimization

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 39 ++++++++++++++------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 3aff658ab..4bc7b482b 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -42,7 +42,7 @@ class BayesianOptimization():
     def __init__(self, searchspace: Searchspace, runner, tuning_options):
         """Initialization of the Bayesian Optimization class. Does not evaluate configurations."""
         self.initial_sample_taken = False
-        self.initial_sample_size = tuning_options.strategy_options.get("popsize", 20)
+        self.initial_sample_size: int = tuning_options.strategy_options.get("popsize", 20)
         self.tuning_options = tuning_options
         self.cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, return_invalid=True)
 
@@ -123,12 +123,13 @@ def initialize_model(self, state_dict=None, exact=True):
             mll = VariationalELBO(model.likelihood, model.model, num_data=train_Y.size(0))
         return mll, model
 
-    def run(self, max_fevals: int, feval_per_loop=1):
+    def run(self, max_fevals: int, feval_per_loop=5, max_batch_size=2048):
         """Run the Bayesian Optimization loop for at most `max_fevals`."""
         try:
             if not self.initial_sample_taken:
                 self.initial_sample()
             mll, model = self.initialize_model()
+            num_fevals = self.initial_sample_size
 
             # Bayesian optimization loop
             max_loops = ceil(max_fevals/feval_per_loop)
@@ -136,23 +137,37 @@ def run(self, max_fevals: int, feval_per_loop=1):
                 # fit a Gaussian Process model
                 fit_gpytorch_mll(mll)
                 
-                # Define the acquisition function
+                # define the acquisition function
                 acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
                 # acqf = NoisyExpectedImprovement(model=model, , maximize=False)
                 # acqf = ProbabilityOfImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
                 # acqf = qLowerBoundMaxValueEntropy(model=model, candidate_set=self.searchspace_tensors, maximize=False)
                 # acqf = qLogExpectedImprovement(model=model, best_f=self.train_Y.min())
                 # acqf = qExpectedUtilityOfBestOption(pref_model=model)
+
+                # divide the optimization space into random chuncks
+                tensorspace_size = self.searchspace_tensors.size(0)
+                num_optimization_spaces = max(min(feval_per_loop, max_fevals-num_fevals), ceil(tensorspace_size / max_batch_size))
+                if num_optimization_spaces <= 1:
+                    optimization_spaces = [self.searchspace_tensors]
+                else:
+                    # shuffle the searchspace
+                    shuffled_indices = torch.randperm(tensorspace_size)
+                    tensorspace = self.searchspace_tensors[shuffled_indices]
+                    optimization_spaces = tensorspace.split(ceil(tensorspace_size / num_optimization_spaces))
                 
-                # Optimize acquisition function to find the next evaluation point
-                candidate, _ = optimize_acqf_discrete(
-                    acqf, 
-                    q=feval_per_loop, 
-                    choices=self.searchspace_tensors
-                )
-                
-                # evaluate the new candidate
-                self.evaluate_configs(candidate)
+                # optimize acquisition function to find the next evaluation point
+                for optimization_space in optimization_spaces:
+                    candidate, _ = optimize_acqf_discrete(
+                        acqf, 
+                        q=1, 
+                        choices=optimization_space,
+                        max_batch_size=max_batch_size
+                    )
+                    
+                    # evaluate the new candidate
+                    self.evaluate_configs(candidate)
+                    num_fevals += 1
 
                 # reinitialize the models so they are ready for fitting on next iteration
                 if f < max_loops - 1:

From 3ed43a68a9e6ffc42fa63138ee18177aa3021072 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 11 Nov 2024 17:48:49 -0800
Subject: [PATCH 065/253] Switch to fit_gpytorch_mll_torch for faster fitting,
 use approximate mode

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 4bc7b482b..6375caa62 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -17,18 +17,30 @@
     from botorch.models import MixedSingleTaskGP, SingleTaskGP, SingleTaskVariationalGP
     from botorch.models.transforms import Normalize, Standardize
     from botorch.optim import optimize_acqf_discrete
+    from botorch.optim.fit import fit_gpytorch_mll_torch
     from gpytorch.mlls import ExactMarginalLogLikelihood, VariationalELBO
     from torch import Tensor
     bayes_opt_present = True
 except ImportError:
     bayes_opt_present = False
 
+import gpytorch.settings as gp_settings
+import linear_operator.settings as linop_settings
+
 from kernel_tuner import util
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies.common import (
     CostFunc,
 )
 
+# set gpytorch to approximate mode for faster fitting
+linop_settings._fast_covar_root_decomposition._default = True
+linop_settings._fast_log_prob._default = True
+linop_settings._fast_solves._default = True
+linop_settings.cholesky_max_tries._global_value = 6
+linop_settings.max_cholesky_size._global_value = 800
+gp_settings.max_eager_kernel_size._global_value = 800
+
 
 def tune(searchspace: Searchspace, runner, tuning_options):
     """The entry function for tuning a searchspace using this algorithm."""
@@ -49,6 +61,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         # set up conversion to tensors
         self.searchspace = searchspace
         self.searchspace_tensors = searchspace.get_tensorspace()
+        self.bounds, self.bounds_indices = self.searchspace.get_tensorspace_bounds()
         self.train_X = torch.empty(0)
         self.train_Y = torch.empty(0)
 
@@ -99,8 +112,7 @@ def initialize_model(self, state_dict=None, exact=True):
         train_X = self.train_X
         train_Y = self.train_Y
         # transforms = dict(input_transform=Normalize(train_X.dim()), outcome_transform=Standardize(train_Y.dim()))
-        bounds, bounds_indices = self.searchspace.get_tensorspace_bounds()
-        transforms = dict(input_transform=Normalize(d=train_X.shape[-1], indices=bounds_indices, bounds=bounds))
+        transforms = dict(input_transform=Normalize(d=train_X.shape[-1], indices=self.bounds_indices, bounds=self.bounds))
 
         # initialize the model
         if exact:
@@ -135,7 +147,7 @@ def run(self, max_fevals: int, feval_per_loop=5, max_batch_size=2048):
             max_loops = ceil(max_fevals/feval_per_loop)
             for f in range(max_loops):
                 # fit a Gaussian Process model
-                fit_gpytorch_mll(mll)
+                fit_gpytorch_mll(mll, optimizer=fit_gpytorch_mll_torch)
                 
                 # define the acquisition function
                 acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.min(), maximize=False)

From 559813fbf81b4a21da6687bab5f3453016320f76 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 11 Nov 2024 20:40:16 -0800
Subject: [PATCH 066/253] Implemented running BO on GPU / Apple Silicon,
 settable precision

---
 kernel_tuner/searchspace.py                  | 36 +++++++++++++-------
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 14 +++++---
 2 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 3085688c1..201052e8d 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -59,6 +59,9 @@ def __init__(
         restrictions = restrictions if restrictions is not None else []
         self.tune_params = tune_params
         self._tensorspace = None
+        self.tensor_dtype = torch.float32 if torch_available else None
+        self.tensor_device = torch.device("cpu") if torch_available else None
+        self.tensor_kwargs = dict(dtype=self.tensor_dtype, device=self.tensor_device)
         self._tensorspace_bounds = None
         self._tensorspace_bounds_indices = []
         self._tensorspace_categorical_dimensions = []
@@ -595,11 +598,16 @@ def get_param_config_index(self, param_config: Union[tuple, Tensor]):
         # constant time O(1) access - much faster than any other method, but needs a shadow dict of the search space
         return self.__dict.get(param_config, None)
     
-    def initialize_tensorspace(self):
-        """Encode the searchspace as floats in a Tensor. Save the mapping."""
+    def initialize_tensorspace(self, dtype = None, device = None):
+        """Encode the searchspace in a Tensor. Save the mapping. Call this function directly to control the precision or device used."""
         assert self._tensorspace is None, "Tensorspace is already initialized"
         skipped_count = 0
         bounds = []
+        if dtype is not None:
+            self.tensor_dtype = dtype
+        if device is not None:
+            self.tensor_device = device
+        self.tensor_kwargs = dict(dtype=self.tensor_dtype, device=self.tensor_device)
 
         # generate the mappings to and from tensor values
         for index, param_values in enumerate(self.params_values):
@@ -614,14 +622,15 @@ def initialize_tensorspace(self):
 
             # convert numericals to float, or encode categorical
             if all(isinstance(v, numbers.Real) for v in param_values):
-                tensor_values = np.array(param_values).astype(float)
+                tensor_values = torch.tensor(param_values, dtype=self.tensor_dtype)
             else:
                 self._tensorspace_categorical_dimensions.append(index-skipped_count)
-                tensor_values = np.arange(len(param_values))
+                # tensor_values = np.arange(len(param_values))
+                tensor_values = torch.arange(len(param_values), dtype=self.tensor_dtype)
 
             # write the mappings to the object
-            self._map_param_to_tensor[index] = (dict(zip(param_values, tensor_values)))
-            self._map_tensor_to_param[index] = (dict(zip(tensor_values, param_values)))
+            self._map_param_to_tensor[index] = (dict(zip(param_values, tensor_values.tolist())))
+            self._map_tensor_to_param[index] = (dict(zip(tensor_values.tolist(), param_values)))
             bounds.append((tensor_values.min(), tensor_values.max()))
             if tensor_values.min() < tensor_values.max():
                 self._tensorspace_bounds_indices.append(index-skipped_count)
@@ -632,16 +641,17 @@ def initialize_tensorspace(self):
         assert len(self._tensorspace_bounds_indices) <= len(bounds)
 
         # apply the mappings on the full searchspace
-        numpy_repr = self.get_list_numpy()
-        numpy_repr = np.apply_along_axis(self.param_config_to_tensor, 1, numpy_repr)
-        self._tensorspace = torch.from_numpy(numpy_repr.astype(float))
+        # numpy_repr = self.get_list_numpy()
+        # numpy_repr = np.apply_along_axis(self.param_config_to_tensor, 1, numpy_repr)
+        # self._tensorspace = torch.from_numpy(numpy_repr.astype(self.tensor_dtype)).to(self.tensor_device)
+        self._tensorspace = torch.stack(tuple(map(self.param_config_to_tensor, self.list)))
 
         # set the bounds in the correct format (one array for the min, one for the max)
-        bounds = torch.from_numpy(np.array(bounds))
+        bounds = torch.tensor(bounds, **self.tensor_kwargs)
         self._tensorspace_bounds = torch.cat([bounds[:,0], bounds[:,1]]).reshape((2, bounds.shape[0]))
     
     def get_tensorspace(self):
-        """Get the searchspace encoded in a Tensor."""
+        """Get the searchspace encoded in a Tensor. To use a non-default dtype or device, call `initialize_tensorspace` first."""
         if self._tensorspace is None:
             self.initialize_tensorspace()
         return self._tensorspace
@@ -668,7 +678,7 @@ def param_config_to_tensor(self, param_config: tuple):
                 except (KeyError, ValueError) as e:
                     if c == conversions[-1]:
                         raise KeyError(f"No variant of {param} could be found in {mapping}") from e
-        return torch.from_numpy(np.array(array))
+        return torch.tensor(array, **self.tensor_kwargs)
     
     def tensor_to_param_config(self, tensor: Tensor):
         """Convert from a Tensor to a parameter configuration."""
@@ -681,7 +691,7 @@ def tensor_to_param_config(self, tensor: Tensor):
             if param is not None:
                 skip_counter += 1
             else:
-                value = float(tensor[i-skip_counter])
+                value = tensor[i-skip_counter].item()
                 config[i] = self._map_tensor_to_param[i][value]
         return tuple(config)
     
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 6375caa62..18c7264a5 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -58,12 +58,16 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.tuning_options = tuning_options
         self.cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, return_invalid=True)
 
+        # select the device to use (CUDA or Apple Silicon MPS if available)
+        self.tensor_device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.mps.is_available() else "cpu")
+
         # set up conversion to tensors
         self.searchspace = searchspace
+        self.searchspace.initialize_tensorspace(dtype=torch.float32, device=self.tensor_device)
         self.searchspace_tensors = searchspace.get_tensorspace()
         self.bounds, self.bounds_indices = self.searchspace.get_tensorspace_bounds()
-        self.train_X = torch.empty(0)
-        self.train_Y = torch.empty(0)
+        self.train_X = torch.empty(0, **self.searchspace.tensor_kwargs)
+        self.train_Y = torch.empty(0, **self.searchspace.tensor_kwargs)
 
     def run_config(self, config: tuple):
         """Run a single configuration. Returns the result and whether it is valid."""
@@ -95,14 +99,14 @@ def evaluate_configs(self, X: Tensor):
 
             # add valid results to the training set
             if len(valid_configs) > 0 and len(valid_results) > 0:
-                self.train_X = torch.cat([self.train_X, torch.from_numpy(np.array(valid_configs))])
-                self.train_Y = torch.cat([self.train_Y, torch.from_numpy(np.array(valid_results))])
+                self.train_X = torch.cat([self.train_X, torch.stack(valid_configs)])
+                self.train_Y = torch.cat([self.train_Y, torch.tensor(valid_results, **self.searchspace.tensor_kwargs)])
         else:
             raise NotImplementedError(f"Evaluation has not been implemented for type {type(X)}")
         
     def initial_sample(self):
         """Take an initial sample."""
-        sample_indices = torch.from_numpy(self.searchspace.get_random_sample_indices(self.initial_sample_size))
+        sample_indices = torch.from_numpy(self.searchspace.get_random_sample_indices(self.initial_sample_size)).to(self.tensor_device)
         sample_configs = self.searchspace_tensors.index_select(0, sample_indices)
         self.evaluate_configs(sample_configs)
         self.initial_sample_taken = True

From c391428de491cc7d97ae711686eae44f1afaf933 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 11 Nov 2024 20:56:46 -0800
Subject: [PATCH 067/253] Removed Apple Silicon MPS support as cholesky
 operation is not yet implemented

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 18c7264a5..3f0bf6fee 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -59,7 +59,8 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, return_invalid=True)
 
         # select the device to use (CUDA or Apple Silicon MPS if available)
-        self.tensor_device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.mps.is_available() else "cpu")
+        # TODO keep an eye on Apple Silicon support. Currently `linalg_cholesky` is not yet implemented for MPS.
+        self.tensor_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
         # set up conversion to tensors
         self.searchspace = searchspace

From 07925c5732fe4ad083bfd01cc7b5381047826316 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 12 Nov 2024 00:42:05 -0800
Subject: [PATCH 068/253] Implemented discrete local search for cases where the
 tensorspace isn't split

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 28 ++++++++++++++------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 3f0bf6fee..f15415a2e 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -16,7 +16,7 @@
     )
     from botorch.models import MixedSingleTaskGP, SingleTaskGP, SingleTaskVariationalGP
     from botorch.models.transforms import Normalize, Standardize
-    from botorch.optim import optimize_acqf_discrete
+    from botorch.optim import optimize_acqf_discrete, optimize_acqf_discrete_local_search
     from botorch.optim.fit import fit_gpytorch_mll_torch
     from gpytorch.mlls import ExactMarginalLogLikelihood, VariationalELBO
     from torch import Tensor
@@ -140,7 +140,7 @@ def initialize_model(self, state_dict=None, exact=True):
             mll = VariationalELBO(model.likelihood, model.model, num_data=train_Y.size(0))
         return mll, model
 
-    def run(self, max_fevals: int, feval_per_loop=5, max_batch_size=2048):
+    def run(self, max_fevals: int, feval_per_loop=10, max_batch_size=2048):
         """Run the Bayesian Optimization loop for at most `max_fevals`."""
         try:
             if not self.initial_sample_taken:
@@ -175,12 +175,24 @@ def run(self, max_fevals: int, feval_per_loop=5, max_batch_size=2048):
                 
                 # optimize acquisition function to find the next evaluation point
                 for optimization_space in optimization_spaces:
-                    candidate, _ = optimize_acqf_discrete(
-                        acqf, 
-                        q=1, 
-                        choices=optimization_space,
-                        max_batch_size=max_batch_size
-                    )
+
+                    # optimize over a lattice if the space is too large
+                    if max_batch_size < optimization_space.size(0):
+                        candidate, _ = optimize_acqf_discrete_local_search(
+                            acqf, 
+                            q=1,
+                            discrete_choices=optimization_space, 
+                            max_batch_size=max_batch_size,
+                            num_restarts=5,
+                            raw_samples=1024
+                        )
+                    else:
+                        candidate, _ = optimize_acqf_discrete(
+                            acqf, 
+                            q=1, 
+                            choices=optimization_space,
+                            max_batch_size=max_batch_size
+                        )
                     
                     # evaluate the new candidate
                     self.evaluate_configs(candidate)

From 4113513cfef60ab0eead6c30cf67ddaec3ee0d4d Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 12 Nov 2024 15:11:33 -0800
Subject: [PATCH 069/253] Implemented standardization of output

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index f15415a2e..fa4fc44e6 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -116,8 +116,10 @@ def initialize_model(self, state_dict=None, exact=True):
         """Initialize the model and likelihood, possibly with a state dict for faster fitting."""
         train_X = self.train_X
         train_Y = self.train_Y
-        # transforms = dict(input_transform=Normalize(train_X.dim()), outcome_transform=Standardize(train_Y.dim()))
-        transforms = dict(input_transform=Normalize(d=train_X.shape[-1], indices=self.bounds_indices, bounds=self.bounds))
+        transforms = dict(
+            input_transform=Normalize(d=train_X.shape[-1], indices=self.bounds_indices, bounds=self.bounds),
+            outcome_transform=Standardize(m=train_Y.size(-1))
+        )
 
         # initialize the model
         if exact:
@@ -179,9 +181,9 @@ def run(self, max_fevals: int, feval_per_loop=10, max_batch_size=2048):
                     # optimize over a lattice if the space is too large
                     if max_batch_size < optimization_space.size(0):
                         candidate, _ = optimize_acqf_discrete_local_search(
-                            acqf, 
+                            acqf,
                             q=1,
-                            discrete_choices=optimization_space, 
+                            discrete_choices=optimization_space,
                             max_batch_size=max_batch_size,
                             num_restarts=5,
                             raw_samples=1024

From ed12b5a1b3dd402691a7e398b05005e1cb0ea03f Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 12 Nov 2024 15:27:02 -0800
Subject: [PATCH 070/253] Implemented unified optimization direction

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index fa4fc44e6..ac2fa3ea3 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -57,6 +57,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.initial_sample_size: int = tuning_options.strategy_options.get("popsize", 20)
         self.tuning_options = tuning_options
         self.cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, return_invalid=True)
+        self.maximize = tuning_options['objective_higher_is_better']
 
         # select the device to use (CUDA or Apple Silicon MPS if available)
         # TODO keep an eye on Apple Silicon support. Currently `linalg_cholesky` is not yet implemented for MPS.
@@ -76,6 +77,8 @@ def run_config(self, config: tuple):
         valid = not isinstance(result, util.ErrorConfig) and not np.isnan(result)
         if not valid:
             result = np.nan
+        elif not self.maximize:
+            result = -result
         return [result], valid
 
     def evaluate_configs(self, X: Tensor):
@@ -157,11 +160,11 @@ def run(self, max_fevals: int, feval_per_loop=10, max_batch_size=2048):
                 fit_gpytorch_mll(mll, optimizer=fit_gpytorch_mll_torch)
                 
                 # define the acquisition function
-                acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
-                # acqf = NoisyExpectedImprovement(model=model, , maximize=False)
-                # acqf = ProbabilityOfImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
-                # acqf = qLowerBoundMaxValueEntropy(model=model, candidate_set=self.searchspace_tensors, maximize=False)
-                # acqf = qLogExpectedImprovement(model=model, best_f=self.train_Y.min())
+                acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
+                # acqf = NoisyExpectedImprovement(model=model, , maximize=True)
+                # acqf = ProbabilityOfImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
+                # acqf = qLowerBoundMaxValueEntropy(model=model, candidate_set=self.searchspace_tensors, maximize=True)
+                # acqf = qLogExpectedImprovement(model=model, best_f=self.train_Y.max())
                 # acqf = qExpectedUtilityOfBestOption(pref_model=model)
 
                 # divide the optimization space into random chuncks

From d62c9410f68bac31daa02557edf1f1f8ed99cdbc Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 12 Nov 2024 15:31:24 -0800
Subject: [PATCH 071/253] Updated outcome standardization

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index ac2fa3ea3..1031448bb 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -121,7 +121,7 @@ def initialize_model(self, state_dict=None, exact=True):
         train_Y = self.train_Y
         transforms = dict(
             input_transform=Normalize(d=train_X.shape[-1], indices=self.bounds_indices, bounds=self.bounds),
-            outcome_transform=Standardize(m=train_Y.size(-1))
+            outcome_transform=Standardize(m=train_Y.shape[-1], batch_shape=train_X.shape[:-2])
         )
 
         # initialize the model

From 1c015cb579172139feb72e263d5114e1d2701c67 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 12 Nov 2024 17:35:33 -0800
Subject: [PATCH 072/253] Using extra information from variance in BO for
 better fits

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 28 +++++++++++++-------
 kernel_tuner/strategies/common.py            | 13 ++++++++-
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 1031448bb..1443a5a09 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -56,11 +56,11 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.initial_sample_taken = False
         self.initial_sample_size: int = tuning_options.strategy_options.get("popsize", 20)
         self.tuning_options = tuning_options
-        self.cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, return_invalid=True)
+        self.cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, return_invalid=True, return_raw=True)
         self.maximize = tuning_options['objective_higher_is_better']
 
         # select the device to use (CUDA or Apple Silicon MPS if available)
-        # TODO keep an eye on Apple Silicon support. Currently `linalg_cholesky` is not yet implemented for MPS.
+        # TODO keep an eye on Apple Silicon support. Currently `linalg_cholesky` is not yet implemented for MPS (issue reported: https://github.com/pytorch/pytorch/issues/77764).
         self.tensor_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
         # set up conversion to tensors
@@ -70,31 +70,39 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.bounds, self.bounds_indices = self.searchspace.get_tensorspace_bounds()
         self.train_X = torch.empty(0, **self.searchspace.tensor_kwargs)
         self.train_Y = torch.empty(0, **self.searchspace.tensor_kwargs)
+        self.train_Yvar = torch.empty(0, **self.searchspace.tensor_kwargs)
 
     def run_config(self, config: tuple):
         """Run a single configuration. Returns the result and whether it is valid."""
-        result = self.cost_func(config)
-        valid = not isinstance(result, util.ErrorConfig) and not np.isnan(result)
+        result, results = self.cost_func(config)
+        results = np.array(results)
+        var = np.nan
+        valid = not isinstance(result, util.ErrorConfig) and not np.isnan(result) and not any(np.isnan(results))
         if not valid:
             result = np.nan
         elif not self.maximize:
             result = -result
-        return [result], valid
+            results = -results
+        if valid:
+            var = np.var(results)
+        return [result], [var], valid
 
     def evaluate_configs(self, X: Tensor):
         """Evaluate a tensor of one or multiple configurations. Modifies train_X and train_Y accordingly."""
         if isinstance(X, Tensor):
             valid_configs = []
             valid_results = []
+            valid_vars = []
             if X.dim() == 1:
                 X = [X]
             for config in X:
                 assert isinstance(config, Tensor), f"Config must be a Tensor, but is of type {type(config)} ({config})"
                 param_config = self.searchspace.tensor_to_param_config(config)
-                res, valid = self.run_config(param_config)
+                res, var, valid = self.run_config(param_config)
                 if valid:
                     valid_configs.append(config)
                     valid_results.append(res)
+                    valid_vars.append(var)
                 
                 # remove evaluated configurations from the full searchspace
                 index = self.searchspace.get_param_config_index(param_config)
@@ -102,9 +110,10 @@ def evaluate_configs(self, X: Tensor):
                                                       self.searchspace_tensors[index+1:]))
 
             # add valid results to the training set
-            if len(valid_configs) > 0 and len(valid_results) > 0:
+            if len(valid_configs) > 0 and len(valid_results) > 0 and len(valid_vars) > 0:
                 self.train_X = torch.cat([self.train_X, torch.stack(valid_configs)])
                 self.train_Y = torch.cat([self.train_Y, torch.tensor(valid_results, **self.searchspace.tensor_kwargs)])
+                self.train_Yvar = torch.cat([self.train_Yvar, torch.tensor(valid_vars, **self.searchspace.tensor_kwargs)])
         else:
             raise NotImplementedError(f"Evaluation has not been implemented for type {type(X)}")
         
@@ -119,6 +128,7 @@ def initialize_model(self, state_dict=None, exact=True):
         """Initialize the model and likelihood, possibly with a state dict for faster fitting."""
         train_X = self.train_X
         train_Y = self.train_Y
+        train_Yvar = self.train_Yvar
         transforms = dict(
             input_transform=Normalize(d=train_X.shape[-1], indices=self.bounds_indices, bounds=self.bounds),
             outcome_transform=Standardize(m=train_Y.shape[-1], batch_shape=train_X.shape[:-2])
@@ -128,9 +138,9 @@ def initialize_model(self, state_dict=None, exact=True):
         if exact:
             catdims = self.searchspace.get_tensorspace_categorical_dimensions()
             if len(catdims) == 0:
-                model = SingleTaskGP(train_X, train_Y, **transforms)
+                model = SingleTaskGP(train_X, train_Y, train_Yvar=train_Yvar, **transforms)
             else:
-                model = MixedSingleTaskGP(train_X, train_Y, cat_dims=catdims, **transforms)
+                model = MixedSingleTaskGP(train_X, train_Y, train_Yvar=train_Yvar, cat_dims=catdims, **transforms)
         else:
             model = SingleTaskVariationalGP(train_X, train_Y, **transforms)
 
diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index 7901f97a0..eb0b81e27 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -62,7 +62,7 @@ class CostFunc:
 
     def __init__(
         self, searchspace: Searchspace, tuning_options, runner, *, 
-        scaling=False, snap=True, encode_non_numeric=False, return_invalid=False
+        scaling=False, snap=True, encode_non_numeric=False, return_invalid=False, return_raw=None
     ):
         """An abstract method to handle evaluation of configurations.
 
@@ -74,12 +74,16 @@ def __init__(
             snap: whether to snap given configurations to their closests equivalent in the space. Defaults to True.
             encode_non_numeric: whether to externally encode non-numeric parameter values. Defaults to False.
             return_invalid: whether to return the util.ErrorConfig of an invalid configuration. Defaults to False.
+            return_raw: returns (result, results[raw]). Key inferred from objective if set to True. Defaults to None.
         """        
         self.runner = runner
         self.snap = snap
         self.scaling = scaling
         self.encode_non_numeric = encode_non_numeric
         self.return_invalid = return_invalid
+        self.return_raw = return_raw
+        if return_raw is True:
+            self.return_raw = f"{tuning_options['objective']}s"
         self.searchspace = searchspace
         self.tuning_options = tuning_options
         if isinstance(self.tuning_options, dict):
@@ -157,6 +161,13 @@ def __call__(self, x, check_restrictions=True):
             return_value = result[self.tuning_options.objective] or sys.float_info.max
         return_value = -return_value if self.tuning_options.objective_higher_is_better else return_value
 
+        # include raw data in return if requested
+        if self.return_raw is not None:
+            try:
+                return return_value, result[self.return_raw]
+            except KeyError:
+                return return_value, [np.nan]
+
         return return_value
 
     def get_bounds_x0_eps(self):

From cad10f8ec6a81f878eb0933965682f59b0dda59b Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 12 Nov 2024 20:00:21 -0800
Subject: [PATCH 073/253] Implemented gradual cooldown on multi-feval depending
 on number of fevals left

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 63 ++++++++++++--------
 1 file changed, 39 insertions(+), 24 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 1443a5a09..d4127d43c 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -1,6 +1,6 @@
 """Bayesian Optimization implementation using BO Torch."""
 
-from math import ceil
+from math import ceil, sqrt
 
 import numpy as np
 
@@ -155,17 +155,32 @@ def initialize_model(self, state_dict=None, exact=True):
             mll = VariationalELBO(model.likelihood, model.model, num_data=train_Y.size(0))
         return mll, model
 
-    def run(self, max_fevals: int, feval_per_loop=10, max_batch_size=2048):
+    def run(self, max_fevals: int, max_batch_size=2048):
         """Run the Bayesian Optimization loop for at most `max_fevals`."""
         try:
             if not self.initial_sample_taken:
                 self.initial_sample()
             mll, model = self.initialize_model()
-            num_fevals = self.initial_sample_size
+            fevals_left = max_fevals - self.initial_sample_size
+
+            # create array to gradually reduce number of optimization spaces as fewer fevals are left
+            tensorspace_size = self.searchspace_tensors.size(0)
+            reserve_final_loops = min(3, fevals_left)   # reserve some loops at the end that are never split
+            fevals_left -= reserve_final_loops
+            num_loops = min(max(round(sqrt(fevals_left)), 3), fevals_left)  # set the number of loops for the array
+            avg_optimization_spaces = round(tensorspace_size / max_batch_size)  # set the average number of optimization spaces
+            numspace = np.geomspace(start=avg_optimization_spaces, stop=0.1, num=num_loops)
+            nums_optimization_spaces = np.clip(np.round(numspace * (fevals_left / numspace.sum())), a_min=1, a_max=None)
+            # if there's a discrepency, add or subtract the difference from the first number
+            if np.sum(nums_optimization_spaces) != fevals_left:
+                nums_optimization_spaces[0] += fevals_left - np.sum(nums_optimization_spaces)
+            nums_optimization_spaces = np.concatenate([nums_optimization_spaces, np.full(reserve_final_loops, 1)])
+            fevals_left += reserve_final_loops
 
             # Bayesian optimization loop
-            max_loops = ceil(max_fevals/feval_per_loop)
-            for f in range(max_loops):
+            for loop_i, num_optimization_spaces in enumerate(nums_optimization_spaces):
+                num_optimization_spaces = min(num_optimization_spaces, fevals_left)
+
                 # fit a Gaussian Process model
                 fit_gpytorch_mll(mll, optimizer=fit_gpytorch_mll_torch)
                 
@@ -179,7 +194,6 @@ def run(self, max_fevals: int, feval_per_loop=10, max_batch_size=2048):
 
                 # divide the optimization space into random chuncks
                 tensorspace_size = self.searchspace_tensors.size(0)
-                num_optimization_spaces = max(min(feval_per_loop, max_fevals-num_fevals), ceil(tensorspace_size / max_batch_size))
                 if num_optimization_spaces <= 1:
                     optimization_spaces = [self.searchspace_tensors]
                 else:
@@ -191,30 +205,31 @@ def run(self, max_fevals: int, feval_per_loop=10, max_batch_size=2048):
                 # optimize acquisition function to find the next evaluation point
                 for optimization_space in optimization_spaces:
 
+                    # NOTE optimize_acqf_discrete_local_search does not work with variable optimization_space size
                     # optimize over a lattice if the space is too large
-                    if max_batch_size < optimization_space.size(0):
-                        candidate, _ = optimize_acqf_discrete_local_search(
-                            acqf,
-                            q=1,
-                            discrete_choices=optimization_space,
-                            max_batch_size=max_batch_size,
-                            num_restarts=5,
-                            raw_samples=1024
-                        )
-                    else:
-                        candidate, _ = optimize_acqf_discrete(
-                            acqf, 
-                            q=1, 
-                            choices=optimization_space,
-                            max_batch_size=max_batch_size
-                        )
+                    # if len(optimization_spaces) == 1 and max_batch_size < optimization_space.size(0):
+                    #     candidate, _ = optimize_acqf_discrete_local_search(
+                    #         acqf,
+                    #         q=1,
+                    #         discrete_choices=optimization_space,
+                    #         max_batch_size=max_batch_size,
+                    #         num_restarts=5,
+                    #         raw_samples=1024
+                    #     )
+                    # else:
+                    candidate, _ = optimize_acqf_discrete(
+                        acqf, 
+                        q=1, 
+                        choices=optimization_space,
+                        max_batch_size=max_batch_size
+                    )
                     
                     # evaluate the new candidate
                     self.evaluate_configs(candidate)
-                    num_fevals += 1
+                    fevals_left -= 1
 
                 # reinitialize the models so they are ready for fitting on next iteration
-                if f < max_loops - 1:
+                if loop_i < len(nums_optimization_spaces) - 1:
                     mll, model = self.initialize_model(model.state_dict())
         except util.StopCriterionReached as e:
             if self.tuning_options.verbose:

From 1ed0352e88f0f64694fd2ebd51286dd74fca23d7 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 18 Nov 2024 18:59:01 -0800
Subject: [PATCH 074/253] Adjusted the calculation of number of optimization
 spaces to be more gradual

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index d4127d43c..949fdb459 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -167,8 +167,8 @@ def run(self, max_fevals: int, max_batch_size=2048):
             tensorspace_size = self.searchspace_tensors.size(0)
             reserve_final_loops = min(3, fevals_left)   # reserve some loops at the end that are never split
             fevals_left -= reserve_final_loops
-            num_loops = min(max(round(sqrt(fevals_left)), 3), fevals_left)  # set the number of loops for the array
-            avg_optimization_spaces = round(tensorspace_size / max_batch_size)  # set the average number of optimization spaces
+            num_loops = min(max(round(sqrt(fevals_left*2)), 3), fevals_left)  # set the number of loops for the array
+            avg_optimization_spaces = max(round(sqrt(tensorspace_size / max_batch_size)), 1)  # set the average number of optimization spaces
             numspace = np.geomspace(start=avg_optimization_spaces, stop=0.1, num=num_loops)
             nums_optimization_spaces = np.clip(np.round(numspace * (fevals_left / numspace.sum())), a_min=1, a_max=None)
             # if there's a discrepency, add or subtract the difference from the first number

From 38f084ceca9154c68311d6fb0339658ebc892650 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 18 Nov 2024 22:43:37 -0800
Subject: [PATCH 075/253] Two different kernels as test files for BO

---
 tune_bo.py => tune_bo_conv.py |  2 +-
 tune_bo_dedisp.py             | 88 +++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+), 1 deletion(-)
 rename tune_bo.py => tune_bo_conv.py (99%)
 create mode 100644 tune_bo_dedisp.py

diff --git a/tune_bo.py b/tune_bo_conv.py
similarity index 99%
rename from tune_bo.py
rename to tune_bo_conv.py
index 81f1fe999..03ee7f2fa 100644
--- a/tune_bo.py
+++ b/tune_bo_conv.py
@@ -30,7 +30,7 @@ def tune(
     quiet=False,
     simulation_mode=True,
     lang="CUDA",
-    profiling=True,
+    profiling=False,
 ):  
     directory = Path(__file__).parent / "../autotuning_methodology/cached_data_used/"
     assert directory.exists()
diff --git a/tune_bo_dedisp.py b/tune_bo_dedisp.py
new file mode 100644
index 000000000..2cfb3b58b
--- /dev/null
+++ b/tune_bo_dedisp.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+import os
+from collections import OrderedDict
+from pathlib import Path
+
+import kernel_tuner as kt
+
+nr_dms = 2048
+nr_samples = 25000
+nr_channels = 1536
+max_shift = 650
+nr_samples_per_channel = (nr_samples+max_shift)
+down_sampling = 1
+dm_first = 0.0
+dm_step = 0.02
+
+channel_bandwidth = 0.1953125
+sampling_time = 0.00004096
+min_freq = 1425.0
+max_freq = min_freq + (nr_channels-1) * channel_bandwidth
+
+
+def tune(device, strategy="bayes_opt_BOTorch", strategy_options={ 'max_fevals': 1500 }, lang='HIP', verbose=True, quiet=False, simulation_mode=True, profiling=True):
+
+    args = []
+
+    answer = [None, None, None]
+
+    problem_size = (nr_samples, nr_dms, 1)
+    tune_params = OrderedDict()
+    tune_params["block_size_x"] = [1, 2, 4, 8] + [16*i for i in range(1,3)]
+    tune_params["block_size_y"] = [8*i for i in range(4,33)]
+    tune_params["block_size_z"] = [1]
+    tune_params["tile_size_x"] = [i for i in range(1,5)]
+    tune_params["tile_size_y"] = [i for i in range(1,9)]
+    tune_params["tile_stride_x"] = [0, 1]
+    tune_params["tile_stride_y"] = [0, 1]
+    tune_params["loop_unroll_factor_channel"] = [0] #+ [i for i in range(1,nr_channels+1) if nr_channels % i == 0] #[i for i in range(nr_channels+1)]
+
+    cp = [f"-I{os.path.dirname(os.path.realpath(__file__))}"]
+
+
+    check_block_size = "32 <= block_size_x * block_size_y <= 1024"
+    check_loop_x = "loop_unroll_factor_x <= tile_size_x and tile_size_x % loop_unroll_factor_x == 0"
+    check_loop_y = "loop_unroll_factor_y <= tile_size_y and tile_size_y % loop_unroll_factor_y == 0"
+    check_loop_channel = f"loop_unroll_factor_channel <= {nr_channels} and loop_unroll_factor_channel and {nr_channels} % loop_unroll_factor_channel == 0"
+
+    check_tile_stride_x = "tile_size_x > 1 or tile_stride_x == 0"
+    check_tile_stride_y = "tile_size_y > 1 or tile_stride_y == 0"
+
+    config_valid = [check_block_size, check_tile_stride_x, check_tile_stride_y]
+
+    metrics = OrderedDict()
+    gbytes = (nr_dms * nr_samples * nr_channels)/1e9
+    metrics["GB/s"] = lambda p: gbytes / (p['time'] / 1e3)
+
+    directory = Path(__file__).parent / "../autotuning_methodology/cached_data_used/"
+    cachefile = directory / f"cachefiles/dedispersion_milo/{device}.json"
+    assert directory.exists()
+    if lang == "CUDA":
+        kernel_file = directory / "kernels/dedisp_milo/dedispersion.cu"
+    elif lang == "HIP":
+        kernel_file = directory / "kernels/dedisp_milo/dedispersion.cu.hip"
+    else:
+        raise ValueError(f"Invalid {lang=}")
+
+    def run():
+        return kt.tune_kernel("dedispersion_kernel", kernel_file, problem_size, args, tune_params,
+                                answer=answer, compiler_options=cp, restrictions=config_valid, device=0,
+                                cache=cachefile, lang=lang, iterations=32, metrics=metrics, 
+                                simulation_mode=simulation_mode, verbose=verbose, quiet=quiet, strategy=strategy, strategy_options=strategy_options)
+    
+    # start tuning
+    if profiling:
+        import cProfile
+
+        with cProfile.Profile() as pr:
+            results, env = run()
+            if profiling:
+                pr.dump_stats('bo_prof_torchfit_2.prof')
+    else:
+        results, env = run()
+
+    return results, env
+
+if __name__ == "__main__":
+
+    tune("A100")

From c447dc27f372a70cece027ef474799e27600e314 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 20 Nov 2024 22:19:25 -0800
Subject: [PATCH 076/253] Setup structure for BOTorch transfer learning
 strategy as separate strategy

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py  |  8 ++--
 .../strategies/bayes_opt_BOTorch_transfer.py  | 38 +++++++++++++++++++
 2 files changed, 41 insertions(+), 5 deletions(-)
 create mode 100644 kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 949fdb459..689d64183 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -16,7 +16,7 @@
     )
     from botorch.models import MixedSingleTaskGP, SingleTaskGP, SingleTaskVariationalGP
     from botorch.models.transforms import Normalize, Standardize
-    from botorch.optim import optimize_acqf_discrete, optimize_acqf_discrete_local_search
+    from botorch.optim import optimize_acqf_discrete
     from botorch.optim.fit import fit_gpytorch_mll_torch
     from gpytorch.mlls import ExactMarginalLogLikelihood, VariationalELBO
     from torch import Tensor
@@ -29,9 +29,7 @@
 
 from kernel_tuner import util
 from kernel_tuner.searchspace import Searchspace
-from kernel_tuner.strategies.common import (
-    CostFunc,
-)
+from kernel_tuner.strategies.common import CostFunc
 
 # set gpytorch to approximate mode for faster fitting
 linop_settings._fast_covar_root_decomposition._default = True
@@ -235,4 +233,4 @@ def run(self, max_fevals: int, max_batch_size=2048):
             if self.tuning_options.verbose:
                 print(e)
 
-        return self.cost_func.results 
+        return self.cost_func.results
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
new file mode 100644
index 000000000..627d37a75
--- /dev/null
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
@@ -0,0 +1,38 @@
+"""Bayesian Optimization implementation using BO Torch."""
+
+try:
+    from torch import Tensor
+    bayes_opt_present = True
+except ImportError:
+    bayes_opt_present = False
+
+from kernel_tuner.searchspace import Searchspace
+from kernel_tuner.strategies.bayes_opt_BOTorch import BayesianOptimization
+
+
+def tune(searchspace: Searchspace, runner, tuning_options):
+    """The entry function for tuning a searchspace using this algorithm."""
+    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
+    bo = BayesianOptimization(searchspace, runner, tuning_options)
+    return bo.run(max_fevals)
+
+class BayesianOptimizationTransfer(BayesianOptimization):
+    """Bayesian Optimization class with transfer learning."""
+
+    def __init__(self, searchspace: Searchspace, runner, tuning_options):
+        super().__init__(searchspace, runner, tuning_options)
+
+    def run_config(self, config: tuple):
+        return super().run_config(config)
+    
+    def evaluate_configs(self, X: Tensor):
+        return super().evaluate_configs(X)
+    
+    def initial_sample(self):
+        return super().initial_sample()
+    
+    def initialize_model(self, state_dict=None, exact=True):
+        return super().initialize_model(state_dict, exact)
+    
+    def run(self, max_fevals: int, max_batch_size=2048):
+        return super().run(max_fevals, max_batch_size)
\ No newline at end of file

From 7c2fd5112c53dea03a8262a11bceb137acfd8714 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 20 Nov 2024 22:43:41 -0800
Subject: [PATCH 077/253] Implemented Rank-Weighted GP Ensemble for
 transferlearning

---
 .../strategies/bayes_opt_BOTorch_transfer.py  | 365 +++++++++++++++++-
 1 file changed, 364 insertions(+), 1 deletion(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
index 627d37a75..885bc6708 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
@@ -1,7 +1,22 @@
 """Bayesian Optimization implementation using BO Torch."""
 
 try:
+    import torch
+    from botorch.acquisition.logei import qLogNoisyExpectedImprovement
+    from botorch.fit import fit_gpytorch_mll
+    from botorch.models import SingleTaskGP
+    from botorch.models.gpytorch import GPyTorchModel
+    from botorch.optim.optimize import optimize_acqf
+    from botorch.sampling.normal import SobolQMCNormalSampler
+    from botorch.utils.sampling import draw_sobol_samples
+    from botorch.utils.transforms import normalize, unnormalize
+    from gpytorch.distributions import MultivariateNormal
+    from gpytorch.lazy import PsdSumLazyTensor
+    from gpytorch.likelihoods import LikelihoodList
+    from gpytorch.mlls import ExactMarginalLogLikelihood
+    from gpytorch.models import GP
     from torch import Tensor
+    from torch.nn import ModuleList
     bayes_opt_present = True
 except ImportError:
     bayes_opt_present = False
@@ -9,6 +24,17 @@
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies.bayes_opt_BOTorch import BayesianOptimization
 
+# settings
+NUM_BASE_TASKS = 5
+N_BATCH = 10
+NUM_POSTERIOR_SAMPLES = 256
+RANDOM_INITIALIZATION_SIZE = 3
+N_TRIALS = 10
+MC_SAMPLES = 512
+N_RESTART_CANDIDATES = 512
+N_RESTARTS = 10
+Q_BATCH_SIZE = 1
+
 
 def tune(searchspace: Searchspace, runner, tuning_options):
     """The entry function for tuning a searchspace using this algorithm."""
@@ -22,6 +48,45 @@ class BayesianOptimizationTransfer(BayesianOptimization):
     def __init__(self, searchspace: Searchspace, runner, tuning_options):
         super().__init__(searchspace, runner, tuning_options)
 
+        self.best_rgpe_all = []
+        self.best_random_all = []
+        self.best_vanilla_nei_all = []
+        self.noise_std = 0.05
+
+        # Sample data for each base task
+        data_by_task = {}
+        for task in range(NUM_BASE_TASKS):
+            num_training_points = 20
+            # draw points from a sobol sequence
+            raw_x = draw_sobol_samples(
+                bounds=BOUNDS,
+                n=num_training_points,
+                q=1,
+                seed=task + 5397923,
+            ).squeeze(1)
+            # get observed values
+            f_x = f(raw_x, task_shift(task + 1))
+            train_y = f_x + noise_std * torch.randn_like(f_x)
+            train_yvar = torch.full_like(train_y, noise_std**2)
+            # store training data
+            data_by_task[task] = {
+                # scale x to [0, 1]
+                "train_x": normalize(raw_x, bounds=BOUNDS),
+                "train_y": train_y,
+                "train_yvar": train_yvar,
+            }
+
+        # Fit base model
+        base_model_list = []
+        for task in range(NUM_BASE_TASKS):
+            print(f"Fitting base model {task}")
+            model = self.get_fitted_model(
+                data_by_task[task]["train_x"],
+                data_by_task[task]["train_y"],
+                data_by_task[task]["train_yvar"],
+            )
+            base_model_list.append(model)
+
     def run_config(self, config: tuple):
         return super().run_config(config)
     
@@ -34,5 +99,303 @@ def initial_sample(self):
     def initialize_model(self, state_dict=None, exact=True):
         return super().initialize_model(state_dict, exact)
     
+    def get_fitted_model(self, train_X, train_Y, train_Yvar, state_dict=None):
+        """Get a single task GP. The model will be fit unless a state_dict with model hyperparameters is provided."""
+        model = SingleTaskGP(train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar)
+        if state_dict is None:
+            mll = ExactMarginalLogLikelihood(model.likelihood, model).to(train_X)
+            fit_gpytorch_mll(mll)
+        else:
+            model.load_state_dict(state_dict)
+        return model
+    
+    def roll_col(self, X, shift):
+        """Rotate columns to right by shift."""
+        return torch.cat((X[..., -shift:], X[..., :-shift]), dim=-1)
+    
+    def compute_ranking_loss(self, f_samps, target_y):
+        """Compute ranking loss for each sample from the posterior over target points.
+
+        Args:
+            f_samps: `n_samples x (n) x n`-dim tensor of samples
+            target_y: `n x 1`-dim tensor of targets
+        Returns:
+            Tensor: `n_samples`-dim tensor containing the ranking loss across each sample
+        """
+        n = target_y.shape[0]
+        if f_samps.ndim == 3:
+            # Compute ranking loss for target model
+            # take cartesian product of target_y
+            cartesian_y = torch.cartesian_prod(
+                target_y.squeeze(-1),
+                target_y.squeeze(-1),
+            ).view(n, n, 2)
+            # the diagonal of f_samps are the out-of-sample predictions
+            # for each LOO model, compare the out of sample predictions to each in-sample prediction
+            rank_loss = (
+                (
+                    (f_samps.diagonal(dim1=1, dim2=2).unsqueeze(-1) < f_samps)
+                    ^ (cartesian_y[..., 0] < cartesian_y[..., 1])
+                )
+                .sum(dim=-1)
+                .sum(dim=-1)
+            )
+        else:
+            rank_loss = torch.zeros(
+                f_samps.shape[0], dtype=torch.long, device=target_y.device
+            )
+            y_stack = target_y.squeeze(-1).expand(f_samps.shape)
+            for i in range(1, target_y.shape[0]):
+                rank_loss += (
+                    (self.roll_col(f_samps, i) < f_samps) ^ (self.roll_col(y_stack, i) < y_stack)
+                ).sum(dim=-1)
+        return rank_loss
+    
+    def get_target_model_loocv_sample_preds(self, train_x, train_y, train_yvar, target_model, num_samples):
+        """Create a batch-mode LOOCV GP and draw a joint sample across all points from the target task.
+
+        Args:
+            train_x: `n x d` tensor of training points
+            train_y: `n x 1` tensor of training targets
+            target_model: fitted target model
+            num_samples: number of mc samples to draw
+
+        Return: `num_samples x n x n`-dim tensor of samples, where dim=1 represents the `n` LOO models,
+            and dim=2 represents the `n` training points.
+        """
+        batch_size = len(train_x)
+        masks = torch.eye(len(train_x), dtype=torch.uint8, device=self.tensor_device).bool()
+        train_x_cv = torch.stack([train_x[~m] for m in masks])
+        train_y_cv = torch.stack([train_y[~m] for m in masks])
+        train_yvar_cv = torch.stack([train_yvar[~m] for m in masks])
+        state_dict = target_model.state_dict()
+        # expand to batch size of batch_mode LOOCV model
+        state_dict_expanded = {
+            name: t.expand(batch_size, *[-1 for _ in range(t.ndim)])
+            for name, t in state_dict.items()
+        }
+        model = self.get_fitted_model(
+            train_x_cv, train_y_cv, train_yvar_cv, state_dict=state_dict_expanded
+        )
+        with torch.no_grad():
+            posterior = model.posterior(train_x)
+            # Since we have a batch mode gp and model.posterior always returns an output dimension,
+            # the output from `posterior.sample()` here `num_samples x n x n x 1`, so let's squeeze
+            # the last dimension.
+            sampler = SobolQMCNormalSampler(sample_shape=torch.Size([num_samples]))
+            return sampler(posterior).squeeze(-1)
+    
+    def compute_rank_weights(self, train_x, train_y, base_models, target_model, num_samples):
+        """Compute ranking weights for each base model and the target model (using LOOCV for the target model).
+        
+        Note: This implementation does not currently address weight dilution, since we only have a small number of base models.
+
+        Args:
+            train_x: `n x d` tensor of training points (for target task)
+            train_y: `n` tensor of training targets (for target task)
+            base_models: list of base models
+            target_model: target model
+            num_samples: number of mc samples
+
+        Returns:
+            Tensor: `n_t`-dim tensor with the ranking weight for each model
+        """
+        ranking_losses = []
+        # compute ranking loss for each base model
+        for task in range(len(base_models)):
+            model = base_models[task]
+            # compute posterior over training points for target task
+            posterior = model.posterior(train_x)
+            sampler = SobolQMCNormalSampler(sample_shape=torch.Size([num_samples]))
+            base_f_samps = sampler(posterior).squeeze(-1).squeeze(-1)
+            # compute and save ranking loss
+            ranking_losses.append(self.compute_ranking_loss(base_f_samps, train_y))
+        # compute ranking loss for target model using LOOCV
+        # f_samps
+        target_f_samps = self.get_target_model_loocv_sample_preds(
+            train_x,
+            train_y,
+            train_yvar,
+            target_model,
+            num_samples,
+        )
+        ranking_losses.append(self.compute_ranking_loss(target_f_samps, train_y))
+        ranking_loss_tensor = torch.stack(ranking_losses)
+        # compute best model (minimum ranking loss) for each sample
+        best_models = torch.argmin(ranking_loss_tensor, dim=0)
+        # compute proportion of samples for which each model is best
+        rank_weights = (
+            best_models.bincount(minlength=len(ranking_losses)).type_as(train_x)
+            / num_samples
+        )
+        return rank_weights
+    
     def run(self, max_fevals: int, max_batch_size=2048):
-        return super().run(max_fevals, max_batch_size)
\ No newline at end of file
+        # Average over multiple trials
+        for trial in range(N_TRIALS):
+            print(f"Trial {trial + 1} of {N_TRIALS}")
+            best_rgpe = []
+            best_random = []
+            best_vanilla_nei = []
+            # Initial random observations
+            raw_x = draw_sobol_samples(
+                bounds=BOUNDS, n=RANDOM_INITIALIZATION_SIZE, q=1, seed=trial
+            ).squeeze(1)
+            train_x = normalize(raw_x, bounds=BOUNDS)
+            train_y_noiseless = f(raw_x)
+            train_y = train_y_noiseless + noise_std * torch.randn_like(train_y_noiseless)
+            train_yvar = torch.full_like(train_y, noise_std**2)
+            vanilla_nei_train_x = train_x.clone()
+            vanilla_nei_train_y = train_y.clone()
+            vanilla_nei_train_yvar = train_yvar.clone()
+            # keep track of the best observed point at each iteration
+            best_value = train_y.max().item()
+            best_rgpe.append(best_value)
+            best_random.append(best_value)
+            vanilla_nei_best_value = best_value
+            best_vanilla_nei.append(vanilla_nei_best_value)
+
+            # Run N_BATCH rounds of BayesOpt after the initial random batch
+            for iteration in range(N_BATCH):
+                target_model = self.get_fitted_model(train_x, train_y, train_yvar)
+                model_list = base_model_list + [target_model]
+                rank_weights = self.compute_rank_weights(
+                    train_x,
+                    train_y,
+                    base_model_list,
+                    target_model,
+                    NUM_POSTERIOR_SAMPLES,
+                )
+
+                # create model and acquisition function
+                rgpe_model = RGPE(model_list, rank_weights)
+                sampler_qnei = SobolQMCNormalSampler(sample_shape=torch.Size([MC_SAMPLES]))
+                qNEI = qLogNoisyExpectedImprovement(
+                    model=rgpe_model,
+                    X_baseline=train_x,
+                    sampler=sampler_qnei,
+                    prune_baseline=False,
+                )
+
+                # optimize
+                candidate, _ = optimize_acqf(
+                    acq_function=qNEI,
+                    bounds=torch.tensor([[0.0], [1.0]], **self.searchspace.tensor_kwargs),
+                    q=Q_BATCH_SIZE,
+                    num_restarts=N_RESTARTS,
+                    raw_samples=N_RESTART_CANDIDATES,
+                )
+
+                # fetch the new values
+                new_x = candidate.detach()
+                new_y_noiseless = f(unnormalize(new_x, bounds=BOUNDS))
+                new_y = new_y_noiseless + noise_std * torch.randn_like(new_y_noiseless)
+                new_yvar = torch.full_like(new_y, noise_std**2)
+
+                # update training points
+                train_x = torch.cat((train_x, new_x))
+                train_y = torch.cat((train_y, new_y))
+                train_yvar = torch.cat((train_yvar, new_yvar))
+                random_candidate = torch.rand(1, **self.searchspace.tensor_kwargs)
+                next_random_noiseless = f(unnormalize(random_candidate, bounds=BOUNDS))
+                next_random = next_random_noiseless + noise_std * torch.randn_like(
+                    next_random_noiseless
+                )
+                next_random_best = next_random.max().item()
+                best_random.append(max(best_random[-1], next_random_best))
+
+                # get the new best observed value
+                best_value = train_y.max().item()
+                best_rgpe.append(best_value)
+
+                # Run Vanilla NEI for comparison
+                vanilla_nei_model = self.get_fitted_model(
+                    vanilla_nei_train_x,
+                    vanilla_nei_train_y,
+                    vanilla_nei_train_yvar,
+                )
+                vanilla_nei_sampler = SobolQMCNormalSampler(
+                    sample_shape=torch.Size([MC_SAMPLES])
+                )
+                vanilla_qNEI = qLogNoisyExpectedImprovement(
+                    model=vanilla_nei_model,
+                    X_baseline=vanilla_nei_train_x,
+                    sampler=vanilla_nei_sampler,
+                )
+                vanilla_nei_candidate, _ = optimize_acqf(
+                    acq_function=vanilla_qNEI,
+                    bounds=torch.tensor([[0.0], [1.0]], **self.searchspace.tensor_kwargs),
+                    q=Q_BATCH_SIZE,
+                    num_restarts=N_RESTARTS,
+                    raw_samples=N_RESTART_CANDIDATES,
+                )
+                # fetch the new values
+                vanilla_nei_new_x = vanilla_nei_candidate.detach()
+                vanilla_nei_new_y_noiseless = f(unnormalize(vanilla_nei_new_x, bounds=BOUNDS))
+                vanilla_nei_new_y = vanilla_nei_new_y_noiseless + noise_std * torch.randn_like(
+                    new_y_noiseless
+                )
+                vanilla_nei_new_yvar = torch.full_like(vanilla_nei_new_y, noise_std**2)
+
+                # update training points
+                vanilla_nei_train_x = torch.cat([vanilla_nei_train_x, vanilla_nei_new_x])
+                vanilla_nei_train_y = torch.cat([vanilla_nei_train_y, vanilla_nei_new_y])
+                vanilla_nei_train_yvar = torch.cat(
+                    [vanilla_nei_train_yvar, vanilla_nei_new_yvar]
+                )
+
+                # get the new best observed value
+                vanilla_nei_best_value = vanilla_nei_train_y.max().item()
+                best_vanilla_nei.append(vanilla_nei_best_value)
+
+            self.best_rgpe_all.append(best_rgpe)
+            self.best_random_all.append(best_random)
+            self.best_vanilla_nei_all.append(best_vanilla_nei)
+
+
+class RGPE(GP, GPyTorchModel):
+    """Rank-weighted GP ensemble.
+    
+    Note: this class inherits from GPyTorchModel which provides an interface for GPyTorch models in botorch.
+    """
+
+    _num_outputs = 1  # metadata for botorch
+
+    def __init__(self, models, weights):
+        super().__init__()
+        self.models = ModuleList(models)
+        for m in models:
+            if not hasattr(m, "likelihood"):
+                raise ValueError(
+                    "RGPE currently only supports models that have a likelihood (e.g. ExactGPs)"
+                )
+        self.likelihood = LikelihoodList(*[m.likelihood for m in models])
+        self.weights = weights
+        self.to(weights)
+
+    def forward(self, x):
+        weighted_means = []
+        weighted_covars = []
+        # filter model with zero weights
+        # weights on covariance matrices are weight**2
+        non_zero_weight_indices = (self.weights**2 > 0).nonzero()
+        non_zero_weights = self.weights[non_zero_weight_indices]
+        # re-normalize
+        non_zero_weights /= non_zero_weights.sum()
+
+        for non_zero_weight_idx in range(non_zero_weight_indices.shape[0]):
+            raw_idx = non_zero_weight_indices[non_zero_weight_idx].item()
+            model = self.models[raw_idx]
+            posterior = model.posterior(x)
+            # unstandardize predictions
+            posterior_mean = posterior.mean.squeeze(-1)
+            posterior_cov = posterior.mvn.lazy_covariance_matrix
+            # apply weight
+            weight = non_zero_weights[non_zero_weight_idx]
+            weighted_means.append(weight * posterior_mean)
+            weighted_covars.append(posterior_cov * weight**2)
+        # set mean and covariance to be the rank-weighted sum the means and covariances of the
+        # base models and target model
+        mean_x = torch.stack(weighted_means).sum(dim=0)
+        covar_x = PsdSumLazyTensor(*weighted_covars)
+        return MultivariateNormal(mean_x, covar_x)
\ No newline at end of file

From fec0e65d706a66269a1a17d156908ab615f0c6fd Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 20 Nov 2024 23:02:03 -0800
Subject: [PATCH 078/253] Avoided import of whole util submodule

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 689d64183..45ac275a9 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -27,9 +27,9 @@
 import gpytorch.settings as gp_settings
 import linear_operator.settings as linop_settings
 
-from kernel_tuner import util
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies.common import CostFunc
+from kernel_tuner.util import ErrorConfig, StopCriterionReached
 
 # set gpytorch to approximate mode for faster fitting
 linop_settings._fast_covar_root_decomposition._default = True
@@ -75,7 +75,7 @@ def run_config(self, config: tuple):
         result, results = self.cost_func(config)
         results = np.array(results)
         var = np.nan
-        valid = not isinstance(result, util.ErrorConfig) and not np.isnan(result) and not any(np.isnan(results))
+        valid = not isinstance(result, ErrorConfig) and not np.isnan(result) and not any(np.isnan(results))
         if not valid:
             result = np.nan
         elif not self.maximize:
@@ -229,7 +229,7 @@ def run(self, max_fevals: int, max_batch_size=2048):
                 # reinitialize the models so they are ready for fitting on next iteration
                 if loop_i < len(nums_optimization_spaces) - 1:
                     mll, model = self.initialize_model(model.state_dict())
-        except util.StopCriterionReached as e:
+        except StopCriterionReached as e:
             if self.tuning_options.verbose:
                 print(e)
 

From 091ef47ac0dc361a60429633730a79a390076f45 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 20 Nov 2024 23:55:06 -0800
Subject: [PATCH 079/253] Simplified BO transfer run loop

---
 .../strategies/bayes_opt_BOTorch_transfer.py  | 168 ++++++------------
 1 file changed, 54 insertions(+), 114 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
index 885bc6708..f8106658a 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
@@ -2,11 +2,11 @@
 
 try:
     import torch
-    from botorch.acquisition.logei import qLogNoisyExpectedImprovement
-    from botorch.fit import fit_gpytorch_mll
+    from botorch.acquisition import LogExpectedImprovement
+    from botorch.fit import fit_gpytorch_mll, fit_gpytorch_mll_torch
     from botorch.models import SingleTaskGP
     from botorch.models.gpytorch import GPyTorchModel
-    from botorch.optim.optimize import optimize_acqf
+    from botorch.optim.optimize import optimize_acqf_discrete, optimize_acqf_discrete_local_search
     from botorch.sampling.normal import SobolQMCNormalSampler
     from botorch.utils.sampling import draw_sobol_samples
     from botorch.utils.transforms import normalize, unnormalize
@@ -23,6 +23,7 @@
 
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies.bayes_opt_BOTorch import BayesianOptimization
+from kernel_tuner.util import StopCriterionReached
 
 # settings
 NUM_BASE_TASKS = 5
@@ -201,6 +202,7 @@ def compute_rank_weights(self, train_x, train_y, base_models, target_model, num_
             Tensor: `n_t`-dim tensor with the ranking weight for each model
         """
         ranking_losses = []
+        
         # compute ranking loss for each base model
         for task in range(len(base_models)):
             model = base_models[task]
@@ -210,6 +212,7 @@ def compute_rank_weights(self, train_x, train_y, base_models, target_model, num_
             base_f_samps = sampler(posterior).squeeze(-1).squeeze(-1)
             # compute and save ranking loss
             ranking_losses.append(self.compute_ranking_loss(base_f_samps, train_y))
+
         # compute ranking loss for target model using LOOCV
         # f_samps
         target_f_samps = self.get_target_model_loocv_sample_preds(
@@ -231,35 +234,19 @@ def compute_rank_weights(self, train_x, train_y, base_models, target_model, num_
         return rank_weights
     
     def run(self, max_fevals: int, max_batch_size=2048):
-        # Average over multiple trials
-        for trial in range(N_TRIALS):
-            print(f"Trial {trial + 1} of {N_TRIALS}")
-            best_rgpe = []
-            best_random = []
-            best_vanilla_nei = []
-            # Initial random observations
-            raw_x = draw_sobol_samples(
-                bounds=BOUNDS, n=RANDOM_INITIALIZATION_SIZE, q=1, seed=trial
-            ).squeeze(1)
-            train_x = normalize(raw_x, bounds=BOUNDS)
-            train_y_noiseless = f(raw_x)
-            train_y = train_y_noiseless + noise_std * torch.randn_like(train_y_noiseless)
-            train_yvar = torch.full_like(train_y, noise_std**2)
-            vanilla_nei_train_x = train_x.clone()
-            vanilla_nei_train_y = train_y.clone()
-            vanilla_nei_train_yvar = train_yvar.clone()
-            # keep track of the best observed point at each iteration
-            best_value = train_y.max().item()
-            best_rgpe.append(best_value)
-            best_random.append(best_value)
-            vanilla_nei_best_value = best_value
-            best_vanilla_nei.append(vanilla_nei_best_value)
+        """Run the Bayesian Optimization loop for at most `max_fevals`."""
+        try:
+            if not self.initial_sample_taken:
+                self.initial_sample()
+            mll, model = self.initialize_model()
+            fevals_left = max_fevals - self.initial_sample_size
+
+            # Bayesian optimization loop
+            for _ in range(fevals_left):
 
-            # Run N_BATCH rounds of BayesOpt after the initial random batch
-            for iteration in range(N_BATCH):
-                target_model = self.get_fitted_model(train_x, train_y, train_yvar)
+                target_model = get_fitted_model(train_x, train_y, train_yvar)
                 model_list = base_model_list + [target_model]
-                rank_weights = self.compute_rank_weights(
+                rank_weights = compute_rank_weights(
                     train_x,
                     train_y,
                     base_model_list,
@@ -267,90 +254,43 @@ def run(self, max_fevals: int, max_batch_size=2048):
                     NUM_POSTERIOR_SAMPLES,
                 )
 
-                # create model and acquisition function
-                rgpe_model = RGPE(model_list, rank_weights)
-                sampler_qnei = SobolQMCNormalSampler(sample_shape=torch.Size([MC_SAMPLES]))
-                qNEI = qLogNoisyExpectedImprovement(
-                    model=rgpe_model,
-                    X_baseline=train_x,
-                    sampler=sampler_qnei,
-                    prune_baseline=False,
-                )
-
-                # optimize
-                candidate, _ = optimize_acqf(
-                    acq_function=qNEI,
-                    bounds=torch.tensor([[0.0], [1.0]], **self.searchspace.tensor_kwargs),
-                    q=Q_BATCH_SIZE,
-                    num_restarts=N_RESTARTS,
-                    raw_samples=N_RESTART_CANDIDATES,
-                )
-
-                # fetch the new values
-                new_x = candidate.detach()
-                new_y_noiseless = f(unnormalize(new_x, bounds=BOUNDS))
-                new_y = new_y_noiseless + noise_std * torch.randn_like(new_y_noiseless)
-                new_yvar = torch.full_like(new_y, noise_std**2)
-
-                # update training points
-                train_x = torch.cat((train_x, new_x))
-                train_y = torch.cat((train_y, new_y))
-                train_yvar = torch.cat((train_yvar, new_yvar))
-                random_candidate = torch.rand(1, **self.searchspace.tensor_kwargs)
-                next_random_noiseless = f(unnormalize(random_candidate, bounds=BOUNDS))
-                next_random = next_random_noiseless + noise_std * torch.randn_like(
-                    next_random_noiseless
-                )
-                next_random_best = next_random.max().item()
-                best_random.append(max(best_random[-1], next_random_best))
-
-                # get the new best observed value
-                best_value = train_y.max().item()
-                best_rgpe.append(best_value)
-
-                # Run Vanilla NEI for comparison
-                vanilla_nei_model = self.get_fitted_model(
-                    vanilla_nei_train_x,
-                    vanilla_nei_train_y,
-                    vanilla_nei_train_yvar,
-                )
-                vanilla_nei_sampler = SobolQMCNormalSampler(
-                    sample_shape=torch.Size([MC_SAMPLES])
-                )
-                vanilla_qNEI = qLogNoisyExpectedImprovement(
-                    model=vanilla_nei_model,
-                    X_baseline=vanilla_nei_train_x,
-                    sampler=vanilla_nei_sampler,
-                )
-                vanilla_nei_candidate, _ = optimize_acqf(
-                    acq_function=vanilla_qNEI,
-                    bounds=torch.tensor([[0.0], [1.0]], **self.searchspace.tensor_kwargs),
-                    q=Q_BATCH_SIZE,
-                    num_restarts=N_RESTARTS,
-                    raw_samples=N_RESTART_CANDIDATES,
-                )
-                # fetch the new values
-                vanilla_nei_new_x = vanilla_nei_candidate.detach()
-                vanilla_nei_new_y_noiseless = f(unnormalize(vanilla_nei_new_x, bounds=BOUNDS))
-                vanilla_nei_new_y = vanilla_nei_new_y_noiseless + noise_std * torch.randn_like(
-                    new_y_noiseless
-                )
-                vanilla_nei_new_yvar = torch.full_like(vanilla_nei_new_y, noise_std**2)
-
-                # update training points
-                vanilla_nei_train_x = torch.cat([vanilla_nei_train_x, vanilla_nei_new_x])
-                vanilla_nei_train_y = torch.cat([vanilla_nei_train_y, vanilla_nei_new_y])
-                vanilla_nei_train_yvar = torch.cat(
-                    [vanilla_nei_train_yvar, vanilla_nei_new_yvar]
-                )
-
-                # get the new best observed value
-                vanilla_nei_best_value = vanilla_nei_train_y.max().item()
-                best_vanilla_nei.append(vanilla_nei_best_value)
-
-            self.best_rgpe_all.append(best_rgpe)
-            self.best_random_all.append(best_random)
-            self.best_vanilla_nei_all.append(best_vanilla_nei)
+                # fit a Gaussian Process model
+                fit_gpytorch_mll(mll, optimizer=fit_gpytorch_mll_torch)
+                
+                # define the acquisition function
+                acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
+                
+                # optimize acquisition function to find the next evaluation point
+                if max_batch_size < self.searchspace_tensors.size(0):
+                    # optimize over a lattice if the space is too large
+                    candidate, _ = optimize_acqf_discrete_local_search(
+                        acqf,
+                        q=1,
+                        discrete_choices=self.searchspace_tensors,
+                        max_batch_size=max_batch_size,
+                        num_restarts=5,
+                        raw_samples=1024
+                    )
+                else:
+                    candidate, _ = optimize_acqf_discrete(
+                        acqf, 
+                        q=1, 
+                        choices=self.searchspace_tensors,
+                        max_batch_size=max_batch_size
+                    )
+                    
+                    # evaluate the new candidate
+                    self.evaluate_configs(candidate)
+                    fevals_left -= 1
+
+                # reinitialize the models so they are ready for fitting on next iteration
+                if fevals_left > 0:
+                    mll, model = self.initialize_model(model.state_dict())
+        except StopCriterionReached as e:
+            if self.tuning_options.verbose:
+                print(e)
+
+        return self.cost_func.results
 
 
 class RGPE(GP, GPyTorchModel):

From ee11757760d947bfe49e06d7e7c75bd6b578e9d4 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 20 Nov 2024 23:56:05 -0800
Subject: [PATCH 080/253] Implemented transfer learning caches in interface to
 be read and passed through tuning_options

---
 kernel_tuner/interface.py | 32 +++++++++++++++++++++++++++-----
 tune_bo_conv.py           |  7 ++++++-
 tune_bo_dedisp.py         | 12 ++++++++----
 3 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index f48d105dc..56fd7d883 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -477,6 +477,15 @@ def __deepcopy__(self, _):
                 "string",
             ),
         ),
+        (
+            "transfer_learning_caches",
+            (
+                """Array of filepaths to caches to use for transfer learning.
+        Filename uses suffix ".json", which is appended if missing.
+        """,
+                "list(string) or list(Path)",
+            ),
+        ),
         ("metrics", ("specifies user-defined metrics, please see :ref:`metrics`.", "dict")),
         ("simulation_mode", ("Simulate an auto-tuning search from an existing cachefile", "bool")),
         ("observers", ("""A list of Observers to use during tuning, please see :ref:`observers`.""", "list")),
@@ -593,6 +602,7 @@ def tune_kernel(
     observers=None,
     objective=None,
     objective_higher_is_better=None,
+    transfer_learning_caches=[],
 ):
     start_overhead_time = perf_counter()
     if log:
@@ -679,18 +689,30 @@ def tune_kernel(
     # we normalize it so that it always accepts atol.
     tuning_options.verify = util.normalize_verify_function(tuning_options.verify)
 
+    def preprocess_cache(filepath):
+        if isinstance(filepath, Path):
+            filepath = str(filepath.resolve())
+        if filepath[-5:] != ".json":
+            filepath += ".json"
+        return filepath
+
     # process cache
     if cache:
-        if isinstance(cache, Path):
-            cache = str(cache.resolve())
-        if cache[-5:] != ".json":
-            cache += ".json"
-
+        cache = preprocess_cache(cache)
         util.process_cache(cache, kernel_options, tuning_options, runner)
     else:
         tuning_options.cache = {}
         tuning_options.cachefile = None
 
+    # process transfer learning caches
+    tuning_options.transfer_learning_caches = []
+    if transfer_learning_caches and len(transfer_learning_caches) > 0:
+        for transfer_learning_cache in transfer_learning_caches:
+            cache = preprocess_cache(transfer_learning_cache)
+            assert cache != tuning_options.cache, "Transfer learning cache can not be the same as current cache"
+            cache_data = util.read_cache(cache, open_cache=False)
+            tuning_options.transfer_learning_caches.append(cache_data)
+
     # create search space
     searchspace = Searchspace(tune_params, restrictions, runner.dev.max_threads)
     restrictions = searchspace._modified_restrictions
diff --git a/tune_bo_conv.py b/tune_bo_conv.py
index 03ee7f2fa..86a64ac8b 100644
--- a/tune_bo_conv.py
+++ b/tune_bo_conv.py
@@ -101,6 +101,10 @@ def tune(
     metrics = OrderedDict()
     metrics["GFLOP/s"] = lambda p: total_flops / (p["time"] / 1000.0)
 
+    cache_dir = directory / "cachefiles/convolution_milo"
+    cache_filename = f"{device_name}.json"
+    transfer_learning_caches = [p for p in cache_dir.iterdir() if not p.stem.endswith("_T4") and p.name != cache_filename]
+
     def run():
         return kernel_tuner.tune_kernel(
             "convolution_kernel",
@@ -112,7 +116,7 @@ def run():
             grid_div_x=grid_div_x,
             cmem_args=cmem_args,
             restrictions=restrict,
-            cache=directory / f"cachefiles/convolution_milo/{device_name}.json",
+            cache=cache_dir / cache_filename,
             metrics=metrics,
             lang=lang,
             iterations=32,
@@ -122,6 +126,7 @@ def run():
             strategy=strategy,
             strategy_options=strategy_options,
             simulation_mode=simulation_mode,
+            transfer_learning_caches=transfer_learning_caches
         )
 
     # start tuning
diff --git a/tune_bo_dedisp.py b/tune_bo_dedisp.py
index 2cfb3b58b..ed41d729f 100644
--- a/tune_bo_dedisp.py
+++ b/tune_bo_dedisp.py
@@ -20,7 +20,7 @@
 max_freq = min_freq + (nr_channels-1) * channel_bandwidth
 
 
-def tune(device, strategy="bayes_opt_BOTorch", strategy_options={ 'max_fevals': 1500 }, lang='HIP', verbose=True, quiet=False, simulation_mode=True, profiling=True):
+def tune(device_name, strategy="bayes_opt_BOTorch", strategy_options={ 'max_fevals': 1500 }, lang='HIP', verbose=True, quiet=False, simulation_mode=True, profiling=True):
 
     args = []
 
@@ -55,7 +55,10 @@ def tune(device, strategy="bayes_opt_BOTorch", strategy_options={ 'max_fevals':
     metrics["GB/s"] = lambda p: gbytes / (p['time'] / 1e3)
 
     directory = Path(__file__).parent / "../autotuning_methodology/cached_data_used/"
-    cachefile = directory / f"cachefiles/dedispersion_milo/{device}.json"
+    cache_dir = directory / "cachefiles/dedispersion_milo"
+    cache_filename = f"{device_name}.json"
+    transfer_learning_caches = [p for p in cache_dir.iterdir() if not p.stem.endswith("_T4") and p.name != cache_filename]
+
     assert directory.exists()
     if lang == "CUDA":
         kernel_file = directory / "kernels/dedisp_milo/dedispersion.cu"
@@ -67,8 +70,9 @@ def tune(device, strategy="bayes_opt_BOTorch", strategy_options={ 'max_fevals':
     def run():
         return kt.tune_kernel("dedispersion_kernel", kernel_file, problem_size, args, tune_params,
                                 answer=answer, compiler_options=cp, restrictions=config_valid, device=0,
-                                cache=cachefile, lang=lang, iterations=32, metrics=metrics, 
-                                simulation_mode=simulation_mode, verbose=verbose, quiet=quiet, strategy=strategy, strategy_options=strategy_options)
+                                cache=cache_dir / cache_filename, lang=lang, iterations=32, metrics=metrics, 
+                                simulation_mode=simulation_mode, verbose=verbose, quiet=quiet, strategy=strategy, 
+                                strategy_options=strategy_options, transfer_learning_caches=transfer_learning_caches)
     
     # start tuning
     if profiling:

From 1162ece8c0f12edc9d2e156b234bfd7cefd28075 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 21 Nov 2024 19:26:29 -0800
Subject: [PATCH 081/253] Added BO transfer learning strategy

---
 kernel_tuner/interface.py                            |  2 ++
 .../strategies/bayes_opt_BOTorch_transfer.py         | 12 +++++++++---
 tune_bo_conv.py                                      |  3 +--
 tune_bo_dedisp.py                                    |  2 +-
 4 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 56fd7d883..54aa737f6 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -52,6 +52,7 @@
     bayes_opt,
     bayes_opt_alt_BOTorch,
     bayes_opt_BOTorch,
+    bayes_opt_BOTorch_transfer,
     bayes_opt_GPyTorch,
     bayes_opt_GPyTorch_lean,
     bayes_opt_old,
@@ -91,6 +92,7 @@
     "bayes_opt_GPyTorch_lean": bayes_opt_GPyTorch_lean,
     "bayes_opt_BOTorch": bayes_opt_BOTorch,
     "bayes_opt_BOTorch_alt": bayes_opt_alt_BOTorch,
+    "bayes_opt_BOTorch_transfer": bayes_opt_BOTorch_transfer,
 }
 
 
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
index f8106658a..3457a3a4e 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
@@ -40,7 +40,7 @@
 def tune(searchspace: Searchspace, runner, tuning_options):
     """The entry function for tuning a searchspace using this algorithm."""
     max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
-    bo = BayesianOptimization(searchspace, runner, tuning_options)
+    bo = BayesianOptimizationTransfer(searchspace, runner, tuning_options)
     return bo.run(max_fevals)
 
 class BayesianOptimizationTransfer(BayesianOptimization):
@@ -49,6 +49,12 @@ class BayesianOptimizationTransfer(BayesianOptimization):
     def __init__(self, searchspace: Searchspace, runner, tuning_options):
         super().__init__(searchspace, runner, tuning_options)
 
+        self.searchspaces_transfer_learning = []
+        for tl_cache in tuning_options.transfer_learning_caches:
+            self.searchspaces_transfer_learning.append(Searchspace(None, None, None, from_cache=tl_cache))
+
+        raise ValueError(self.searchspaces_transfer_learning)
+
         self.best_rgpe_all = []
         self.best_random_all = []
         self.best_vanilla_nei_all = []
@@ -202,7 +208,7 @@ def compute_rank_weights(self, train_x, train_y, base_models, target_model, num_
             Tensor: `n_t`-dim tensor with the ranking weight for each model
         """
         ranking_losses = []
-        
+
         # compute ranking loss for each base model
         for task in range(len(base_models)):
             model = base_models[task]
@@ -244,7 +250,7 @@ def run(self, max_fevals: int, max_batch_size=2048):
             # Bayesian optimization loop
             for _ in range(fevals_left):
 
-                target_model = get_fitted_model(train_x, train_y, train_yvar)
+                target_model = self.get_fitted_model(train_x, train_y, train_yvar)
                 model_list = base_model_list + [target_model]
                 rank_weights = compute_rank_weights(
                     train_x,
diff --git a/tune_bo_conv.py b/tune_bo_conv.py
index 86a64ac8b..61635c51f 100644
--- a/tune_bo_conv.py
+++ b/tune_bo_conv.py
@@ -21,10 +21,9 @@ def ops(w, h, fw, fh):
 total_flops = ops(w, h, fw, fh)
 
 
-# def tune(inputs, lang, strategy):
 def tune(
     device_name: str,
-    strategy="bayes_opt_BOTorch",
+    strategy="bayes_opt_BOTorch_transfer",
     strategy_options={ 'max_fevals': 150 },
     verbose=True,
     quiet=False,
diff --git a/tune_bo_dedisp.py b/tune_bo_dedisp.py
index ed41d729f..67a56c17e 100644
--- a/tune_bo_dedisp.py
+++ b/tune_bo_dedisp.py
@@ -20,7 +20,7 @@
 max_freq = min_freq + (nr_channels-1) * channel_bandwidth
 
 
-def tune(device_name, strategy="bayes_opt_BOTorch", strategy_options={ 'max_fevals': 1500 }, lang='HIP', verbose=True, quiet=False, simulation_mode=True, profiling=True):
+def tune(device_name, strategy="bayes_opt_BOTorch_transfer", strategy_options={ 'max_fevals': 1500 }, lang='HIP', verbose=True, quiet=False, simulation_mode=True, profiling=True):
 
     args = []
 

From 62fa13587309fe5acbd1e71f869f5c693833fd24 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 21 Nov 2024 19:27:01 -0800
Subject: [PATCH 082/253] Implemented optionally constructing a searchspace
 from a cache dictionary

---
 kernel_tuner/searchspace.py | 73 +++++++++++++++++++++++--------------
 1 file changed, 45 insertions(+), 28 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 201052e8d..281bd2009 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -42,6 +42,7 @@ def __init__(
         block_size_names=default_block_size_names,
         build_neighbors_index=False,
         neighbor_method=None,
+        from_cache: dict=None,
         framework="PythonConstraint",
         solver_method="PC_OptimizedBacktrackingSolver",
         path_to_ATF_cache: Path = None,
@@ -53,7 +54,15 @@ def __init__(
             adjacent: picks closest parameter value in both directions for each parameter
             Hamming: any parameter config with 1 different parameter value is a neighbor
         Optionally sort the searchspace by the order in which the parameter values were specified. By default, sort goes from first to last parameter, to reverse this use sort_last_param_first.
+        Optionally an imported cache can be used instead with `from_cache`, in which case the `tune_params`, `restrictions` and `max_threads` arguments can be set to None, and construction is skipped.
         """
+        # check the arguments
+        if from_cache is not None:
+            assert tune_params is None and restrictions is None and max_threads is None, "When `from_cache` is used, the positional arguments must be set to None."
+            tune_params = from_cache["tune_params"]
+        if from_cache is None:
+            assert tune_params is not None and restrictions is not None and max_threads is not None, "Must specify positional arugments ."
+
         # set the object attributes using the arguments
         framework_l = framework.lower()
         restrictions = restrictions if restrictions is not None else []
@@ -96,36 +105,44 @@ def __init__(
                 try_to_constraint=framework_l == "pythonconstraint",
             )
 
-        # get the framework given the framework argument
-        if framework_l == "pythonconstraint":
-            searchspace_builder = self.__build_searchspace
-        elif framework_l == "pysmt":
-            searchspace_builder = self.__build_searchspace_pysmt
-        elif framework_l == "pyatf":
-            searchspace_builder = self.__build_searchspace_pyATF
-        elif framework_l == "atf_cache":
-            searchspace_builder = self.__build_searchspace_ATF_cache
-            self.path_to_ATF_cache = path_to_ATF_cache
-        elif framework_l == "bruteforce":
-            searchspace_builder = self.__build_searchspace_bruteforce
-        else:
-            raise ValueError(f"Invalid framework parameter {framework}")
-
-        # get the solver given the solver method argument
-        solver = ""
-        if solver_method.lower() == "pc_backtrackingsolver":
-            solver = BacktrackingSolver()
-        elif solver_method.lower() == "pc_optimizedbacktrackingsolver":
-            solver = OptimizedBacktrackingSolver(forwardcheck=False)
-        elif solver_method.lower() == "pc_recursivebacktrackingsolver":
-            solver = RecursiveBacktrackingSolver()
-        elif solver_method.lower() == "pc_minconflictssolver":
-            solver = MinConflictsSolver()
+        # if an imported cache, skip building and set the values directly
+        if from_cache is not None:
+            configs = list(dict(from_cache["cache"]).keys())
+            self.list, self.__dict, self.size = None, None, len(configs)    # TODO
+            raise ValueError(configs)
         else:
-            raise ValueError(f"Solver method {solver_method} not recognized.")
+            # get the framework given the framework argument
+            if framework_l == "pythonconstraint":
+                searchspace_builder = self.__build_searchspace
+            elif framework_l == "pysmt":
+                searchspace_builder = self.__build_searchspace_pysmt
+            elif framework_l == "pyatf":
+                searchspace_builder = self.__build_searchspace_pyATF
+            elif framework_l == "atf_cache":
+                searchspace_builder = self.__build_searchspace_ATF_cache
+                self.path_to_ATF_cache = path_to_ATF_cache
+            elif framework_l == "bruteforce":
+                searchspace_builder = self.__build_searchspace_bruteforce
+            else:
+                raise ValueError(f"Invalid framework parameter {framework}")
+
+            # get the solver given the solver method argument
+            solver = ""
+            if solver_method.lower() == "pc_backtrackingsolver":
+                solver = BacktrackingSolver()
+            elif solver_method.lower() == "pc_optimizedbacktrackingsolver":
+                solver = OptimizedBacktrackingSolver(forwardcheck=False)
+            elif solver_method.lower() == "pc_recursivebacktrackingsolver":
+                solver = RecursiveBacktrackingSolver()
+            elif solver_method.lower() == "pc_minconflictssolver":
+                solver = MinConflictsSolver()
+            else:
+                raise ValueError(f"Solver method {solver_method} not recognized.")
+
+            # build the search space
+            self.list, self.__dict, self.size = searchspace_builder(block_size_names, max_threads, solver)
 
-        # build the search space
-        self.list, self.__dict, self.size = searchspace_builder(block_size_names, max_threads, solver)
+        # finalize construction
         self.__numpy = None
         self.num_params = len(self.tune_params)
         self.indices = np.arange(self.size)

From 57a262f6cafdb62ca7239bc619fcad0b896244fc Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 21 Nov 2024 21:25:58 -0800
Subject: [PATCH 083/253] Implemented construction of Searchspaces from caches

---
 kernel_tuner/searchspace.py                           | 7 ++++---
 kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py | 4 +---
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 281bd2009..0cc444717 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -107,9 +107,10 @@ def __init__(
 
         # if an imported cache, skip building and set the values directly
         if from_cache is not None:
-            configs = list(dict(from_cache["cache"]).keys())
-            self.list, self.__dict, self.size = None, None, len(configs)    # TODO
-            raise ValueError(configs)
+            configs = dict(from_cache["cache"]).values()
+            self.list = list(tuple([v for p, v in c.items() if p in self.tune_params]) for c in configs)
+            self.size = len(self.list)
+            self.__dict = dict(zip(self.list, range(self.size)))
         else:
             # get the framework given the framework argument
             if framework_l == "pythonconstraint":
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
index 3457a3a4e..95ffc9670 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
@@ -49,12 +49,10 @@ class BayesianOptimizationTransfer(BayesianOptimization):
     def __init__(self, searchspace: Searchspace, runner, tuning_options):
         super().__init__(searchspace, runner, tuning_options)
 
-        self.searchspaces_transfer_learning = []
+        self.searchspaces_transfer_learning: list[Searchspace] = []
         for tl_cache in tuning_options.transfer_learning_caches:
             self.searchspaces_transfer_learning.append(Searchspace(None, None, None, from_cache=tl_cache))
 
-        raise ValueError(self.searchspaces_transfer_learning)
-
         self.best_rgpe_all = []
         self.best_random_all = []
         self.best_vanilla_nei_all = []

From 964a6ee80bb2890e0887c8aa6e14642aea0c764a Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 21 Nov 2024 22:07:56 -0800
Subject: [PATCH 084/253] Transfer learning inputs and outcomes are represented
 in Tensors

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py        |  2 +-
 .../strategies/bayes_opt_BOTorch_transfer.py        | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 45ac275a9..85e877a4a 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -66,7 +66,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.searchspace.initialize_tensorspace(dtype=torch.float32, device=self.tensor_device)
         self.searchspace_tensors = searchspace.get_tensorspace()
         self.bounds, self.bounds_indices = self.searchspace.get_tensorspace_bounds()
-        self.train_X = torch.empty(0, **self.searchspace.tensor_kwargs)
+        self.train_X = torch.empty(0, **self.searchspace.tensor_kwargs) # TODO implement continuing from cache
         self.train_Y = torch.empty(0, **self.searchspace.tensor_kwargs)
         self.train_Yvar = torch.empty(0, **self.searchspace.tensor_kwargs)
 
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
index 95ffc9670..34f1e0c82 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
@@ -21,6 +21,7 @@
 except ImportError:
     bayes_opt_present = False
 
+
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies.bayes_opt_BOTorch import BayesianOptimization
 from kernel_tuner.util import StopCriterionReached
@@ -49,9 +50,19 @@ class BayesianOptimizationTransfer(BayesianOptimization):
     def __init__(self, searchspace: Searchspace, runner, tuning_options):
         super().__init__(searchspace, runner, tuning_options)
 
+        # get input and outcome data for each task
         self.searchspaces_transfer_learning: list[Searchspace] = []
+        self.inputs_transfer_learning: list[Tensor] = []
+        self.outcomes_transfer_learning: list[Tensor] = []
         for tl_cache in tuning_options.transfer_learning_caches:
-            self.searchspaces_transfer_learning.append(Searchspace(None, None, None, from_cache=tl_cache))
+            tensor_kwargs = searchspace.tensor_kwargs
+            tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)
+            tl_searchspace.initialize_tensorspace(**tensor_kwargs)
+            self.searchspaces_transfer_learning.append(tl_searchspace)
+            self.inputs_transfer_learning.append(tl_searchspace.get_tensorspace())
+            tl_outcomes = [c[tuning_options.objective] for c in tl_cache["cache"].values()]
+            self.outcomes_transfer_learning.append(torch.tensor(tl_outcomes, **tensor_kwargs))
+            assert self.inputs_transfer_learning[-1].shape[0] == self.outcomes_transfer_learning[-1].shape[0]
 
         self.best_rgpe_all = []
         self.best_random_all = []

From 24c67670ffd8bfd17fc6bc99b3aa7913e2a64a99 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 21 Nov 2024 22:30:19 -0800
Subject: [PATCH 085/253] More general approach to model and likelihood
 initialization to make it suitable for transfer learning

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 85e877a4a..1c3fea975 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -65,7 +65,6 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.searchspace = searchspace
         self.searchspace.initialize_tensorspace(dtype=torch.float32, device=self.tensor_device)
         self.searchspace_tensors = searchspace.get_tensorspace()
-        self.bounds, self.bounds_indices = self.searchspace.get_tensorspace_bounds()
         self.train_X = torch.empty(0, **self.searchspace.tensor_kwargs) # TODO implement continuing from cache
         self.train_Y = torch.empty(0, **self.searchspace.tensor_kwargs)
         self.train_Yvar = torch.empty(0, **self.searchspace.tensor_kwargs)
@@ -122,19 +121,17 @@ def initial_sample(self):
         self.evaluate_configs(sample_configs)
         self.initial_sample_taken = True
 
-    def initialize_model(self, state_dict=None, exact=True):
-        """Initialize the model and likelihood, possibly with a state dict for faster fitting."""
-        train_X = self.train_X
-        train_Y = self.train_Y
-        train_Yvar = self.train_Yvar
+    def get_model_and_likelihood(self, searchspace: Searchspace, train_X: Tensor, train_Y: Tensor, train_Yvar: Tensor=None, state_dict=None, exact=True):
+        """Initialize a model and likelihood, possibly with a state dict for faster fitting."""
+        bounds, bounds_indices = searchspace.get_tensorspace_bounds()
         transforms = dict(
-            input_transform=Normalize(d=train_X.shape[-1], indices=self.bounds_indices, bounds=self.bounds),
+            input_transform=Normalize(d=train_X.shape[-1], indices=bounds_indices, bounds=bounds),
             outcome_transform=Standardize(m=train_Y.shape[-1], batch_shape=train_X.shape[:-2])
         )
 
         # initialize the model
         if exact:
-            catdims = self.searchspace.get_tensorspace_categorical_dimensions()
+            catdims = searchspace.get_tensorspace_categorical_dimensions()
             if len(catdims) == 0:
                 model = SingleTaskGP(train_X, train_Y, train_Yvar=train_Yvar, **transforms)
             else:
@@ -151,14 +148,14 @@ def initialize_model(self, state_dict=None, exact=True):
             mll = ExactMarginalLogLikelihood(model.likelihood, model)
         else:
             mll = VariationalELBO(model.likelihood, model.model, num_data=train_Y.size(0))
-        return mll, model
+        return model, mll
 
     def run(self, max_fevals: int, max_batch_size=2048):
         """Run the Bayesian Optimization loop for at most `max_fevals`."""
         try:
             if not self.initial_sample_taken:
                 self.initial_sample()
-            mll, model = self.initialize_model()
+            model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
             fevals_left = max_fevals - self.initial_sample_size
 
             # create array to gradually reduce number of optimization spaces as fewer fevals are left
@@ -228,7 +225,7 @@ def run(self, max_fevals: int, max_batch_size=2048):
 
                 # reinitialize the models so they are ready for fitting on next iteration
                 if loop_i < len(nums_optimization_spaces) - 1:
-                    mll, model = self.initialize_model(model.state_dict())
+                    model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar, state_dict=model.state_dict())
         except StopCriterionReached as e:
             if self.tuning_options.verbose:
                 print(e)

From dc4b4c78723f41fe7ec6b13361f7bc71bd4f58b4 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 21 Nov 2024 22:58:13 -0800
Subject: [PATCH 086/253] Fitting a model for each base transfer learning task

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py  |  8 ++-
 .../strategies/bayes_opt_BOTorch_transfer.py  | 68 ++++---------------
 2 files changed, 20 insertions(+), 56 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 1c3fea975..e250eafbb 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -149,6 +149,10 @@ def get_model_and_likelihood(self, searchspace: Searchspace, train_X: Tensor, tr
         else:
             mll = VariationalELBO(model.likelihood, model.model, num_data=train_Y.size(0))
         return model, mll
+    
+    def fit(self, mll):
+        """Fit a Marginal Log Likelihood."""
+        return fit_gpytorch_mll(mll, optimizer=fit_gpytorch_mll_torch)
 
     def run(self, max_fevals: int, max_batch_size=2048):
         """Run the Bayesian Optimization loop for at most `max_fevals`."""
@@ -176,8 +180,8 @@ def run(self, max_fevals: int, max_batch_size=2048):
             for loop_i, num_optimization_spaces in enumerate(nums_optimization_spaces):
                 num_optimization_spaces = min(num_optimization_spaces, fevals_left)
 
-                # fit a Gaussian Process model
-                fit_gpytorch_mll(mll, optimizer=fit_gpytorch_mll_torch)
+                # fit on a Gaussian Process model
+                mll = self.fit(mll)
                 
                 # define the acquisition function
                 acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
index 34f1e0c82..527dcd409 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
@@ -50,70 +50,30 @@ class BayesianOptimizationTransfer(BayesianOptimization):
     def __init__(self, searchspace: Searchspace, runner, tuning_options):
         super().__init__(searchspace, runner, tuning_options)
 
-        # get input and outcome data for each task
+        # set up the data and model for each transfer learning base task
         self.searchspaces_transfer_learning: list[Searchspace] = []
         self.inputs_transfer_learning: list[Tensor] = []
         self.outcomes_transfer_learning: list[Tensor] = []
+        self.models_mlls_transfer_learning: list[tuple] = []
         for tl_cache in tuning_options.transfer_learning_caches:
+            # construct the searchspace for this task
             tensor_kwargs = searchspace.tensor_kwargs
             tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)
             tl_searchspace.initialize_tensorspace(**tensor_kwargs)
             self.searchspaces_transfer_learning.append(tl_searchspace)
-            self.inputs_transfer_learning.append(tl_searchspace.get_tensorspace())
-            tl_outcomes = [c[tuning_options.objective] for c in tl_cache["cache"].values()]
-            self.outcomes_transfer_learning.append(torch.tensor(tl_outcomes, **tensor_kwargs))
-            assert self.inputs_transfer_learning[-1].shape[0] == self.outcomes_transfer_learning[-1].shape[0]
-
-        self.best_rgpe_all = []
-        self.best_random_all = []
-        self.best_vanilla_nei_all = []
-        self.noise_std = 0.05
 
-        # Sample data for each base task
-        data_by_task = {}
-        for task in range(NUM_BASE_TASKS):
-            num_training_points = 20
-            # draw points from a sobol sequence
-            raw_x = draw_sobol_samples(
-                bounds=BOUNDS,
-                n=num_training_points,
-                q=1,
-                seed=task + 5397923,
-            ).squeeze(1)
-            # get observed values
-            f_x = f(raw_x, task_shift(task + 1))
-            train_y = f_x + noise_std * torch.randn_like(f_x)
-            train_yvar = torch.full_like(train_y, noise_std**2)
-            # store training data
-            data_by_task[task] = {
-                # scale x to [0, 1]
-                "train_x": normalize(raw_x, bounds=BOUNDS),
-                "train_y": train_y,
-                "train_yvar": train_yvar,
-            }
-
-        # Fit base model
-        base_model_list = []
-        for task in range(NUM_BASE_TASKS):
-            print(f"Fitting base model {task}")
-            model = self.get_fitted_model(
-                data_by_task[task]["train_x"],
-                data_by_task[task]["train_y"],
-                data_by_task[task]["train_yvar"],
-            )
-            base_model_list.append(model)
+            # get the inputs and outcomes for this task
+            tl_inputs = tl_searchspace.get_tensorspace()
+            self.inputs_transfer_learning.append(tl_inputs)
+            tl_outcomes = torch.tensor([c[tuning_options.objective] for c in tl_cache["cache"].values()], **tensor_kwargs).unsqueeze(-1)
+            self.outcomes_transfer_learning.append(tl_outcomes)
+            assert self.inputs_transfer_learning[-1].shape[0] == self.outcomes_transfer_learning[-1].shape[0]
 
-    def run_config(self, config: tuple):
-        return super().run_config(config)
-    
-    def evaluate_configs(self, X: Tensor):
-        return super().evaluate_configs(X)
-    
-    def initial_sample(self):
-        return super().initial_sample()
-    
-    def initialize_model(self, state_dict=None, exact=True):
-        return super().initialize_model(state_dict, exact)
+            # fit a model and likelihood for this task
+            model, mll = self.get_model_and_likelihood(tl_searchspace, tl_inputs, tl_outcomes)
+            mll = self.fit(mll)
+            self.models_mlls_transfer_learning.append((model, mll))
+        raise ValueError(self.models_mlls_transfer_learning)
     
     def get_fitted_model(self, train_X, train_Y, train_Yvar, state_dict=None):
         """Get a single task GP. The model will be fit unless a state_dict with model hyperparameters is provided."""

From e21a605694fc4c896e31b59afa75c0f18d6ee09c Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 21 Nov 2024 23:48:43 -0800
Subject: [PATCH 087/253] Account for invalid configurations in base task
 caches

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py     |  8 +++++++-
 .../strategies/bayes_opt_BOTorch_transfer.py     | 16 ++++++++++++----
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index e250eafbb..fd558feea 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -69,12 +69,18 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.train_Y = torch.empty(0, **self.searchspace.tensor_kwargs)
         self.train_Yvar = torch.empty(0, **self.searchspace.tensor_kwargs)
 
+    def is_valid_result(self, result, results=None):
+        """Returns whether the result is valid."""
+        if results is None:
+            results = []
+        return not isinstance(result, ErrorConfig) and not np.isnan(result) and not any(np.isnan(results))
+
     def run_config(self, config: tuple):
         """Run a single configuration. Returns the result and whether it is valid."""
         result, results = self.cost_func(config)
         results = np.array(results)
         var = np.nan
-        valid = not isinstance(result, ErrorConfig) and not np.isnan(result) and not any(np.isnan(results))
+        valid = self.is_valid_result(result, results)
         if not valid:
             result = np.nan
         elif not self.maximize:
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
index 527dcd409..e30a3a4db 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
@@ -56,6 +56,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.outcomes_transfer_learning: list[Tensor] = []
         self.models_mlls_transfer_learning: list[tuple] = []
         for tl_cache in tuning_options.transfer_learning_caches:
+            print(f"Importing transfer learning for {tl_cache["kernel_name"]}-{tl_cache['device_name']}")
             # construct the searchspace for this task
             tensor_kwargs = searchspace.tensor_kwargs
             tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)
@@ -63,17 +64,24 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
             self.searchspaces_transfer_learning.append(tl_searchspace)
 
             # get the inputs and outcomes for this task
-            tl_inputs = tl_searchspace.get_tensorspace()
+            inputs = []
+            outcomes = []
+            for c in tl_cache["cache"].values():
+                result = c[tuning_options.objective]
+                if self.is_valid_result(result):
+                    config = tuple(c[p] for p in tl_searchspace.tune_params.keys())
+                    inputs.append(tl_searchspace.param_config_to_tensor(config))
+                    outcomes.append(result)
+            tl_inputs = torch.stack(inputs).to(tl_searchspace.tensor_device)
+            tl_outcomes = torch.tensor(outcomes, **tensor_kwargs).unsqueeze(-1)
+            assert tl_inputs.shape[0] == tl_outcomes.shape[0]
             self.inputs_transfer_learning.append(tl_inputs)
-            tl_outcomes = torch.tensor([c[tuning_options.objective] for c in tl_cache["cache"].values()], **tensor_kwargs).unsqueeze(-1)
             self.outcomes_transfer_learning.append(tl_outcomes)
-            assert self.inputs_transfer_learning[-1].shape[0] == self.outcomes_transfer_learning[-1].shape[0]
 
             # fit a model and likelihood for this task
             model, mll = self.get_model_and_likelihood(tl_searchspace, tl_inputs, tl_outcomes)
             mll = self.fit(mll)
             self.models_mlls_transfer_learning.append((model, mll))
-        raise ValueError(self.models_mlls_transfer_learning)
     
     def get_fitted_model(self, train_X, train_Y, train_Yvar, state_dict=None):
         """Get a single task GP. The model will be fit unless a state_dict with model hyperparameters is provided."""

From e3cfe912759536afb21b140c3acb91d0f4e74f89 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 22 Nov 2024 00:44:53 -0800
Subject: [PATCH 088/253] Implement main RGPE BO loop

---
 .../strategies/bayes_opt_BOTorch_transfer.py  | 87 ++++++++-----------
 1 file changed, 38 insertions(+), 49 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
index e30a3a4db..4030dbad2 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
@@ -2,7 +2,7 @@
 
 try:
     import torch
-    from botorch.acquisition import LogExpectedImprovement
+    from botorch.acquisition import LogExpectedImprovement, qLogNoisyExpectedImprovement
     from botorch.fit import fit_gpytorch_mll, fit_gpytorch_mll_torch
     from botorch.models import SingleTaskGP
     from botorch.models.gpytorch import GPyTorchModel
@@ -54,7 +54,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.searchspaces_transfer_learning: list[Searchspace] = []
         self.inputs_transfer_learning: list[Tensor] = []
         self.outcomes_transfer_learning: list[Tensor] = []
-        self.models_mlls_transfer_learning: list[tuple] = []
+        self.models_transfer_learning: list = []
         for tl_cache in tuning_options.transfer_learning_caches:
             print(f"Importing transfer learning for {tl_cache["kernel_name"]}-{tl_cache['device_name']}")
             # construct the searchspace for this task
@@ -81,17 +81,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
             # fit a model and likelihood for this task
             model, mll = self.get_model_and_likelihood(tl_searchspace, tl_inputs, tl_outcomes)
             mll = self.fit(mll)
-            self.models_mlls_transfer_learning.append((model, mll))
-    
-    def get_fitted_model(self, train_X, train_Y, train_Yvar, state_dict=None):
-        """Get a single task GP. The model will be fit unless a state_dict with model hyperparameters is provided."""
-        model = SingleTaskGP(train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar)
-        if state_dict is None:
-            mll = ExactMarginalLogLikelihood(model.likelihood, model).to(train_X)
-            fit_gpytorch_mll(mll)
-        else:
-            model.load_state_dict(state_dict)
-        return model
+            self.models_transfer_learning.append(model)
     
     def roll_col(self, X, shift):
         """Rotate columns to right by shift."""
@@ -221,54 +211,53 @@ def run(self, max_fevals: int, max_batch_size=2048):
         try:
             if not self.initial_sample_taken:
                 self.initial_sample()
-            mll, model = self.initialize_model()
+            model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
             fevals_left = max_fevals - self.initial_sample_size
 
             # Bayesian optimization loop
             for _ in range(fevals_left):
 
-                target_model = self.get_fitted_model(train_x, train_y, train_yvar)
-                model_list = base_model_list + [target_model]
-                rank_weights = compute_rank_weights(
-                    train_x,
-                    train_y,
-                    base_model_list,
-                    target_model,
+                # fit a Gaussian Process model
+                fit_gpytorch_mll(mll, optimizer=fit_gpytorch_mll_torch)
+
+                # calculate the rank weights
+                model_list = self.models_transfer_learning + [model]
+                rank_weights = self.compute_rank_weights(
+                    self.train_X,
+                    self.train_Y,
+                    self.models_transfer_learning,
+                    model,
                     NUM_POSTERIOR_SAMPLES,
                 )
 
-                # fit a Gaussian Process model
-                fit_gpytorch_mll(mll, optimizer=fit_gpytorch_mll_torch)
-                
-                # define the acquisition function
-                acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
-                
-                # optimize acquisition function to find the next evaluation point
-                if max_batch_size < self.searchspace_tensors.size(0):
-                    # optimize over a lattice if the space is too large
-                    candidate, _ = optimize_acqf_discrete_local_search(
-                        acqf,
-                        q=1,
-                        discrete_choices=self.searchspace_tensors,
-                        max_batch_size=max_batch_size,
-                        num_restarts=5,
-                        raw_samples=1024
-                    )
-                else:
-                    candidate, _ = optimize_acqf_discrete(
-                        acqf, 
-                        q=1, 
-                        choices=self.searchspace_tensors,
-                        max_batch_size=max_batch_size
-                    )
+                # create rank model and acquisition function
+                rgpe_model = RGPE(model_list, rank_weights)
+                # acqf = LogExpectedImprovement(model=rgpe_model, best_f=self.train_Y.max(), maximize=True)
+                sampler_qnei = SobolQMCNormalSampler(sample_shape=torch.Size([MC_SAMPLES]))
+                qNEI = qLogNoisyExpectedImprovement(
+                    model=rgpe_model,
+                    X_baseline=self.train_X,
+                    sampler=sampler_qnei,
+                    prune_baseline=False,
+                )
+
+                # optimize
+                candidate, _ = optimize_acqf_discrete_local_search(
+                    acq_function=qNEI,
+                    discrete_choices=self.searchspace_tensors,
+                    q=Q_BATCH_SIZE,
+                    num_restarts=N_RESTARTS,
+                    raw_samples=N_RESTART_CANDIDATES,
+                    max_batch_size=max_batch_size
+                )
                     
-                    # evaluate the new candidate
-                    self.evaluate_configs(candidate)
-                    fevals_left -= 1
+                # evaluate the new candidate
+                self.evaluate_configs(candidate)
+                fevals_left -= 1
 
                 # reinitialize the models so they are ready for fitting on next iteration
                 if fevals_left > 0:
-                    mll, model = self.initialize_model(model.state_dict())
+                    model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
         except StopCriterionReached as e:
             if self.tuning_options.verbose:
                 print(e)

From 2334214e22a764c6a467653d2846301a1aea6a6d Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 22 Nov 2024 01:24:56 -0800
Subject: [PATCH 089/253] Improved the efficiency of taking initial sample

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index fd558feea..cd496120e 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -122,10 +122,11 @@ def evaluate_configs(self, X: Tensor):
         
     def initial_sample(self):
         """Take an initial sample."""
-        sample_indices = torch.from_numpy(self.searchspace.get_random_sample_indices(self.initial_sample_size)).to(self.tensor_device)
-        sample_configs = self.searchspace_tensors.index_select(0, sample_indices)
-        self.evaluate_configs(sample_configs)
         self.initial_sample_taken = True
+        if self.initial_sample_size > 0:
+            sample_indices = torch.from_numpy(self.searchspace.get_random_sample_indices(self.initial_sample_size)).to(self.tensor_device)
+            sample_configs = self.searchspace_tensors.index_select(0, sample_indices)
+            self.evaluate_configs(sample_configs)
 
     def get_model_and_likelihood(self, searchspace: Searchspace, train_X: Tensor, train_Y: Tensor, train_Yvar: Tensor=None, state_dict=None, exact=True):
         """Initialize a model and likelihood, possibly with a state dict for faster fitting."""

From c78a18c29c726e4f4022135603fd32bf1cdc2eb9 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 22 Nov 2024 01:27:48 -0800
Subject: [PATCH 090/253] Use of state dictionary is made optional

---
 .../strategies/bayes_opt_BOTorch_transfer.py  | 39 ++++++++++++-------
 1 file changed, 25 insertions(+), 14 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
index 4030dbad2..790f0f0c9 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
@@ -125,7 +125,7 @@ def compute_ranking_loss(self, f_samps, target_y):
                 ).sum(dim=-1)
         return rank_loss
     
-    def get_target_model_loocv_sample_preds(self, train_x, train_y, train_yvar, target_model, num_samples):
+    def get_target_model_loocv_sample_preds(self, train_x, train_y, train_yvar, target_model, num_samples, no_state=False):
         """Create a batch-mode LOOCV GP and draw a joint sample across all points from the target task.
 
         Args:
@@ -141,15 +141,22 @@ def get_target_model_loocv_sample_preds(self, train_x, train_y, train_yvar, targ
         masks = torch.eye(len(train_x), dtype=torch.uint8, device=self.tensor_device).bool()
         train_x_cv = torch.stack([train_x[~m] for m in masks])
         train_y_cv = torch.stack([train_y[~m] for m in masks])
-        train_yvar_cv = torch.stack([train_yvar[~m] for m in masks])
-        state_dict = target_model.state_dict()
-        # expand to batch size of batch_mode LOOCV model
-        state_dict_expanded = {
-            name: t.expand(batch_size, *[-1 for _ in range(t.ndim)])
-            for name, t in state_dict.items()
-        }
-        model = self.get_fitted_model(
-            train_x_cv, train_y_cv, train_yvar_cv, state_dict=state_dict_expanded
+        train_yvar_cv = torch.stack([train_yvar[~m] for m in masks]) if train_yvar is not None else None
+
+        # use a state dictionary for fast updates
+        if no_state:
+            state_dict_expanded = None
+        else:
+            state_dict = target_model.state_dict()
+
+            # expand to batch size of batch_mode LOOCV model
+            state_dict_expanded = {
+                name: t.expand(batch_size, *[-1 for _ in range(t.ndim)])
+                for name, t in state_dict.items()
+            }
+        
+        model, _ = self.get_model_and_likelihood(
+            self.searchspace, train_x_cv, train_y_cv, train_yvar_cv, state_dict=state_dict_expanded
         )
         with torch.no_grad():
             posterior = model.posterior(train_x)
@@ -159,7 +166,7 @@ def get_target_model_loocv_sample_preds(self, train_x, train_y, train_yvar, targ
             sampler = SobolQMCNormalSampler(sample_shape=torch.Size([num_samples]))
             return sampler(posterior).squeeze(-1)
     
-    def compute_rank_weights(self, train_x, train_y, base_models, target_model, num_samples):
+    def compute_rank_weights(self, train_x, train_y, train_yvar, base_models, target_model, num_samples, no_state=False):
         """Compute ranking weights for each base model and the target model (using LOOCV for the target model).
         
         Note: This implementation does not currently address weight dilution, since we only have a small number of base models.
@@ -177,8 +184,7 @@ def compute_rank_weights(self, train_x, train_y, base_models, target_model, num_
         ranking_losses = []
 
         # compute ranking loss for each base model
-        for task in range(len(base_models)):
-            model = base_models[task]
+        for model in base_models:
             # compute posterior over training points for target task
             posterior = model.posterior(train_x)
             sampler = SobolQMCNormalSampler(sample_shape=torch.Size([num_samples]))
@@ -194,6 +200,7 @@ def compute_rank_weights(self, train_x, train_y, base_models, target_model, num_
             train_yvar,
             target_model,
             num_samples,
+            no_state=no_state,
         )
         ranking_losses.append(self.compute_ranking_loss(target_f_samps, train_y))
         ranking_loss_tensor = torch.stack(ranking_losses)
@@ -213,6 +220,7 @@ def run(self, max_fevals: int, max_batch_size=2048):
                 self.initial_sample()
             model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
             fevals_left = max_fevals - self.initial_sample_size
+            first_loop = self.initial_sample_size > 0
 
             # Bayesian optimization loop
             for _ in range(fevals_left):
@@ -225,9 +233,11 @@ def run(self, max_fevals: int, max_batch_size=2048):
                 rank_weights = self.compute_rank_weights(
                     self.train_X,
                     self.train_Y,
+                    self.train_Yvar,
                     self.models_transfer_learning,
                     model,
                     NUM_POSTERIOR_SAMPLES,
+                    no_state=first_loop,
                 )
 
                 # create rank model and acquisition function
@@ -258,6 +268,7 @@ def run(self, max_fevals: int, max_batch_size=2048):
                 # reinitialize the models so they are ready for fitting on next iteration
                 if fevals_left > 0:
                     model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
+                    first_loop = False
         except StopCriterionReached as e:
             if self.tuning_options.verbose:
                 print(e)
@@ -310,4 +321,4 @@ def forward(self, x):
         # base models and target model
         mean_x = torch.stack(weighted_means).sum(dim=0)
         covar_x = PsdSumLazyTensor(*weighted_covars)
-        return MultivariateNormal(mean_x, covar_x)
\ No newline at end of file
+        return MultivariateNormal(mean_x, covar_x)

From 8416098b111de19c954a7f8079512419492ef380 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 22 Nov 2024 02:44:59 -0800
Subject: [PATCH 091/253] Renamed RGPE strategy

---
 kernel_tuner/interface.py                              |  6 ++++--
 ..._transfer.py => bayes_opt_BOTorch_transfer_RGPE.py} | 10 +++-------
 2 files changed, 7 insertions(+), 9 deletions(-)
 rename kernel_tuner/strategies/{bayes_opt_BOTorch_transfer.py => bayes_opt_BOTorch_transfer_RGPE.py} (96%)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 54aa737f6..225439d30 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -52,7 +52,8 @@
     bayes_opt,
     bayes_opt_alt_BOTorch,
     bayes_opt_BOTorch,
-    bayes_opt_BOTorch_transfer,
+    bayes_opt_BOTorch_transfer_RGPE,
+    bayes_opt_BOTorch_transfer_weighted,
     bayes_opt_GPyTorch,
     bayes_opt_GPyTorch_lean,
     bayes_opt_old,
@@ -92,7 +93,8 @@
     "bayes_opt_GPyTorch_lean": bayes_opt_GPyTorch_lean,
     "bayes_opt_BOTorch": bayes_opt_BOTorch,
     "bayes_opt_BOTorch_alt": bayes_opt_alt_BOTorch,
-    "bayes_opt_BOTorch_transfer": bayes_opt_BOTorch_transfer,
+    "bayes_opt_BOTorch_transfer": bayes_opt_BOTorch_transfer_weighted,
+    "bayes_opt_BOTorch_transfer_RGPE": bayes_opt_BOTorch_transfer_RGPE,
 }
 
 
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
similarity index 96%
rename from kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
rename to kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
index 790f0f0c9..9b42fc77e 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
@@ -1,19 +1,15 @@
-"""Bayesian Optimization implementation using BO Torch."""
+"""Bayesian Optimization implementation using BO Torch and transfer learning with RGPE."""
 
 try:
     import torch
-    from botorch.acquisition import LogExpectedImprovement, qLogNoisyExpectedImprovement
+    from botorch.acquisition import qLogNoisyExpectedImprovement
     from botorch.fit import fit_gpytorch_mll, fit_gpytorch_mll_torch
-    from botorch.models import SingleTaskGP
     from botorch.models.gpytorch import GPyTorchModel
-    from botorch.optim.optimize import optimize_acqf_discrete, optimize_acqf_discrete_local_search
+    from botorch.optim.optimize import optimize_acqf_discrete_local_search
     from botorch.sampling.normal import SobolQMCNormalSampler
-    from botorch.utils.sampling import draw_sobol_samples
-    from botorch.utils.transforms import normalize, unnormalize
     from gpytorch.distributions import MultivariateNormal
     from gpytorch.lazy import PsdSumLazyTensor
     from gpytorch.likelihoods import LikelihoodList
-    from gpytorch.mlls import ExactMarginalLogLikelihood
     from gpytorch.models import GP
     from torch import Tensor
     from torch.nn import ModuleList

From dc000b70abeb35de3bbaaf1656a1b2de7350590f Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 22 Nov 2024 03:43:45 -0800
Subject: [PATCH 092/253] Implemented new transfer learning strategy with
 multiple independent GPs

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py  |   1 +
 .../bayes_opt_BOTorch_transfer_weighted.py    | 156 ++++++++++++++++++
 2 files changed, 157 insertions(+)
 create mode 100644 kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index cd496120e..5ee2854dc 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -117,6 +117,7 @@ def evaluate_configs(self, X: Tensor):
                 self.train_X = torch.cat([self.train_X, torch.stack(valid_configs)])
                 self.train_Y = torch.cat([self.train_Y, torch.tensor(valid_results, **self.searchspace.tensor_kwargs)])
                 self.train_Yvar = torch.cat([self.train_Yvar, torch.tensor(valid_vars, **self.searchspace.tensor_kwargs)])
+            return valid_results
         else:
             raise NotImplementedError(f"Evaluation has not been implemented for type {type(X)}")
         
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
new file mode 100644
index 000000000..e76c0793c
--- /dev/null
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
@@ -0,0 +1,156 @@
+"""Bayesian Optimization implementation using BO Torch and transfer learning with RGPE."""
+
+try:
+    import torch
+    from botorch.acquisition import LogExpectedImprovement
+    from botorch.optim.optimize import optimize_acqf_discrete
+    from torch import Tensor
+    bayes_opt_present = True
+except ImportError:
+    bayes_opt_present = False
+
+from math import ceil, sqrt
+
+import numpy as np
+
+from kernel_tuner.searchspace import Searchspace
+from kernel_tuner.strategies.bayes_opt_BOTorch import BayesianOptimization
+from kernel_tuner.util import StopCriterionReached
+
+
+def tune(searchspace: Searchspace, runner, tuning_options):
+    """The entry function for tuning a searchspace using this algorithm."""
+    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
+    bo = BayesianOptimizationTransfer(searchspace, runner, tuning_options)
+    return bo.run(max_fevals)
+
+class BayesianOptimizationTransfer(BayesianOptimization):
+    """Bayesian Optimization class with transfer learning."""
+
+    def __init__(self, searchspace: Searchspace, runner, tuning_options):
+        super().__init__(searchspace, runner, tuning_options)
+
+        # set up the data and model for each transfer learning base task
+        self.searchspaces_transfer_learning: list[Searchspace] = []
+        self.inputs_transfer_learning: list[Tensor] = []
+        self.outcomes_transfer_learning: list[Tensor] = []
+        self.models_transfer_learning: list = []
+        for tl_cache in tuning_options.transfer_learning_caches:
+            print(f"Importing transfer learning for {tl_cache["kernel_name"]}-{tl_cache['device_name']}")
+            # construct the searchspace for this task
+            tensor_kwargs = searchspace.tensor_kwargs
+            tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)
+            tl_searchspace.initialize_tensorspace(**tensor_kwargs)
+            self.searchspaces_transfer_learning.append(tl_searchspace)
+
+            # get the inputs and outcomes for this task
+            inputs = []
+            outcomes = []
+            for c in tl_cache["cache"].values():
+                result = c[tuning_options.objective]
+                if self.is_valid_result(result):
+                    config = tuple(c[p] for p in tl_searchspace.tune_params.keys())
+                    inputs.append(tl_searchspace.param_config_to_tensor(config))
+                    outcomes.append(result)
+            tl_inputs = torch.stack(inputs).to(tl_searchspace.tensor_device)
+            tl_outcomes = torch.tensor(outcomes, **tensor_kwargs).unsqueeze(-1)
+            assert tl_inputs.shape[0] == tl_outcomes.shape[0]
+            self.inputs_transfer_learning.append(tl_inputs)
+            self.outcomes_transfer_learning.append(tl_outcomes)
+
+            # fit a model and likelihood for this task
+            model, mll = self.get_model_and_likelihood(tl_searchspace, tl_inputs, tl_outcomes)
+            mll = self.fit(mll)
+            self.models_transfer_learning.append(model)
+    
+    def run(self, max_fevals: int, max_batch_size=2048):
+        """Run the Bayesian Optimization loop for at most `max_fevals`."""
+        try:
+            if not self.initial_sample_taken:
+                self.initial_sample()
+            model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
+            fevals_left = max_fevals - self.initial_sample_size
+
+            # create array to gradually reduce number of optimization spaces as fewer fevals are left
+            tensorspace_size = self.searchspace_tensors.size(0)
+            reserve_final_loops = min(3, fevals_left)   # reserve some loops at the end that are never split
+            fevals_left -= reserve_final_loops
+            num_loops = min(max(round(sqrt(fevals_left*2)), 3), fevals_left)  # set the number of loops for the array
+            avg_optimization_spaces = max(round(sqrt(tensorspace_size / max_batch_size)), 1)  # set the average number of optimization spaces
+            numspace = np.geomspace(start=avg_optimization_spaces, stop=0.1, num=num_loops)
+            nums_optimization_spaces = np.clip(np.round(numspace * (fevals_left / numspace.sum())), a_min=1, a_max=None)
+            # if there's a discrepency, add or subtract the difference from the first number
+            if np.sum(nums_optimization_spaces) != fevals_left:
+                nums_optimization_spaces[0] += fevals_left - np.sum(nums_optimization_spaces)
+            nums_optimization_spaces = np.concatenate([nums_optimization_spaces, np.full(reserve_final_loops, 1)])
+            fevals_left += reserve_final_loops
+
+            # create the acquisition functions for the transferred GPs
+            acqfs = [LogExpectedImprovement(model=m, best_f=self.outcomes_transfer_learning[i].max(), maximize=True) for i, m in enumerate(self.models_transfer_learning)]
+            acqfs_results = [list() for _ in acqfs]
+
+            # Bayesian optimization loop
+            for loop_i, num_optimization_spaces in enumerate(nums_optimization_spaces):
+                num_optimization_spaces = round(min(num_optimization_spaces, fevals_left))
+
+                # fit on a Gaussian Process model
+                mll = self.fit(mll)
+
+                # divide the optimization space into random chuncks
+                tensorspace_size = self.searchspace_tensors.size(0)
+                if num_optimization_spaces <= 1:
+                    optimization_spaces = [self.searchspace_tensors]
+                else:
+                    # shuffle the searchspace
+                    shuffled_indices = torch.randperm(tensorspace_size)
+                    tensorspace = self.searchspace_tensors[shuffled_indices]
+                    optimization_spaces = tensorspace.split(ceil(tensorspace_size / num_optimization_spaces))
+
+                # set which acqfuisition function is used at each point of the optimization space loop
+                if num_optimization_spaces > len(self.models_transfer_learning):
+                    # all models get a proportional turn
+                    selected_acqfs = np.linspace(start=0, stop=len(acqfs), num=num_optimization_spaces)
+                    selected_acqfs = selected_acqfs.round(0).astype(int)
+                    selected_acqfs = selected_acqfs.clip(0, len(acqfs)-1)
+                elif num_optimization_spaces == len(self.models_transfer_learning):
+                    # all models get one turn
+                    selected_acqfs = list(range(num_optimization_spaces))
+                elif num_optimization_spaces == 1:
+                    # only the target model is used
+                    selected_acqfs = [0]
+                else:
+                    # only select the target + best performing models (can include target as well)
+                    acqfs_means = np.array([np.mean(r) for r in acqfs_results])
+                    if not self.tuning_options["objective_higher_is_better"]:
+                        acqfs_means = -acqfs_means
+                    selected_acqfs = [0] + np.argpartition(acqfs_means, -num_optimization_spaces-1)[-num_optimization_spaces-1:]
+                    selected_acqfs = selected_acqfs.round(0).astype(int).clip(0, num_optimization_spaces-1)
+
+                # define the acquisition functions
+                acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
+                current_acqfs = [acqf] + acqfs
+                
+                # optimize acquisition function to find the next evaluation point
+                for i, optimization_space in enumerate(optimization_spaces):
+                    acqfs_index = selected_acqfs[i]
+                    candidate, _ = optimize_acqf_discrete(
+                        current_acqfs[acqfs_index], 
+                        q=1, 
+                        choices=optimization_space,
+                        max_batch_size=max_batch_size
+                    )
+                    
+                    # evaluate the new candidate
+                    result = self.evaluate_configs(candidate)
+                    if len(result) == 1:
+                        acqfs_results[acqfs_index].append(result[0])
+                    fevals_left -= 1
+
+                # reinitialize the models so they are ready for fitting on next iteration
+                if loop_i < len(nums_optimization_spaces) - 1:
+                    model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar, state_dict=model.state_dict())
+        except StopCriterionReached as e:
+            if self.tuning_options.verbose:
+                print(e)
+
+        return self.cost_func.results
\ No newline at end of file

From aa30ec2eceb7fde4e2b45d7967d6a6fd9a1ab299 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 22 Nov 2024 16:26:30 -0800
Subject: [PATCH 093/253] Removed redundant min/max results adjustment

---
 kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
index e76c0793c..f424f41b6 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
@@ -121,8 +121,6 @@ def run(self, max_fevals: int, max_batch_size=2048):
                 else:
                     # only select the target + best performing models (can include target as well)
                     acqfs_means = np.array([np.mean(r) for r in acqfs_results])
-                    if not self.tuning_options["objective_higher_is_better"]:
-                        acqfs_means = -acqfs_means
                     selected_acqfs = [0] + np.argpartition(acqfs_means, -num_optimization_spaces-1)[-num_optimization_spaces-1:]
                     selected_acqfs = selected_acqfs.round(0).astype(int).clip(0, num_optimization_spaces-1)
 

From fd6f95ec51707a5b165b5e8fce4dd75a0189e592 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 26 Nov 2024 13:16:56 -0800
Subject: [PATCH 094/253] Result registration must be optimization direction
 dependent

---
 .../strategies/bayes_opt_BOTorch_transfer_weighted.py         | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
index f424f41b6..1778d3ac2 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
@@ -51,6 +51,8 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
                 if self.is_valid_result(result):
                     config = tuple(c[p] for p in tl_searchspace.tune_params.keys())
                     inputs.append(tl_searchspace.param_config_to_tensor(config))
+                    if not self.maximize:
+                        result = -result
                     outcomes.append(result)
             tl_inputs = torch.stack(inputs).to(tl_searchspace.tensor_device)
             tl_outcomes = torch.tensor(outcomes, **tensor_kwargs).unsqueeze(-1)
@@ -151,4 +153,4 @@ def run(self, max_fevals: int, max_batch_size=2048):
             if self.tuning_options.verbose:
                 print(e)
 
-        return self.cost_func.results
\ No newline at end of file
+        return self.cost_func.results

From 6963febd552d6d0438cfc7cd79fb52ce6eb39265 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 26 Nov 2024 13:17:27 -0800
Subject: [PATCH 095/253] Transfer learning by direct transfer of best
 configurations

---
 .../bayes_opt_BOTorch_transfer_direct.py      | 141 ++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py
new file mode 100644
index 000000000..ee552dcc7
--- /dev/null
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py
@@ -0,0 +1,141 @@
+"""Bayesian Optimization implementation using BO Torch and transfer learning with RGPE."""
+
+try:
+    import torch
+    from botorch.acquisition import LogExpectedImprovement
+    from botorch.optim.optimize import optimize_acqf_discrete
+    from torch import Tensor
+    bayes_opt_present = True
+except ImportError:
+    bayes_opt_present = False
+
+from math import ceil, sqrt
+
+import numpy as np
+
+from kernel_tuner.searchspace import Searchspace
+from kernel_tuner.strategies.bayes_opt_BOTorch import BayesianOptimization
+from kernel_tuner.util import StopCriterionReached
+
+
+def tune(searchspace: Searchspace, runner, tuning_options):
+    """The entry function for tuning a searchspace using this algorithm."""
+    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
+    bo = BayesianOptimizationTransfer(searchspace, runner, tuning_options)
+    return bo.run(max_fevals)
+
+class BayesianOptimizationTransfer(BayesianOptimization):
+    """Bayesian Optimization class with transfer learning."""
+
+    def __init__(self, searchspace: Searchspace, runner, tuning_options):
+        super().__init__(searchspace, runner, tuning_options)
+
+        # set up the data and model for each transfer learning base task
+        self.searchspaces_transfer_learning: list[Searchspace] = []
+        self.inputs_transfer_learning: list[Tensor] = []
+        self.outcomes_transfer_learning: list[Tensor] = []
+        for tl_cache in tuning_options.transfer_learning_caches:
+            print(f"Importing transfer learning for {tl_cache["kernel_name"]}-{tl_cache['device_name']}")
+            # construct the searchspace for this task
+            tensor_kwargs = searchspace.tensor_kwargs
+            tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)
+            tl_searchspace.initialize_tensorspace(**tensor_kwargs)
+            self.searchspaces_transfer_learning.append(tl_searchspace)
+
+            # get the inputs and outcomes for this task
+            inputs = []
+            outcomes = []
+            for c in tl_cache["cache"].values():
+                result = c[tuning_options.objective]
+                if self.is_valid_result(result):
+                    config = tuple(c[p] for p in tl_searchspace.tune_params.keys())
+                    inputs.append(tl_searchspace.param_config_to_tensor(config))
+                    if not self.maximize:
+                        result = -result
+                    outcomes.append(result)
+            tl_inputs = torch.stack(inputs).to(tl_searchspace.tensor_device)
+            tl_outcomes = torch.tensor(outcomes, **tensor_kwargs).unsqueeze(-1)
+            assert tl_inputs.shape[0] == tl_outcomes.shape[0]
+            self.inputs_transfer_learning.append(tl_inputs)
+            self.outcomes_transfer_learning.append(tl_outcomes)
+    
+    def run(self, max_fevals: int, max_batch_size=2048):
+        """Run the Bayesian Optimization loop for at most `max_fevals`."""
+        try:
+            if not self.initial_sample_taken:
+                self.initial_sample()
+            model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
+            fevals_left = max_fevals - self.initial_sample_size
+
+            # create array to gradually reduce number of optimization spaces as fewer fevals are left
+            tensorspace_size = self.searchspace_tensors.size(0)
+            reserve_final_loops = min(3, fevals_left)   # reserve some loops at the end that are never split
+            fevals_left -= reserve_final_loops
+            num_loops = min(max(round(sqrt(fevals_left*2)), 3), fevals_left)  # set the number of loops for the array
+            avg_optimization_spaces = max(round(sqrt(tensorspace_size / max_batch_size)), 1)  # set the average number of optimization spaces
+            numspace = np.geomspace(start=avg_optimization_spaces, stop=0.1, num=num_loops)
+            nums_optimization_spaces = np.clip(np.round(numspace * (fevals_left / numspace.sum())), a_min=1, a_max=None)
+            # if there's a discrepency, add or subtract the difference from the first number
+            if np.sum(nums_optimization_spaces) != fevals_left:
+                nums_optimization_spaces[0] += fevals_left - np.sum(nums_optimization_spaces)
+            nums_optimization_spaces = np.concatenate([nums_optimization_spaces, np.full(reserve_final_loops, 1)])
+            fevals_left += reserve_final_loops
+
+            # get the number of top configurations to select from transfer learning caches
+            num_tl_caches = len(self.outcomes_transfer_learning)
+            use_top_configs_until_loop = np.argmax(nums_optimization_spaces < num_tl_caches+1)  # stop after we have fewer num_optimization_spaces than caches - because with more caches the ACQF will not be used as much
+            num_top_configs = sum([ceil(n/(num_tl_caches+1)) for n in nums_optimization_spaces[:use_top_configs_until_loop]])
+
+            # select the top configurations for each of the transfer learning caches
+            top_configs = [list() for _ in self.outcomes_transfer_learning]
+            for tl_index, tl_outcomes in enumerate(self.outcomes_transfer_learning):
+                top_configs[tl_index] = self.inputs_transfer_learning[tl_outcomes.argmax()[:num_top_configs]] # TODO check if correct
+
+            # # if there are duplicate configurations, move them up and make sure there are only unique configs
+            # duplicate configurations are inserted at the num_configs-(sum of indices)th index
+            # # TODO
+
+            # Bayesian optimization loop
+            for loop_i, num_optimization_spaces in enumerate(nums_optimization_spaces):
+                num_optimization_spaces = round(min(num_optimization_spaces, fevals_left))
+
+                # calculate how many of the optimization spaces to optimize using GP
+                optimize_with_GP = max(round(num_optimization_spaces/(num_tl_caches+1)), 1)
+
+                # divide the optimization space into random chuncks
+                tensorspace_size = self.searchspace_tensors.size(0)
+                if num_optimization_spaces <= 1:
+                    optimization_spaces = [self.searchspace_tensors]
+                else:
+                    # shuffle the searchspace
+                    shuffled_indices = torch.randperm(tensorspace_size)
+                    tensorspace = self.searchspace_tensors[shuffled_indices]
+                    optimization_spaces = tensorspace.split(ceil(tensorspace_size / num_optimization_spaces))
+
+                # fit on a Gaussian Process model
+                mll = self.fit(mll)
+
+                # define the acquisition functions
+                acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
+                
+                # optimize acquisition function to find the next evaluation point
+                for i, optimization_space in enumerate(optimization_spaces):
+                    candidate, _ = optimize_acqf_discrete(
+                        acqf, 
+                        q=1, 
+                        choices=optimization_space,
+                        max_batch_size=max_batch_size
+                    )
+                    
+                    # evaluate the new candidate
+                    self.evaluate_configs(candidate)
+                    fevals_left -= 1
+
+                # reinitialize the models so they are ready for fitting on next iteration
+                if loop_i < len(nums_optimization_spaces) - 1:
+                    model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar, state_dict=model.state_dict())
+        except StopCriterionReached as e:
+            if self.tuning_options.verbose:
+                print(e)
+
+        return self.cost_func.results

From a08953e6b9854f200dd84c0b0234fc98f190eddc Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 5 Mar 2025 22:11:50 +0100
Subject: [PATCH 096/253] BO update

---
 kernel_tuner/interface.py                             | 11 ++++++++++-
 .../strategies/bayes_opt_BOTorch_transfer_RGPE.py     |  2 ++
 tune_bo_conv.py                                       |  6 +++---
 tune_bo_dedisp.py                                     |  2 +-
 4 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 225439d30..16af99250 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -52,6 +52,7 @@
     bayes_opt,
     bayes_opt_alt_BOTorch,
     bayes_opt_BOTorch,
+    bayes_opt_BOTorch_transfer_direct,
     bayes_opt_BOTorch_transfer_RGPE,
     bayes_opt_BOTorch_transfer_weighted,
     bayes_opt_GPyTorch,
@@ -93,7 +94,8 @@
     "bayes_opt_GPyTorch_lean": bayes_opt_GPyTorch_lean,
     "bayes_opt_BOTorch": bayes_opt_BOTorch,
     "bayes_opt_BOTorch_alt": bayes_opt_alt_BOTorch,
-    "bayes_opt_BOTorch_transfer": bayes_opt_BOTorch_transfer_weighted,
+    "bayes_opt_BOTorch_transfer_direct": bayes_opt_BOTorch_transfer_direct,
+    "bayes_opt_BOTorch_transfer_weighted": bayes_opt_BOTorch_transfer_weighted,
     "bayes_opt_BOTorch_transfer_RGPE": bayes_opt_BOTorch_transfer_RGPE,
 }
 
@@ -910,8 +912,14 @@ def tune_kernel_T1(
     device = kernelspec["Device"]["Name"]
     strategy = inputs["Search"]["Name"]
 
+    # set the cache and transfer learning cache paths
     if cache_filepath is None and "SimulationInput" in kernelspec:
         cache_filepath = Path(kernelspec["SimulationInput"])
+    cache_dir = Path(cache_filepath).parent
+    # TODO remove in production!
+    transfer_learning_caches = [
+        p for p in cache_dir.iterdir() if not p.stem.endswith("_T4") and p.name != cache_filepath.name
+    ]
 
     # get the grid divisions
     grid_divs = {}
@@ -996,6 +1004,7 @@ def tune_kernel_T1(
         strategy_options=strategy_options,
         objective=objective,
         objective_higher_is_better=objective_higher_is_better,
+        transfer_learning_caches=transfer_learning_caches,
     )
     if output_T4:
         return get_t4_metadata(), get_t4_results(results, tune_params, objective=objective)
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
index 9b42fc77e..c371eb889 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
@@ -67,6 +67,8 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
                 if self.is_valid_result(result):
                     config = tuple(c[p] for p in tl_searchspace.tune_params.keys())
                     inputs.append(tl_searchspace.param_config_to_tensor(config))
+                    if not self.maximize:
+                        result = -result
                     outcomes.append(result)
             tl_inputs = torch.stack(inputs).to(tl_searchspace.tensor_device)
             tl_outcomes = torch.tensor(outcomes, **tensor_kwargs).unsqueeze(-1)
diff --git a/tune_bo_conv.py b/tune_bo_conv.py
index 61635c51f..ec37fbf67 100644
--- a/tune_bo_conv.py
+++ b/tune_bo_conv.py
@@ -23,13 +23,13 @@ def ops(w, h, fw, fh):
 
 def tune(
     device_name: str,
-    strategy="bayes_opt_BOTorch_transfer",
+    strategy="bayes_opt_BOTorch_transfer_weighted",
     strategy_options={ 'max_fevals': 150 },
     verbose=True,
     quiet=False,
     simulation_mode=True,
     lang="CUDA",
-    profiling=False,
+    profiling=True,
 ):  
     directory = Path(__file__).parent / "../autotuning_methodology/cached_data_used/"
     assert directory.exists()
@@ -135,7 +135,7 @@ def run():
         with cProfile.Profile() as pr:
             results, env = run()
             if profiling:
-                pr.dump_stats('bo_prof.prof')
+                pr.dump_stats('bo_prof_tl2.prof')
     else:
         results, env = run()
 
diff --git a/tune_bo_dedisp.py b/tune_bo_dedisp.py
index 67a56c17e..78b4b0474 100644
--- a/tune_bo_dedisp.py
+++ b/tune_bo_dedisp.py
@@ -20,7 +20,7 @@
 max_freq = min_freq + (nr_channels-1) * channel_bandwidth
 
 
-def tune(device_name, strategy="bayes_opt_BOTorch_transfer", strategy_options={ 'max_fevals': 1500 }, lang='HIP', verbose=True, quiet=False, simulation_mode=True, profiling=True):
+def tune(device_name, strategy="bayes_opt_BOTorch_transfer_weighted", strategy_options={ 'max_fevals': 1500 }, lang='HIP', verbose=True, quiet=False, simulation_mode=True, profiling=True):
 
     args = []
 

From ecd78021afb3596730d56d0adb7dc1478aa3171f Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 5 Mar 2025 22:27:36 +0100
Subject: [PATCH 097/253] Improved conversion of tunable parameter

---
 kernel_tuner/interface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 16af99250..8d83377f6 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -934,7 +934,7 @@ def tune_kernel_T1(
         tune_param = None
         if param["Type"] in ["int", "float"]:
             vals = param["Values"]
-            if vals[:5] == "list(" or (vals[0] == "[" and vals[-1] == "]"):
+            if "list(" in vals or "range(" in vals or (vals[0] == "[" and vals[-1] == "]"):
                 tune_param = eval(vals)
             else:
                 tune_param = literal_eval(vals)

From b7cda3619afb98706f8b6ea749662221cf5abb80 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 6 Mar 2025 08:19:53 +0100
Subject: [PATCH 098/253] Extended and improved conversion of T1 arguments,
 improved error reporting on problem size differences

---
 kernel_tuner/interface.py | 11 ++++++++++-
 kernel_tuner/util.py      | 21 +++++++++++++++------
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 8d83377f6..9dd964859 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -959,7 +959,11 @@ def tune_kernel_T1(
     cmem_arguments = {}
     for arg in kernelspec["Arguments"]:
         argument = None
-        if arg["Type"] == "float" and arg["MemoryType"] == "Vector":
+        if arg["MemoryType"] == "Vector":
+            if arg["Type"] != "float":
+                raise NotImplementedError(
+                    f"Conversion for vector type '{arg['Type']}' has not yet been implemented: {arg}"
+                )
             size = arg["Size"]
             if isinstance(size, str):
                 args = tune_params.copy()
@@ -973,6 +977,11 @@ def tune_kernel_T1(
                 argument = numpy.random.randn(size).astype(numpy.float32)
             else:
                 raise NotImplementedError(f"Conversion for fill type '{arg['FillType']}' has not yet been implemented")
+        elif arg["MemoryType"] == "Scalar":
+            if arg["Type"] == "float":
+                argument = numpy.float32(arg["FillValue"])
+            else:
+                raise NotImplementedError()
         if argument is not None:
             arguments.append(argument)
             if "MemType" in arg and arg["MemType"] == "Constant":
diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index dac5d6de4..c8acfffde 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -85,7 +85,6 @@ def __init__(self):
         self.Tensor = Exception  # using Exception here as a type that will never be among kernel arguments
 
 
-
 class SkippableFailure(Exception):
     """Exception used to raise when compiling or launching a kernel fails for a reason that can be expected."""
 
@@ -889,7 +888,7 @@ def to_numeric_constraint(
         if len(comparators_found) != 1:
             return None
         comparator = comparators_found[0]
-    
+
         # split the string on the comparison and remove leading and trailing whitespace
         left, right = tuple(s.strip() for s in restriction.split(comparator))
 
@@ -1032,7 +1031,10 @@ def to_equality_constraint(
                 ):
                     parsed_restriction = parsed_restriction[1:-1]
                 # check if we can turn this into the built-in numeric comparison constraint
-                if all(all(isinstance(v, (int, float)) and type(v) is not type(True) for v in tune_params[param]) for param in params_used):
+                if all(
+                    all(isinstance(v, (int, float)) and type(v) is not type(True) for v in tune_params[param])
+                    for param in params_used
+                ):
                     finalized_constraint = to_numeric_constraint(parsed_restriction, params_used)
                 if finalized_constraint is None:
                     # check if we can turn this into the built-in equality comparison constraint
@@ -1080,7 +1082,10 @@ def compile_restrictions(
 ) -> list[tuple[Union[str, Constraint, FunctionType], list[str]]]:
     """Parses restrictions from a list of strings into a list of strings, Functions, or Constraints (if `try_to_constraint`) and parameters used, or a single Function if monolithic is true."""
     # change tuples consisting of strings and tunable parameters to only strings to compile
-    restrictions = [r[0] if isinstance(r, tuple) and len(r) == 2 and isinstance(r[0], str) and isinstance(r[1], list) else r for r in restrictions]
+    restrictions = [
+        r[0] if isinstance(r, tuple) and len(r) == 2 and isinstance(r[0], str) and isinstance(r[1], list) else r
+        for r in restrictions
+    ]
     # filter the restrictions to get only the strings
     restrictions_str, restrictions_ignore = [], []
     for r in restrictions:
@@ -1176,7 +1181,9 @@ def process_cache(cache, kernel_options, tuning_options, runner):
 
     # if file exists
     else:
-        cached_data = read_cache(cache, not tuning_options.simulation_mode)    # don't open the cache in (parallel) simulation mode to avoid race conditions
+        cached_data = read_cache(
+            cache, not tuning_options.simulation_mode
+        )  # don't open the cache in (parallel) simulation mode to avoid race conditions
 
         # if in simulation mode, use the device name from the cache file as the runner device name
         if runner.simulation_mode:
@@ -1203,7 +1210,9 @@ def process_cache(cache, kernel_options, tuning_options, runner):
             # cache returns list, problem_size is likely a tuple. Therefore, the next check
             # checks the equality of all items in the list/tuples individually
             elif not all([i == j for i, j in zip(cached_data["problem_size"], kernel_options.problem_size)]):
-                raise ValueError("Cannot load cache which contains results for different problem_size")
+                raise ValueError(
+                    f"Cannot load cache which contains results for different problem_size ({cached_data["problem_size"]=} != {kernel_options.problem_size=})"
+                )
         if cached_data["tune_params_keys"] != list(tuning_options.tune_params.keys()):
             if all(key in tuning_options.tune_params for key in cached_data["tune_params_keys"]):
                 raise ValueError(

From 8836ce20ebd5a941495a8e5b01df38bd7a340d0a Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 13:20:04 +0100
Subject: [PATCH 099/253] Improved selection of transfer learning caches

---
 kernel_tuner/interface.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 9dd964859..38855dd90 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -918,7 +918,12 @@ def tune_kernel_T1(
     cache_dir = Path(cache_filepath).parent
     # TODO remove in production!
     transfer_learning_caches = [
-        p for p in cache_dir.iterdir() if not p.stem.endswith("_T4") and p.name != cache_filepath.name
+        p
+        for p in cache_dir.iterdir()
+        if len(p.suffixes) > 0
+        and p.suffixes[-1].endswith(".json")
+        and not p.stem.endswith("_T4")
+        and p.name != cache_filepath.name
     ]
 
     # get the grid divisions

From 539aed3e6f54d042c84339fd44fe4ae5110bedc2 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 13:25:46 +0100
Subject: [PATCH 100/253] Fixed an error with quotes in an f-string

---
 kernel_tuner/util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index c8acfffde..9f7916dd7 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -1211,7 +1211,7 @@ def process_cache(cache, kernel_options, tuning_options, runner):
             # checks the equality of all items in the list/tuples individually
             elif not all([i == j for i, j in zip(cached_data["problem_size"], kernel_options.problem_size)]):
                 raise ValueError(
-                    f"Cannot load cache which contains results for different problem_size ({cached_data["problem_size"]=} != {kernel_options.problem_size=})"
+                    f"Cannot load cache which contains results for different problem_size ({cached_data['problem_size']=} != {kernel_options.problem_size=})"
                 )
         if cached_data["tune_params_keys"] != list(tuning_options.tune_params.keys()):
             if all(key in tuning_options.tune_params for key in cached_data["tune_params_keys"]):

From 435b56bbd9ed45a9303668f0f3340d058edc2f87 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 13:28:34 +0100
Subject: [PATCH 101/253] Fixed torch import error due to Tensor type hint

---
 kernel_tuner/searchspace.py | 65 ++++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 26 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 0cc444717..2c9f87633 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -21,6 +21,7 @@
 try:
     import torch
     from torch import Tensor
+
     torch_available = True
 except ImportError:
     torch_available = False
@@ -42,7 +43,7 @@ def __init__(
         block_size_names=default_block_size_names,
         build_neighbors_index=False,
         neighbor_method=None,
-        from_cache: dict=None,
+        from_cache: dict = None,
         framework="PythonConstraint",
         solver_method="PC_OptimizedBacktrackingSolver",
         path_to_ATF_cache: Path = None,
@@ -58,10 +59,14 @@ def __init__(
         """
         # check the arguments
         if from_cache is not None:
-            assert tune_params is None and restrictions is None and max_threads is None, "When `from_cache` is used, the positional arguments must be set to None."
+            assert (
+                tune_params is None and restrictions is None and max_threads is None
+            ), "When `from_cache` is used, the positional arguments must be set to None."
             tune_params = from_cache["tune_params"]
         if from_cache is None:
-            assert tune_params is not None and restrictions is not None and max_threads is not None, "Must specify positional arugments ."
+            assert (
+                tune_params is not None and restrictions is not None and max_threads is not None
+            ), "Must specify positional arugments ."
 
         # set the object attributes using the arguments
         framework_l = framework.lower()
@@ -77,9 +82,9 @@ def __init__(
         self._tensorspace_param_config_structure = []
         self._map_tensor_to_param = {}
         self._map_param_to_tensor = {}
-        self.restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
+        self.restrictions = restrictions.copy() if hasattr(restrictions, "copy") else restrictions
         # the searchspace can add commonly used constraints (e.g. maxprod(blocks) <= maxthreads)
-        self._modified_restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
+        self._modified_restrictions = restrictions.copy() if hasattr(restrictions, "copy") else restrictions
         self.param_names = list(self.tune_params.keys())
         self.params_values = tuple(tuple(param_vals) for param_vals in self.tune_params.values())
         self.params_values_indices = None
@@ -93,8 +98,12 @@ def __init__(
         restrictions = [restrictions] if not isinstance(restrictions, list) else restrictions
         if (
             len(restrictions) > 0
-            and (any(isinstance(restriction, str) for restriction in restrictions)
-            or any(isinstance(restriction[0], str) for restriction in restrictions if isinstance(restriction, tuple)))
+            and (
+                any(isinstance(restriction, str) for restriction in restrictions)
+                or any(
+                    isinstance(restriction[0], str) for restriction in restrictions if isinstance(restriction, tuple)
+                )
+            )
             and not (framework_l == "pysmt" or framework_l == "bruteforce")
         ):
             self.restrictions = compile_restrictions(
@@ -609,14 +618,14 @@ def get_param_configs_at_indices(self, indices: List[int]) -> List[tuple]:
         # map(get) is ~40% faster than numpy[indices] (average based on six searchspaces with 10000, 100000 and 1000000 configs and 10 or 100 random indices)
         return list(map(self.list.__getitem__, indices))
 
-    def get_param_config_index(self, param_config: Union[tuple, Tensor]):
+    def get_param_config_index(self, param_config: Union[tuple, any]):
         """Lookup the index for a parameter configuration, returns None if not found."""
         if torch_available and isinstance(param_config, Tensor):
             param_config = self.tensor_to_param_config(param_config)
         # constant time O(1) access - much faster than any other method, but needs a shadow dict of the search space
         return self.__dict.get(param_config, None)
-    
-    def initialize_tensorspace(self, dtype = None, device = None):
+
+    def initialize_tensorspace(self, dtype=None, device=None):
         """Encode the searchspace in a Tensor. Save the mapping. Call this function directly to control the precision or device used."""
         assert self._tensorspace is None, "Tensorspace is already initialized"
         skipped_count = 0
@@ -642,16 +651,16 @@ def initialize_tensorspace(self, dtype = None, device = None):
             if all(isinstance(v, numbers.Real) for v in param_values):
                 tensor_values = torch.tensor(param_values, dtype=self.tensor_dtype)
             else:
-                self._tensorspace_categorical_dimensions.append(index-skipped_count)
+                self._tensorspace_categorical_dimensions.append(index - skipped_count)
                 # tensor_values = np.arange(len(param_values))
                 tensor_values = torch.arange(len(param_values), dtype=self.tensor_dtype)
 
             # write the mappings to the object
-            self._map_param_to_tensor[index] = (dict(zip(param_values, tensor_values.tolist())))
-            self._map_tensor_to_param[index] = (dict(zip(tensor_values.tolist(), param_values)))
+            self._map_param_to_tensor[index] = dict(zip(param_values, tensor_values.tolist()))
+            self._map_tensor_to_param[index] = dict(zip(tensor_values.tolist(), param_values))
             bounds.append((tensor_values.min(), tensor_values.max()))
             if tensor_values.min() < tensor_values.max():
-                self._tensorspace_bounds_indices.append(index-skipped_count)
+                self._tensorspace_bounds_indices.append(index - skipped_count)
 
         # do some checks
         assert len(self.params_values) == len(self._tensorspace_param_config_structure)
@@ -666,18 +675,18 @@ def initialize_tensorspace(self, dtype = None, device = None):
 
         # set the bounds in the correct format (one array for the min, one for the max)
         bounds = torch.tensor(bounds, **self.tensor_kwargs)
-        self._tensorspace_bounds = torch.cat([bounds[:,0], bounds[:,1]]).reshape((2, bounds.shape[0]))
-    
+        self._tensorspace_bounds = torch.cat([bounds[:, 0], bounds[:, 1]]).reshape((2, bounds.shape[0]))
+
     def get_tensorspace(self):
         """Get the searchspace encoded in a Tensor. To use a non-default dtype or device, call `initialize_tensorspace` first."""
         if self._tensorspace is None:
             self.initialize_tensorspace()
         return self._tensorspace
-    
+
     def get_tensorspace_categorical_dimensions(self):
         """Get the a list of the categorical dimensions in the tensorspace."""
         return self._tensorspace_categorical_dimensions
-    
+
     def param_config_to_tensor(self, param_config: tuple):
         """Convert from a parameter configuration to a Tensor."""
         if len(self._map_param_to_tensor) == 0:
@@ -685,7 +694,7 @@ def param_config_to_tensor(self, param_config: tuple):
         array = []
         for i, param in enumerate(param_config):
             if self._tensorspace_param_config_structure[i] is not None:
-                continue    # skip over parameters not in the tensorspace
+                continue  # skip over parameters not in the tensorspace
             mapping = self._map_param_to_tensor[i]
             conversions = [None, str, float, int, bool]
             for c in conversions:
@@ -697,7 +706,7 @@ def param_config_to_tensor(self, param_config: tuple):
                     if c == conversions[-1]:
                         raise KeyError(f"No variant of {param} could be found in {mapping}") from e
         return torch.tensor(array, **self.tensor_kwargs)
-    
+
     def tensor_to_param_config(self, tensor: Tensor):
         """Convert from a Tensor to a parameter configuration."""
         assert tensor.dim() == 1, f"Parameter configuration tensor must be 1-dimensional, is {tensor.dim()} ({tensor})"
@@ -709,10 +718,10 @@ def tensor_to_param_config(self, tensor: Tensor):
             if param is not None:
                 skip_counter += 1
             else:
-                value = tensor[i-skip_counter].item()
+                value = tensor[i - skip_counter].item()
                 config[i] = self._map_tensor_to_param[i][value]
         return tuple(config)
-    
+
     def get_tensorspace_bounds(self):
         """Get the bounds to the tensorspace parameters, returned as a 2 x d dimensional tensor, and the indices of the parameters."""
         if self._tensorspace is None:
@@ -929,7 +938,7 @@ def order_param_configs(
                 f"The number of ordered parameter configurations ({len(ordered_param_configs)}) differs from the original number of parameter configurations ({len(param_configs)})"
             )
         return ordered_param_configs
-    
+
     def to_ax_searchspace(self):
         """Convert this searchspace to an Ax SearchSpace."""
         from ax import ChoiceParameter, FixedParameter, ParameterType, SearchSpace
@@ -943,12 +952,14 @@ def to_ax_searchspace(self):
                 continue
 
             # convert the types
-            assert all(isinstance(param_values[0], type(v)) for v in param_values), f"Parameter values of mixed types are not supported: {param_values}"
+            assert all(
+                isinstance(param_values[0], type(v)) for v in param_values
+            ), f"Parameter values of mixed types are not supported: {param_values}"
             param_type_mapping = {
                 str: ParameterType.STRING,
                 int: ParameterType.INT,
                 float: ParameterType.FLOAT,
-                bool: ParameterType.BOOL
+                bool: ParameterType.BOOL,
             }
             param_type = param_type_mapping[type(param_values[0])]
 
@@ -959,6 +970,8 @@ def to_ax_searchspace(self):
                 ax_searchspace.add_parameter(ChoiceParameter(param_name, param_type, param_values))
 
         # add the constraints
-        raise NotImplementedError("Conversion to Ax SearchSpace has not been fully implemented as Ax Searchspaces can't capture full complexity.")
+        raise NotImplementedError(
+            "Conversion to Ax SearchSpace has not been fully implemented as Ax Searchspaces can't capture full complexity."
+        )
 
         return ax_searchspace

From 3c48b499d9726ce9d0ae71a1a0805a27f6a67f4a Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 13:30:25 +0100
Subject: [PATCH 102/253] Fixed torch import error due to Tensor type hint

---
 kernel_tuner/searchspace.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 2c9f87633..befc2232e 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -707,7 +707,7 @@ def param_config_to_tensor(self, param_config: tuple):
                         raise KeyError(f"No variant of {param} could be found in {mapping}") from e
         return torch.tensor(array, **self.tensor_kwargs)
 
-    def tensor_to_param_config(self, tensor: Tensor):
+    def tensor_to_param_config(self, tensor):
         """Convert from a Tensor to a parameter configuration."""
         assert tensor.dim() == 1, f"Parameter configuration tensor must be 1-dimensional, is {tensor.dim()} ({tensor})"
         if len(self._map_tensor_to_param) == 0:

From 388f3253d8f7497de0e6c7609bf1eb7be77c28fa Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 13:37:10 +0100
Subject: [PATCH 103/253] Fixed torch import error due to Tensor type hint

---
 kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py
index ee552dcc7..df1b3fbb0 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py
@@ -35,7 +35,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.inputs_transfer_learning: list[Tensor] = []
         self.outcomes_transfer_learning: list[Tensor] = []
         for tl_cache in tuning_options.transfer_learning_caches:
-            print(f"Importing transfer learning for {tl_cache["kernel_name"]}-{tl_cache['device_name']}")
+            print(f"Importing transfer learning for {tl_cache['kernel_name']}-{tl_cache['device_name']}")
             # construct the searchspace for this task
             tensor_kwargs = searchspace.tensor_kwargs
             tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)

From 373782f4826d90a0dfa88cf609690080c7c9bdfa Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 13:39:24 +0100
Subject: [PATCH 104/253] Fixed torch import error due to Tensor type hint

---
 kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py     | 2 +-
 kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
index c371eb889..88101be2b 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
@@ -52,7 +52,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.outcomes_transfer_learning: list[Tensor] = []
         self.models_transfer_learning: list = []
         for tl_cache in tuning_options.transfer_learning_caches:
-            print(f"Importing transfer learning for {tl_cache["kernel_name"]}-{tl_cache['device_name']}")
+            print(f"Importing transfer learning for {tl_cache['kernel_name']}-{tl_cache['device_name']}")
             # construct the searchspace for this task
             tensor_kwargs = searchspace.tensor_kwargs
             tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
index 1778d3ac2..814d5fcd0 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
@@ -36,7 +36,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.outcomes_transfer_learning: list[Tensor] = []
         self.models_transfer_learning: list = []
         for tl_cache in tuning_options.transfer_learning_caches:
-            print(f"Importing transfer learning for {tl_cache["kernel_name"]}-{tl_cache['device_name']}")
+            print(f"Importing transfer learning for {tl_cache['kernel_name']}-{tl_cache['device_name']}")
             # construct the searchspace for this task
             tensor_kwargs = searchspace.tensor_kwargs
             tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)

From db3abb39b135532a6e9a562fde58ad0ddde7e4aa Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 14:22:33 +0100
Subject: [PATCH 105/253] Merge with searchspace_experiments

---
 kernel_tuner/searchspace.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 6c718c33d..9fd08afd9 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -144,21 +144,21 @@ def __init__(
             else:
                 raise ValueError(f"Invalid framework parameter {framework}")
 
-        # get the solver given the solver method argument
-        solver = ""
-        if solver_method.lower() == "pc_backtrackingsolver":
-            solver = BacktrackingSolver()
-        elif solver_method.lower() == "pc_optimizedbacktrackingsolver":
-            solver = OptimizedBacktrackingSolver(forwardcheck=False)
-        elif solver_method.lower() == "pc_parallelsolver":
-            raise NotImplementedError("ParallelSolver is not yet implemented")
-            # solver = ParallelSolver()
-        elif solver_method.lower() == "pc_recursivebacktrackingsolver":
-            solver = RecursiveBacktrackingSolver()
-        elif solver_method.lower() == "pc_minconflictssolver":
-            solver = MinConflictsSolver()
-        else:
-            raise ValueError(f"Solver method {solver_method} not recognized.")
+            # get the solver given the solver method argument
+            solver = ""
+            if solver_method.lower() == "pc_backtrackingsolver":
+                solver = BacktrackingSolver()
+            elif solver_method.lower() == "pc_optimizedbacktrackingsolver":
+                solver = OptimizedBacktrackingSolver(forwardcheck=False)
+            elif solver_method.lower() == "pc_parallelsolver":
+                raise NotImplementedError("ParallelSolver is not yet implemented")
+                # solver = ParallelSolver()
+            elif solver_method.lower() == "pc_recursivebacktrackingsolver":
+                solver = RecursiveBacktrackingSolver()
+            elif solver_method.lower() == "pc_minconflictssolver":
+                solver = MinConflictsSolver()
+            else:
+                raise ValueError(f"Solver method {solver_method} not recognized.")
 
             # build the search space
             self.list, self.__dict, self.size = searchspace_builder(block_size_names, max_threads, solver)

From c692ba6cfa725cc96bb7b0ef7f495eef8d289bbf Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 14:45:04 +0100
Subject: [PATCH 106/253] Loosened required positional arguments

---
 kernel_tuner/searchspace.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 9fd08afd9..8265c44ab 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -70,8 +70,8 @@ def __init__(
             tune_params = from_cache["tune_params"]
         if from_cache is None:
             assert (
-                tune_params is not None and restrictions is not None and max_threads is not None
-            ), "Must specify positional arugments ."
+                tune_params is not None and max_threads is not None
+            ), "Must specify positional arguments."
 
         # set the object attributes using the arguments
         framework_l = framework.lower()

From fe113e6f534761136bea82acffe8283a34d833a8 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 14:56:34 +0100
Subject: [PATCH 107/253] Changed benchmarks location for hypertuner

---
 kernel_tuner/backends/hypertuner.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index 0b1c69adb..15867fbd9 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -68,12 +68,12 @@ def compile(self, kernel_instance):
         applications = [
             {
                 "name": "dedispersion_milo",
-                "folder": "../autotuning_methodology/cached_data_used/kernels",
+                "folder": "../autotuning_methodology/benchmark_hub/kernels",
                 "input_file": "dedispersion_milo.json"
             },
             {
                 "name": "convolution_milo",
-                "folder": "../autotuning_methodology/cached_data_used/kernels",
+                "folder": "../autotuning_methodology/benchmark_hub/kernels",
                 "input_file": "convolution_milo.json"
             }
         ]

From 5e65abdfbe3408b844dfa112ac14a8002b8fd6e1 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 15:03:02 +0100
Subject: [PATCH 108/253] Used hip-python-fork package as hip-python is not
 available

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 717fa9bdd..4b2d721ab 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -59,7 +59,7 @@ kernel_tuner = "kernel_tuner.interface:entry_point"
 
 # ATTENTION: if anything is changed here, run `poetry update`
 [tool.poetry.dependencies]
-python = ">=3.10"          # NOTE when changing the supported Python versions, also change the test versions in the noxfile
+python = ">=3.10,<3.15"    # NOTE when changing the supported Python versions, also change the test versions in the noxfile
 numpy = "^1.26.0"          # Python 3.12 requires numpy at least 1.26
 scipy = ">=1.14.1"
 botorch = ">=0.12.0"
@@ -83,7 +83,7 @@ pynvml = { version = "^11.4.1", optional = true }
 # OpenCL
 pyopencl = { version = "*", optional = true } # Attention: if pyopencl is changed here, also change `session.install("pyopencl")` in the Noxfile
 # HIP
-hip-python = { version = "*", optional = true }
+hip-python-fork = { version = "*", optional = true }
 # Tutorial (for the notebooks used in the examples)
 jupyter = { version = "^1.0.0", optional = true }
 matplotlib = { version = "^3.5.0", optional = true }

From 0ba00a0403b82913eaa9db3ede946e16dd124702 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 15:09:50 +0100
Subject: [PATCH 109/253] Removed transfer learning references

---
 kernel_tuner/interface.py                     |  38 +--
 .../bayes_opt_BOTorch_transfer_RGPE.py        | 322 ------------------
 .../bayes_opt_BOTorch_transfer_direct.py      | 141 --------
 .../bayes_opt_BOTorch_transfer_weighted.py    | 156 ---------
 4 files changed, 1 insertion(+), 656 deletions(-)
 delete mode 100644 kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
 delete mode 100644 kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py
 delete mode 100644 kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 38855dd90..45a4bfef9 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -52,9 +52,6 @@
     bayes_opt,
     bayes_opt_alt_BOTorch,
     bayes_opt_BOTorch,
-    bayes_opt_BOTorch_transfer_direct,
-    bayes_opt_BOTorch_transfer_RGPE,
-    bayes_opt_BOTorch_transfer_weighted,
     bayes_opt_GPyTorch,
     bayes_opt_GPyTorch_lean,
     bayes_opt_old,
@@ -94,9 +91,6 @@
     "bayes_opt_GPyTorch_lean": bayes_opt_GPyTorch_lean,
     "bayes_opt_BOTorch": bayes_opt_BOTorch,
     "bayes_opt_BOTorch_alt": bayes_opt_alt_BOTorch,
-    "bayes_opt_BOTorch_transfer_direct": bayes_opt_BOTorch_transfer_direct,
-    "bayes_opt_BOTorch_transfer_weighted": bayes_opt_BOTorch_transfer_weighted,
-    "bayes_opt_BOTorch_transfer_RGPE": bayes_opt_BOTorch_transfer_RGPE,
 }
 
 
@@ -483,15 +477,6 @@ def __deepcopy__(self, _):
                 "string",
             ),
         ),
-        (
-            "transfer_learning_caches",
-            (
-                """Array of filepaths to caches to use for transfer learning.
-        Filename uses suffix ".json", which is appended if missing.
-        """,
-                "list(string) or list(Path)",
-            ),
-        ),
         ("metrics", ("specifies user-defined metrics, please see :ref:`metrics`.", "dict")),
         ("simulation_mode", ("Simulate an auto-tuning search from an existing cachefile", "bool")),
         ("observers", ("""A list of Observers to use during tuning, please see :ref:`observers`.""", "list")),
@@ -608,7 +593,6 @@ def tune_kernel(
     observers=None,
     objective=None,
     objective_higher_is_better=None,
-    transfer_learning_caches=[],
 ):
     start_overhead_time = perf_counter()
     if log:
@@ -710,15 +694,6 @@ def preprocess_cache(filepath):
         tuning_options.cache = {}
         tuning_options.cachefile = None
 
-    # process transfer learning caches
-    tuning_options.transfer_learning_caches = []
-    if transfer_learning_caches and len(transfer_learning_caches) > 0:
-        for transfer_learning_cache in transfer_learning_caches:
-            cache = preprocess_cache(transfer_learning_cache)
-            assert cache != tuning_options.cache, "Transfer learning cache can not be the same as current cache"
-            cache_data = util.read_cache(cache, open_cache=False)
-            tuning_options.transfer_learning_caches.append(cache_data)
-
     # create search space
     searchspace = Searchspace(tune_params, restrictions, runner.dev.max_threads)
     restrictions = searchspace._modified_restrictions
@@ -912,19 +887,9 @@ def tune_kernel_T1(
     device = kernelspec["Device"]["Name"]
     strategy = inputs["Search"]["Name"]
 
-    # set the cache and transfer learning cache paths
+    # set the cache path
     if cache_filepath is None and "SimulationInput" in kernelspec:
         cache_filepath = Path(kernelspec["SimulationInput"])
-    cache_dir = Path(cache_filepath).parent
-    # TODO remove in production!
-    transfer_learning_caches = [
-        p
-        for p in cache_dir.iterdir()
-        if len(p.suffixes) > 0
-        and p.suffixes[-1].endswith(".json")
-        and not p.stem.endswith("_T4")
-        and p.name != cache_filepath.name
-    ]
 
     # get the grid divisions
     grid_divs = {}
@@ -1018,7 +983,6 @@ def tune_kernel_T1(
         strategy_options=strategy_options,
         objective=objective,
         objective_higher_is_better=objective_higher_is_better,
-        transfer_learning_caches=transfer_learning_caches,
     )
     if output_T4:
         return get_t4_metadata(), get_t4_results(results, tune_params, objective=objective)
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
deleted file mode 100644
index 88101be2b..000000000
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
+++ /dev/null
@@ -1,322 +0,0 @@
-"""Bayesian Optimization implementation using BO Torch and transfer learning with RGPE."""
-
-try:
-    import torch
-    from botorch.acquisition import qLogNoisyExpectedImprovement
-    from botorch.fit import fit_gpytorch_mll, fit_gpytorch_mll_torch
-    from botorch.models.gpytorch import GPyTorchModel
-    from botorch.optim.optimize import optimize_acqf_discrete_local_search
-    from botorch.sampling.normal import SobolQMCNormalSampler
-    from gpytorch.distributions import MultivariateNormal
-    from gpytorch.lazy import PsdSumLazyTensor
-    from gpytorch.likelihoods import LikelihoodList
-    from gpytorch.models import GP
-    from torch import Tensor
-    from torch.nn import ModuleList
-    bayes_opt_present = True
-except ImportError:
-    bayes_opt_present = False
-
-
-from kernel_tuner.searchspace import Searchspace
-from kernel_tuner.strategies.bayes_opt_BOTorch import BayesianOptimization
-from kernel_tuner.util import StopCriterionReached
-
-# settings
-NUM_BASE_TASKS = 5
-N_BATCH = 10
-NUM_POSTERIOR_SAMPLES = 256
-RANDOM_INITIALIZATION_SIZE = 3
-N_TRIALS = 10
-MC_SAMPLES = 512
-N_RESTART_CANDIDATES = 512
-N_RESTARTS = 10
-Q_BATCH_SIZE = 1
-
-
-def tune(searchspace: Searchspace, runner, tuning_options):
-    """The entry function for tuning a searchspace using this algorithm."""
-    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
-    bo = BayesianOptimizationTransfer(searchspace, runner, tuning_options)
-    return bo.run(max_fevals)
-
-class BayesianOptimizationTransfer(BayesianOptimization):
-    """Bayesian Optimization class with transfer learning."""
-
-    def __init__(self, searchspace: Searchspace, runner, tuning_options):
-        super().__init__(searchspace, runner, tuning_options)
-
-        # set up the data and model for each transfer learning base task
-        self.searchspaces_transfer_learning: list[Searchspace] = []
-        self.inputs_transfer_learning: list[Tensor] = []
-        self.outcomes_transfer_learning: list[Tensor] = []
-        self.models_transfer_learning: list = []
-        for tl_cache in tuning_options.transfer_learning_caches:
-            print(f"Importing transfer learning for {tl_cache['kernel_name']}-{tl_cache['device_name']}")
-            # construct the searchspace for this task
-            tensor_kwargs = searchspace.tensor_kwargs
-            tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)
-            tl_searchspace.initialize_tensorspace(**tensor_kwargs)
-            self.searchspaces_transfer_learning.append(tl_searchspace)
-
-            # get the inputs and outcomes for this task
-            inputs = []
-            outcomes = []
-            for c in tl_cache["cache"].values():
-                result = c[tuning_options.objective]
-                if self.is_valid_result(result):
-                    config = tuple(c[p] for p in tl_searchspace.tune_params.keys())
-                    inputs.append(tl_searchspace.param_config_to_tensor(config))
-                    if not self.maximize:
-                        result = -result
-                    outcomes.append(result)
-            tl_inputs = torch.stack(inputs).to(tl_searchspace.tensor_device)
-            tl_outcomes = torch.tensor(outcomes, **tensor_kwargs).unsqueeze(-1)
-            assert tl_inputs.shape[0] == tl_outcomes.shape[0]
-            self.inputs_transfer_learning.append(tl_inputs)
-            self.outcomes_transfer_learning.append(tl_outcomes)
-
-            # fit a model and likelihood for this task
-            model, mll = self.get_model_and_likelihood(tl_searchspace, tl_inputs, tl_outcomes)
-            mll = self.fit(mll)
-            self.models_transfer_learning.append(model)
-    
-    def roll_col(self, X, shift):
-        """Rotate columns to right by shift."""
-        return torch.cat((X[..., -shift:], X[..., :-shift]), dim=-1)
-    
-    def compute_ranking_loss(self, f_samps, target_y):
-        """Compute ranking loss for each sample from the posterior over target points.
-
-        Args:
-            f_samps: `n_samples x (n) x n`-dim tensor of samples
-            target_y: `n x 1`-dim tensor of targets
-        Returns:
-            Tensor: `n_samples`-dim tensor containing the ranking loss across each sample
-        """
-        n = target_y.shape[0]
-        if f_samps.ndim == 3:
-            # Compute ranking loss for target model
-            # take cartesian product of target_y
-            cartesian_y = torch.cartesian_prod(
-                target_y.squeeze(-1),
-                target_y.squeeze(-1),
-            ).view(n, n, 2)
-            # the diagonal of f_samps are the out-of-sample predictions
-            # for each LOO model, compare the out of sample predictions to each in-sample prediction
-            rank_loss = (
-                (
-                    (f_samps.diagonal(dim1=1, dim2=2).unsqueeze(-1) < f_samps)
-                    ^ (cartesian_y[..., 0] < cartesian_y[..., 1])
-                )
-                .sum(dim=-1)
-                .sum(dim=-1)
-            )
-        else:
-            rank_loss = torch.zeros(
-                f_samps.shape[0], dtype=torch.long, device=target_y.device
-            )
-            y_stack = target_y.squeeze(-1).expand(f_samps.shape)
-            for i in range(1, target_y.shape[0]):
-                rank_loss += (
-                    (self.roll_col(f_samps, i) < f_samps) ^ (self.roll_col(y_stack, i) < y_stack)
-                ).sum(dim=-1)
-        return rank_loss
-    
-    def get_target_model_loocv_sample_preds(self, train_x, train_y, train_yvar, target_model, num_samples, no_state=False):
-        """Create a batch-mode LOOCV GP and draw a joint sample across all points from the target task.
-
-        Args:
-            train_x: `n x d` tensor of training points
-            train_y: `n x 1` tensor of training targets
-            target_model: fitted target model
-            num_samples: number of mc samples to draw
-
-        Return: `num_samples x n x n`-dim tensor of samples, where dim=1 represents the `n` LOO models,
-            and dim=2 represents the `n` training points.
-        """
-        batch_size = len(train_x)
-        masks = torch.eye(len(train_x), dtype=torch.uint8, device=self.tensor_device).bool()
-        train_x_cv = torch.stack([train_x[~m] for m in masks])
-        train_y_cv = torch.stack([train_y[~m] for m in masks])
-        train_yvar_cv = torch.stack([train_yvar[~m] for m in masks]) if train_yvar is not None else None
-
-        # use a state dictionary for fast updates
-        if no_state:
-            state_dict_expanded = None
-        else:
-            state_dict = target_model.state_dict()
-
-            # expand to batch size of batch_mode LOOCV model
-            state_dict_expanded = {
-                name: t.expand(batch_size, *[-1 for _ in range(t.ndim)])
-                for name, t in state_dict.items()
-            }
-        
-        model, _ = self.get_model_and_likelihood(
-            self.searchspace, train_x_cv, train_y_cv, train_yvar_cv, state_dict=state_dict_expanded
-        )
-        with torch.no_grad():
-            posterior = model.posterior(train_x)
-            # Since we have a batch mode gp and model.posterior always returns an output dimension,
-            # the output from `posterior.sample()` here `num_samples x n x n x 1`, so let's squeeze
-            # the last dimension.
-            sampler = SobolQMCNormalSampler(sample_shape=torch.Size([num_samples]))
-            return sampler(posterior).squeeze(-1)
-    
-    def compute_rank_weights(self, train_x, train_y, train_yvar, base_models, target_model, num_samples, no_state=False):
-        """Compute ranking weights for each base model and the target model (using LOOCV for the target model).
-        
-        Note: This implementation does not currently address weight dilution, since we only have a small number of base models.
-
-        Args:
-            train_x: `n x d` tensor of training points (for target task)
-            train_y: `n` tensor of training targets (for target task)
-            base_models: list of base models
-            target_model: target model
-            num_samples: number of mc samples
-
-        Returns:
-            Tensor: `n_t`-dim tensor with the ranking weight for each model
-        """
-        ranking_losses = []
-
-        # compute ranking loss for each base model
-        for model in base_models:
-            # compute posterior over training points for target task
-            posterior = model.posterior(train_x)
-            sampler = SobolQMCNormalSampler(sample_shape=torch.Size([num_samples]))
-            base_f_samps = sampler(posterior).squeeze(-1).squeeze(-1)
-            # compute and save ranking loss
-            ranking_losses.append(self.compute_ranking_loss(base_f_samps, train_y))
-
-        # compute ranking loss for target model using LOOCV
-        # f_samps
-        target_f_samps = self.get_target_model_loocv_sample_preds(
-            train_x,
-            train_y,
-            train_yvar,
-            target_model,
-            num_samples,
-            no_state=no_state,
-        )
-        ranking_losses.append(self.compute_ranking_loss(target_f_samps, train_y))
-        ranking_loss_tensor = torch.stack(ranking_losses)
-        # compute best model (minimum ranking loss) for each sample
-        best_models = torch.argmin(ranking_loss_tensor, dim=0)
-        # compute proportion of samples for which each model is best
-        rank_weights = (
-            best_models.bincount(minlength=len(ranking_losses)).type_as(train_x)
-            / num_samples
-        )
-        return rank_weights
-    
-    def run(self, max_fevals: int, max_batch_size=2048):
-        """Run the Bayesian Optimization loop for at most `max_fevals`."""
-        try:
-            if not self.initial_sample_taken:
-                self.initial_sample()
-            model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
-            fevals_left = max_fevals - self.initial_sample_size
-            first_loop = self.initial_sample_size > 0
-
-            # Bayesian optimization loop
-            for _ in range(fevals_left):
-
-                # fit a Gaussian Process model
-                fit_gpytorch_mll(mll, optimizer=fit_gpytorch_mll_torch)
-
-                # calculate the rank weights
-                model_list = self.models_transfer_learning + [model]
-                rank_weights = self.compute_rank_weights(
-                    self.train_X,
-                    self.train_Y,
-                    self.train_Yvar,
-                    self.models_transfer_learning,
-                    model,
-                    NUM_POSTERIOR_SAMPLES,
-                    no_state=first_loop,
-                )
-
-                # create rank model and acquisition function
-                rgpe_model = RGPE(model_list, rank_weights)
-                # acqf = LogExpectedImprovement(model=rgpe_model, best_f=self.train_Y.max(), maximize=True)
-                sampler_qnei = SobolQMCNormalSampler(sample_shape=torch.Size([MC_SAMPLES]))
-                qNEI = qLogNoisyExpectedImprovement(
-                    model=rgpe_model,
-                    X_baseline=self.train_X,
-                    sampler=sampler_qnei,
-                    prune_baseline=False,
-                )
-
-                # optimize
-                candidate, _ = optimize_acqf_discrete_local_search(
-                    acq_function=qNEI,
-                    discrete_choices=self.searchspace_tensors,
-                    q=Q_BATCH_SIZE,
-                    num_restarts=N_RESTARTS,
-                    raw_samples=N_RESTART_CANDIDATES,
-                    max_batch_size=max_batch_size
-                )
-                    
-                # evaluate the new candidate
-                self.evaluate_configs(candidate)
-                fevals_left -= 1
-
-                # reinitialize the models so they are ready for fitting on next iteration
-                if fevals_left > 0:
-                    model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
-                    first_loop = False
-        except StopCriterionReached as e:
-            if self.tuning_options.verbose:
-                print(e)
-
-        return self.cost_func.results
-
-
-class RGPE(GP, GPyTorchModel):
-    """Rank-weighted GP ensemble.
-    
-    Note: this class inherits from GPyTorchModel which provides an interface for GPyTorch models in botorch.
-    """
-
-    _num_outputs = 1  # metadata for botorch
-
-    def __init__(self, models, weights):
-        super().__init__()
-        self.models = ModuleList(models)
-        for m in models:
-            if not hasattr(m, "likelihood"):
-                raise ValueError(
-                    "RGPE currently only supports models that have a likelihood (e.g. ExactGPs)"
-                )
-        self.likelihood = LikelihoodList(*[m.likelihood for m in models])
-        self.weights = weights
-        self.to(weights)
-
-    def forward(self, x):
-        weighted_means = []
-        weighted_covars = []
-        # filter model with zero weights
-        # weights on covariance matrices are weight**2
-        non_zero_weight_indices = (self.weights**2 > 0).nonzero()
-        non_zero_weights = self.weights[non_zero_weight_indices]
-        # re-normalize
-        non_zero_weights /= non_zero_weights.sum()
-
-        for non_zero_weight_idx in range(non_zero_weight_indices.shape[0]):
-            raw_idx = non_zero_weight_indices[non_zero_weight_idx].item()
-            model = self.models[raw_idx]
-            posterior = model.posterior(x)
-            # unstandardize predictions
-            posterior_mean = posterior.mean.squeeze(-1)
-            posterior_cov = posterior.mvn.lazy_covariance_matrix
-            # apply weight
-            weight = non_zero_weights[non_zero_weight_idx]
-            weighted_means.append(weight * posterior_mean)
-            weighted_covars.append(posterior_cov * weight**2)
-        # set mean and covariance to be the rank-weighted sum the means and covariances of the
-        # base models and target model
-        mean_x = torch.stack(weighted_means).sum(dim=0)
-        covar_x = PsdSumLazyTensor(*weighted_covars)
-        return MultivariateNormal(mean_x, covar_x)
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py
deleted file mode 100644
index df1b3fbb0..000000000
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py
+++ /dev/null
@@ -1,141 +0,0 @@
-"""Bayesian Optimization implementation using BO Torch and transfer learning with RGPE."""
-
-try:
-    import torch
-    from botorch.acquisition import LogExpectedImprovement
-    from botorch.optim.optimize import optimize_acqf_discrete
-    from torch import Tensor
-    bayes_opt_present = True
-except ImportError:
-    bayes_opt_present = False
-
-from math import ceil, sqrt
-
-import numpy as np
-
-from kernel_tuner.searchspace import Searchspace
-from kernel_tuner.strategies.bayes_opt_BOTorch import BayesianOptimization
-from kernel_tuner.util import StopCriterionReached
-
-
-def tune(searchspace: Searchspace, runner, tuning_options):
-    """The entry function for tuning a searchspace using this algorithm."""
-    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
-    bo = BayesianOptimizationTransfer(searchspace, runner, tuning_options)
-    return bo.run(max_fevals)
-
-class BayesianOptimizationTransfer(BayesianOptimization):
-    """Bayesian Optimization class with transfer learning."""
-
-    def __init__(self, searchspace: Searchspace, runner, tuning_options):
-        super().__init__(searchspace, runner, tuning_options)
-
-        # set up the data and model for each transfer learning base task
-        self.searchspaces_transfer_learning: list[Searchspace] = []
-        self.inputs_transfer_learning: list[Tensor] = []
-        self.outcomes_transfer_learning: list[Tensor] = []
-        for tl_cache in tuning_options.transfer_learning_caches:
-            print(f"Importing transfer learning for {tl_cache['kernel_name']}-{tl_cache['device_name']}")
-            # construct the searchspace for this task
-            tensor_kwargs = searchspace.tensor_kwargs
-            tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)
-            tl_searchspace.initialize_tensorspace(**tensor_kwargs)
-            self.searchspaces_transfer_learning.append(tl_searchspace)
-
-            # get the inputs and outcomes for this task
-            inputs = []
-            outcomes = []
-            for c in tl_cache["cache"].values():
-                result = c[tuning_options.objective]
-                if self.is_valid_result(result):
-                    config = tuple(c[p] for p in tl_searchspace.tune_params.keys())
-                    inputs.append(tl_searchspace.param_config_to_tensor(config))
-                    if not self.maximize:
-                        result = -result
-                    outcomes.append(result)
-            tl_inputs = torch.stack(inputs).to(tl_searchspace.tensor_device)
-            tl_outcomes = torch.tensor(outcomes, **tensor_kwargs).unsqueeze(-1)
-            assert tl_inputs.shape[0] == tl_outcomes.shape[0]
-            self.inputs_transfer_learning.append(tl_inputs)
-            self.outcomes_transfer_learning.append(tl_outcomes)
-    
-    def run(self, max_fevals: int, max_batch_size=2048):
-        """Run the Bayesian Optimization loop for at most `max_fevals`."""
-        try:
-            if not self.initial_sample_taken:
-                self.initial_sample()
-            model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
-            fevals_left = max_fevals - self.initial_sample_size
-
-            # create array to gradually reduce number of optimization spaces as fewer fevals are left
-            tensorspace_size = self.searchspace_tensors.size(0)
-            reserve_final_loops = min(3, fevals_left)   # reserve some loops at the end that are never split
-            fevals_left -= reserve_final_loops
-            num_loops = min(max(round(sqrt(fevals_left*2)), 3), fevals_left)  # set the number of loops for the array
-            avg_optimization_spaces = max(round(sqrt(tensorspace_size / max_batch_size)), 1)  # set the average number of optimization spaces
-            numspace = np.geomspace(start=avg_optimization_spaces, stop=0.1, num=num_loops)
-            nums_optimization_spaces = np.clip(np.round(numspace * (fevals_left / numspace.sum())), a_min=1, a_max=None)
-            # if there's a discrepency, add or subtract the difference from the first number
-            if np.sum(nums_optimization_spaces) != fevals_left:
-                nums_optimization_spaces[0] += fevals_left - np.sum(nums_optimization_spaces)
-            nums_optimization_spaces = np.concatenate([nums_optimization_spaces, np.full(reserve_final_loops, 1)])
-            fevals_left += reserve_final_loops
-
-            # get the number of top configurations to select from transfer learning caches
-            num_tl_caches = len(self.outcomes_transfer_learning)
-            use_top_configs_until_loop = np.argmax(nums_optimization_spaces < num_tl_caches+1)  # stop after we have fewer num_optimization_spaces than caches - because with more caches the ACQF will not be used as much
-            num_top_configs = sum([ceil(n/(num_tl_caches+1)) for n in nums_optimization_spaces[:use_top_configs_until_loop]])
-
-            # select the top configurations for each of the transfer learning caches
-            top_configs = [list() for _ in self.outcomes_transfer_learning]
-            for tl_index, tl_outcomes in enumerate(self.outcomes_transfer_learning):
-                top_configs[tl_index] = self.inputs_transfer_learning[tl_outcomes.argmax()[:num_top_configs]] # TODO check if correct
-
-            # # if there are duplicate configurations, move them up and make sure there are only unique configs
-            # duplicate configurations are inserted at the num_configs-(sum of indices)th index
-            # # TODO
-
-            # Bayesian optimization loop
-            for loop_i, num_optimization_spaces in enumerate(nums_optimization_spaces):
-                num_optimization_spaces = round(min(num_optimization_spaces, fevals_left))
-
-                # calculate how many of the optimization spaces to optimize using GP
-                optimize_with_GP = max(round(num_optimization_spaces/(num_tl_caches+1)), 1)
-
-                # divide the optimization space into random chuncks
-                tensorspace_size = self.searchspace_tensors.size(0)
-                if num_optimization_spaces <= 1:
-                    optimization_spaces = [self.searchspace_tensors]
-                else:
-                    # shuffle the searchspace
-                    shuffled_indices = torch.randperm(tensorspace_size)
-                    tensorspace = self.searchspace_tensors[shuffled_indices]
-                    optimization_spaces = tensorspace.split(ceil(tensorspace_size / num_optimization_spaces))
-
-                # fit on a Gaussian Process model
-                mll = self.fit(mll)
-
-                # define the acquisition functions
-                acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
-                
-                # optimize acquisition function to find the next evaluation point
-                for i, optimization_space in enumerate(optimization_spaces):
-                    candidate, _ = optimize_acqf_discrete(
-                        acqf, 
-                        q=1, 
-                        choices=optimization_space,
-                        max_batch_size=max_batch_size
-                    )
-                    
-                    # evaluate the new candidate
-                    self.evaluate_configs(candidate)
-                    fevals_left -= 1
-
-                # reinitialize the models so they are ready for fitting on next iteration
-                if loop_i < len(nums_optimization_spaces) - 1:
-                    model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar, state_dict=model.state_dict())
-        except StopCriterionReached as e:
-            if self.tuning_options.verbose:
-                print(e)
-
-        return self.cost_func.results
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
deleted file mode 100644
index 814d5fcd0..000000000
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
+++ /dev/null
@@ -1,156 +0,0 @@
-"""Bayesian Optimization implementation using BO Torch and transfer learning with RGPE."""
-
-try:
-    import torch
-    from botorch.acquisition import LogExpectedImprovement
-    from botorch.optim.optimize import optimize_acqf_discrete
-    from torch import Tensor
-    bayes_opt_present = True
-except ImportError:
-    bayes_opt_present = False
-
-from math import ceil, sqrt
-
-import numpy as np
-
-from kernel_tuner.searchspace import Searchspace
-from kernel_tuner.strategies.bayes_opt_BOTorch import BayesianOptimization
-from kernel_tuner.util import StopCriterionReached
-
-
-def tune(searchspace: Searchspace, runner, tuning_options):
-    """The entry function for tuning a searchspace using this algorithm."""
-    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
-    bo = BayesianOptimizationTransfer(searchspace, runner, tuning_options)
-    return bo.run(max_fevals)
-
-class BayesianOptimizationTransfer(BayesianOptimization):
-    """Bayesian Optimization class with transfer learning."""
-
-    def __init__(self, searchspace: Searchspace, runner, tuning_options):
-        super().__init__(searchspace, runner, tuning_options)
-
-        # set up the data and model for each transfer learning base task
-        self.searchspaces_transfer_learning: list[Searchspace] = []
-        self.inputs_transfer_learning: list[Tensor] = []
-        self.outcomes_transfer_learning: list[Tensor] = []
-        self.models_transfer_learning: list = []
-        for tl_cache in tuning_options.transfer_learning_caches:
-            print(f"Importing transfer learning for {tl_cache['kernel_name']}-{tl_cache['device_name']}")
-            # construct the searchspace for this task
-            tensor_kwargs = searchspace.tensor_kwargs
-            tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)
-            tl_searchspace.initialize_tensorspace(**tensor_kwargs)
-            self.searchspaces_transfer_learning.append(tl_searchspace)
-
-            # get the inputs and outcomes for this task
-            inputs = []
-            outcomes = []
-            for c in tl_cache["cache"].values():
-                result = c[tuning_options.objective]
-                if self.is_valid_result(result):
-                    config = tuple(c[p] for p in tl_searchspace.tune_params.keys())
-                    inputs.append(tl_searchspace.param_config_to_tensor(config))
-                    if not self.maximize:
-                        result = -result
-                    outcomes.append(result)
-            tl_inputs = torch.stack(inputs).to(tl_searchspace.tensor_device)
-            tl_outcomes = torch.tensor(outcomes, **tensor_kwargs).unsqueeze(-1)
-            assert tl_inputs.shape[0] == tl_outcomes.shape[0]
-            self.inputs_transfer_learning.append(tl_inputs)
-            self.outcomes_transfer_learning.append(tl_outcomes)
-
-            # fit a model and likelihood for this task
-            model, mll = self.get_model_and_likelihood(tl_searchspace, tl_inputs, tl_outcomes)
-            mll = self.fit(mll)
-            self.models_transfer_learning.append(model)
-    
-    def run(self, max_fevals: int, max_batch_size=2048):
-        """Run the Bayesian Optimization loop for at most `max_fevals`."""
-        try:
-            if not self.initial_sample_taken:
-                self.initial_sample()
-            model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
-            fevals_left = max_fevals - self.initial_sample_size
-
-            # create array to gradually reduce number of optimization spaces as fewer fevals are left
-            tensorspace_size = self.searchspace_tensors.size(0)
-            reserve_final_loops = min(3, fevals_left)   # reserve some loops at the end that are never split
-            fevals_left -= reserve_final_loops
-            num_loops = min(max(round(sqrt(fevals_left*2)), 3), fevals_left)  # set the number of loops for the array
-            avg_optimization_spaces = max(round(sqrt(tensorspace_size / max_batch_size)), 1)  # set the average number of optimization spaces
-            numspace = np.geomspace(start=avg_optimization_spaces, stop=0.1, num=num_loops)
-            nums_optimization_spaces = np.clip(np.round(numspace * (fevals_left / numspace.sum())), a_min=1, a_max=None)
-            # if there's a discrepency, add or subtract the difference from the first number
-            if np.sum(nums_optimization_spaces) != fevals_left:
-                nums_optimization_spaces[0] += fevals_left - np.sum(nums_optimization_spaces)
-            nums_optimization_spaces = np.concatenate([nums_optimization_spaces, np.full(reserve_final_loops, 1)])
-            fevals_left += reserve_final_loops
-
-            # create the acquisition functions for the transferred GPs
-            acqfs = [LogExpectedImprovement(model=m, best_f=self.outcomes_transfer_learning[i].max(), maximize=True) for i, m in enumerate(self.models_transfer_learning)]
-            acqfs_results = [list() for _ in acqfs]
-
-            # Bayesian optimization loop
-            for loop_i, num_optimization_spaces in enumerate(nums_optimization_spaces):
-                num_optimization_spaces = round(min(num_optimization_spaces, fevals_left))
-
-                # fit on a Gaussian Process model
-                mll = self.fit(mll)
-
-                # divide the optimization space into random chuncks
-                tensorspace_size = self.searchspace_tensors.size(0)
-                if num_optimization_spaces <= 1:
-                    optimization_spaces = [self.searchspace_tensors]
-                else:
-                    # shuffle the searchspace
-                    shuffled_indices = torch.randperm(tensorspace_size)
-                    tensorspace = self.searchspace_tensors[shuffled_indices]
-                    optimization_spaces = tensorspace.split(ceil(tensorspace_size / num_optimization_spaces))
-
-                # set which acqfuisition function is used at each point of the optimization space loop
-                if num_optimization_spaces > len(self.models_transfer_learning):
-                    # all models get a proportional turn
-                    selected_acqfs = np.linspace(start=0, stop=len(acqfs), num=num_optimization_spaces)
-                    selected_acqfs = selected_acqfs.round(0).astype(int)
-                    selected_acqfs = selected_acqfs.clip(0, len(acqfs)-1)
-                elif num_optimization_spaces == len(self.models_transfer_learning):
-                    # all models get one turn
-                    selected_acqfs = list(range(num_optimization_spaces))
-                elif num_optimization_spaces == 1:
-                    # only the target model is used
-                    selected_acqfs = [0]
-                else:
-                    # only select the target + best performing models (can include target as well)
-                    acqfs_means = np.array([np.mean(r) for r in acqfs_results])
-                    selected_acqfs = [0] + np.argpartition(acqfs_means, -num_optimization_spaces-1)[-num_optimization_spaces-1:]
-                    selected_acqfs = selected_acqfs.round(0).astype(int).clip(0, num_optimization_spaces-1)
-
-                # define the acquisition functions
-                acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
-                current_acqfs = [acqf] + acqfs
-                
-                # optimize acquisition function to find the next evaluation point
-                for i, optimization_space in enumerate(optimization_spaces):
-                    acqfs_index = selected_acqfs[i]
-                    candidate, _ = optimize_acqf_discrete(
-                        current_acqfs[acqfs_index], 
-                        q=1, 
-                        choices=optimization_space,
-                        max_batch_size=max_batch_size
-                    )
-                    
-                    # evaluate the new candidate
-                    result = self.evaluate_configs(candidate)
-                    if len(result) == 1:
-                        acqfs_results[acqfs_index].append(result[0])
-                    fevals_left -= 1
-
-                # reinitialize the models so they are ready for fitting on next iteration
-                if loop_i < len(nums_optimization_spaces) - 1:
-                    model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar, state_dict=model.state_dict())
-        except StopCriterionReached as e:
-            if self.tuning_options.verbose:
-                print(e)
-
-        return self.cost_func.results

From 15175655e42c4053a34b4d41cd1724fb750fbc9b Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Fri, 7 Mar 2025 16:50:41 +0100
Subject: [PATCH 110/253] add support for user-defined optimization algorithms

---
 examples/cuda/vector_add_custom_strategy.py | 44 +++++++++++++++++++++
 kernel_tuner/interface.py                   | 32 +++++----------
 2 files changed, 53 insertions(+), 23 deletions(-)
 create mode 100644 examples/cuda/vector_add_custom_strategy.py

diff --git a/examples/cuda/vector_add_custom_strategy.py b/examples/cuda/vector_add_custom_strategy.py
new file mode 100644
index 000000000..29d873d5d
--- /dev/null
+++ b/examples/cuda/vector_add_custom_strategy.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+"""This is the minimal example from the README"""
+
+import numpy
+import kernel_tuner
+from kernel_tuner import tune_kernel
+from kernel_tuner.file_utils import store_output_file, store_metadata_file
+
+def tune():
+
+    kernel_string = """
+    __global__ void vector_add(float *c, float *a, float *b, int n) {
+        int i = blockIdx.x * block_size_x + threadIdx.x;
+        if (i<n) {
+            c[i] = a[i] + b[i];
+        }
+    }
+    """
+
+    size = 10000000
+
+    a = numpy.random.randn(size).astype(numpy.float32)
+    b = numpy.random.randn(size).astype(numpy.float32)
+    c = numpy.zeros_like(b)
+    n = numpy.int32(size)
+
+    args = [c, a, b, n]
+
+    tune_params = dict()
+    tune_params["block_size_x"] = [128+64*i for i in range(15)]
+
+    results, env = tune_kernel("vector_add", kernel_string, size, args, tune_params, strategy=kernel_tuner.strategies.minimize, verbose=True)
+
+    # Store the tuning results in an output file
+    store_output_file("vector_add.json", results, tune_params)
+
+    # Store the metadata of this run
+    store_metadata_file("vector_add-metadata.json")
+
+    return results
+
+
+if __name__ == "__main__":
+    tune()
diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 97ae22848..46e4efdef 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -621,29 +621,15 @@ def tune_kernel(
         if strategy in strategy_map:
             strategy = strategy_map[strategy]
         else:
-            raise ValueError(f"Unkown strategy {strategy}, must be one of: {', '.join(list(strategy_map.keys()))}")
-
-        # make strategy_options into an Options object
-        if tuning_options.strategy_options:
-            if not isinstance(strategy_options, Options):
-                tuning_options.strategy_options = Options(strategy_options)
-
-            # select strategy based on user options
-            if "fraction" in tuning_options.strategy_options and not tuning_options.strategy == "random_sample":
-                raise ValueError(
-                    'It is not possible to use fraction in combination with strategies other than "random_sample". '
-                    'Please set strategy="random_sample", when using "fraction" in strategy_options'
-                )
-
-            # check if method is supported by the selected strategy
-            if "method" in tuning_options.strategy_options:
-                method = tuning_options.strategy_options.method
-                if method not in strategy.supported_methods:
-                    raise ValueError("Method %s is not supported for strategy %s" % (method, tuning_options.strategy))
-
-        # if no strategy_options dict has been passed, create empty dictionary
-        else:
-            tuning_options.strategy_options = Options({})
+            # check for user-defined strategy
+            if hasattr(strategy, "tune") and callable(strategy.tune):
+                # user-defined strategy
+                pass
+            else:
+                raise ValueError(f"Unkown strategy {strategy}, must be one of: {', '.join(list(strategy_map.keys()))}")
+
+        # ensure strategy_options is an Options object
+        tuning_options.strategy_options = Options(strategy_options or {})
 
     # if no strategy selected
     else:

From 6633bed01374372a17d0553ddf736223474fe1c9 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 17:35:22 +0100
Subject: [PATCH 111/253] Updated pyproject

---
 pyproject.toml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 4b2d721ab..633a6f04b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -62,7 +62,6 @@ kernel_tuner = "kernel_tuner.interface:entry_point"
 python = ">=3.10,<3.15"    # NOTE when changing the supported Python versions, also change the test versions in the noxfile
 numpy = "^1.26.0"          # Python 3.12 requires numpy at least 1.26
 scipy = ">=1.14.1"
-botorch = ">=0.12.0"
 packaging = "*"                 # required by file_utils
 jsonschema = "*"
 python-constraint2 = "^2.1.0"

From c39ac5a9e3137348cda1d8fb8a848acf98797ca5 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 21:30:11 +0100
Subject: [PATCH 112/253] Adjusted hyper.py for paper

---
 kernel_tuner/hyper.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 9c052d033..040fd09bb 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -61,7 +61,7 @@ def tune_hyper_params(target_strategy: str, hyper_params: dict, *args, **kwargs)
     # pass a temporary cache file to avoid duplicate execution
     if 'cache' not in kwargs:
         cachefile = get_random_unique_filename('temp_', '.json')
-        cachefile = Path("hyperparamtuning_milo_bruteforce_dual_annealing.json")
+        cachefile = Path(f"hyperparamtuning_paper_bruteforce_{target_strategy}.json")
         kwargs['cache'] = str(cachefile)
 
     def put_if_not_present(target_dict, key, value):
@@ -88,14 +88,14 @@ def put_if_not_present(target_dict, key, value):
     return list(result_unique.values()), env
 
 if __name__ == "__main__":  # TODO remove in production
-    # hyperparams = {
-    #     'popsize': [10, 20, 30],
-    #     'maxiter': [50, 100, 150],
-    #     'w': [0.25, 0.5, 0.75],
-    #     'c1': [1.0, 2.0, 3.0],
-    #     'c2': [0.5, 1.0, 1.5]
-    # }
-    # result, env = tune_hyper_params('pso', hyperparams)
+    hyperparams = {
+        'popsize': [10, 20, 30],
+        'maxiter': [50, 100, 150],
+        'w': [0.25, 0.5, 0.75],
+        'c1': [1.0, 2.0, 3.0],
+        'c2': [0.5, 1.0, 1.5]
+    }
+    result, env = tune_hyper_params('pso', hyperparams)
 
     # hyperparams = {
     #     'neighbor': ['Hamming', 'adjacent'],
@@ -105,10 +105,10 @@ def put_if_not_present(target_dict, key, value):
     # }
     # result, env = tune_hyper_params('greedy_ils', hyperparams)
 
-    hyperparams = {
-        'method': ['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 'BFGS', 'trust-constr'],
-    }
-    result, env = tune_hyper_params('dual_annealing', hyperparams)
+    # hyperparams = {
+    #     'method': ['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 'BFGS', 'trust-constr'],
+    # }
+    # result, env = tune_hyper_params('dual_annealing', hyperparams)
 
     print(result)
     print(env['best_config'])

From cc19515035e5186b2b8ddb9a5ee93439b82663de Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 21:31:09 +0100
Subject: [PATCH 113/253] Extended hypertuner with additional kernels, adjusted
 for benchmark_hub

---
 kernel_tuner/backends/hypertuner.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index 15867fbd9..f05f56455 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -64,17 +64,28 @@ def compile(self, kernel_instance):
         # gpus = ["RTX_3090", "RTX_2080_Ti"]
         # applications = None
 
-        gpus = ["A100", "A4000", "MI250X", "W6600"]
+        gpus = ["A100", "W6600"]
+        folder = "../../autotuning_methodology/benchmark_hub/kernels"
         applications = [
             {
                 "name": "dedispersion_milo",
-                "folder": "../autotuning_methodology/benchmark_hub/kernels",
+                "folder": folder,
                 "input_file": "dedispersion_milo.json"
             },
             {
                 "name": "convolution_milo",
-                "folder": "../autotuning_methodology/benchmark_hub/kernels",
+                "folder": folder,
                 "input_file": "convolution_milo.json"
+            },
+            {
+                "name": "hotspot_milo",
+                "folder": folder,
+                "input_file": "hotspot_milo.json"
+            },
+            {
+                "name": "gemm_milo",
+                "folder": folder,
+                "input_file": "gemm_milo.json"
             }
         ]
 
@@ -93,6 +104,7 @@ def compile(self, kernel_instance):
         # any additional settings
         override = { 
             "experimental_groups_defaults": { 
+                "repeats": 10,
                 "samples": self.iterations 
             }
         }

From 638d216b6e1d4e0db0f9a1d8dcd77eadbbddaa3b Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Sat, 8 Mar 2025 07:14:58 +0100
Subject: [PATCH 114/253] Implemented passing strategy to hyperparametertune by
 CLI argument

---
 kernel_tuner/hyper.py | 55 +++++++++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 23 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 040fd09bb..2017a3c9b 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -3,6 +3,7 @@
 
 from pathlib import Path
 from random import randint
+from argparse import ArgumentParser
 
 import kernel_tuner
 
@@ -87,28 +88,36 @@ def put_if_not_present(target_dict, key, value):
             result_unique[config_id] = r
     return list(result_unique.values()), env
 
-if __name__ == "__main__":  # TODO remove in production
-    hyperparams = {
-        'popsize': [10, 20, 30],
-        'maxiter': [50, 100, 150],
-        'w': [0.25, 0.5, 0.75],
-        'c1': [1.0, 2.0, 3.0],
-        'c2': [0.5, 1.0, 1.5]
-    }
-    result, env = tune_hyper_params('pso', hyperparams)
-
-    # hyperparams = {
-    #     'neighbor': ['Hamming', 'adjacent'],
-    #     'restart': [True, False],
-    #     'no_improvement': [1, 10, 25, 33, 50, 66, 75, 100, 200],
-    #     'random_walk': [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99]
-    # }
-    # result, env = tune_hyper_params('greedy_ils', hyperparams)
-
-    # hyperparams = {
-    #     'method': ['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 'BFGS', 'trust-constr'],
-    # }
-    # result, env = tune_hyper_params('dual_annealing', hyperparams)
-
+if __name__ == "__main__":
+    parser = ArgumentParser()
+    parser.add_argument("strategy_to_tune")
+    args = parser.parse_args()
+    strategy_to_tune = args.strategy_to_tune
+
+    # select the hyperparameter parameters for the selected optimization algorithm
+    if strategy_to_tune.lower() == "pso":
+        hyperparams = {
+            'popsize': [10, 20, 30],
+            'maxiter': [50, 100, 150],
+            'w': [0.25, 0.5, 0.75],
+            'c1': [1.0, 2.0, 3.0],
+            'c2': [0.5, 1.0, 1.5]
+        }
+    elif strategy_to_tune.lower() == "greedy_ils":
+        hyperparams = {
+            'neighbor': ['Hamming', 'adjacent'],
+            'restart': [True, False],
+            'no_improvement': [1, 10, 25, 33, 50, 66, 75, 100, 200],
+            'random_walk': [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99]
+        }
+    elif strategy_to_tune.lower() == "dual_annealing":
+        hyperparams = {
+            'method': ['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 'BFGS', 'trust-constr'],
+        }
+    else:
+        raise ValueError(f"Invalid argument {strategy_to_tune=}")
+
+    # run the hyperparameter tuning
+    result, env = tune_hyper_params(strategy_to_tune.lower(), hyperparams)
     print(result)
     print(env['best_config'])

From d36adb5eeb3eb0d280f6672249929d6bfd5a175a Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Sat, 8 Mar 2025 07:15:28 +0100
Subject: [PATCH 115/253] Extended hyperparmeter tuning with 4 more strategies

---
 kernel_tuner/hyper.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 2017a3c9b..d6785c899 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -114,6 +114,30 @@ def put_if_not_present(target_dict, key, value):
         hyperparams = {
             'method': ['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 'BFGS', 'trust-constr'],
         }
+    elif strategy_to_tune.lower() == "diff_evo":
+        hyperparams = {
+            'method': ["best1bin", "best1exp", "rand1exp", "randtobest1exp", "best2exp", "rand2exp", "randtobest1bin", "best2bin", "rand2bin", "rand1bin"],
+            'popsize': [10, 20, 30],
+            'maxiter': [50, 100, 150],
+        }
+    elif strategy_to_tune.lower() == "basinhopping":
+        hyperparams = {
+            'method': ["Nelder-Mead", "Powell", "CG", "BFGS", "L-BFGS-B", "TNC", "COBYLA", "SLSQP"],
+            'T': [0.5, 1.0, 1.5],
+        }
+    elif strategy_to_tune.lower() == "genetic_algorithm":
+        hyperparams = {
+            'method': ["single_point", "two_point", "uniform", "disruptive_uniform"],
+            'popsize': [10, 20, 30],
+            'maxiter': [50, 100, 150],
+            'mutation_chance': [5, 10, 20]
+        }
+    elif strategy_to_tune.lower() == "mls":
+        hyperparams = {
+            'neighbor': ["Hamming", "adjacent"],
+            'restart': [True, False],
+            'randomize': [True, False]
+        }
     else:
         raise ValueError(f"Invalid argument {strategy_to_tune=}")
 

From 4e46459b2859a5cabd97292eb72ce29abb5b4b1a Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Sat, 8 Mar 2025 23:39:27 +0100
Subject: [PATCH 116/253] Generate a unique filename for generated experiment
 files to avoid collisions in parallel runs

---
 kernel_tuner/backends/hypertuner.py | 5 ++++-
 kernel_tuner/hyper.py               | 6 +++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index f05f56455..ec818b6b3 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -111,7 +111,7 @@ def compile(self, kernel_instance):
 
         name = kernel_instance.name if len(kernel_instance.name) > 0 else kernel_instance.kernel_source.kernel_name
         experiments_filepath = generate_experiment_file(name, path, searchspace_strategies, applications, gpus, 
-                                                        override=override, overwrite_existing_file=True)
+                                                        override=override, generate_unique_file=True, overwrite_existing_file=True)
         return str(experiments_filepath)
     
     def start_event(self):
@@ -134,6 +134,9 @@ def run_kernel(self, func, gpu_args=None, threads=None, grid=None, stream=None):
         # run the methodology to get a fitness score for this configuration
         scores = get_strategy_scores(str(experiments_filepath))
         self.last_score = scores[list(scores.keys())[0]]['score']
+
+        # remove the experiments file
+        experiments_filepath.unlink()
     
     def memset(self, allocation, value, size):
         return super().memset(allocation, value, size)
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index d6785c899..6df76370f 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -75,8 +75,12 @@ def put_if_not_present(target_dict, key, value):
     kwargs['verify'] = None
     arguments = [target_strategy]
 
+    # IMPORTANT when running this script in parallel, always make sure the below name is unique among your runs!
+    # e.g. when parallalizing over the hypertuning of multiple strategies, use the strategy name
+    name = f"hyperparamtuning_{target_strategy.lower()}"
+
     # execute the hyperparameter tuning
-    result, env = kernel_tuner.tune_kernel('hyperparamtuning', None, [], arguments, hyper_params, *args, lang='Hypertuner',
+    result, env = kernel_tuner.tune_kernel(name, None, [], arguments, hyper_params, *args, lang='Hypertuner',
                                     objective='score', objective_higher_is_better=True, iterations=iterations, **kwargs)
     
     # remove the temporary cachefile and return only unique results in order

From 1c5720166095351f8ac99b7a6ef7fc669bc07ad7 Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Mon, 10 Mar 2025 10:15:23 +0100
Subject: [PATCH 117/253] add test for the optimization algorithm wrapper

---
 kernel_tuner/strategies/wrapper.py |  29 +++++
 test/test_custom_optimizer.py      | 163 +++++++++++++++++++++++++++++
 2 files changed, 192 insertions(+)
 create mode 100644 kernel_tuner/strategies/wrapper.py
 create mode 100644 test/test_custom_optimizer.py

diff --git a/kernel_tuner/strategies/wrapper.py b/kernel_tuner/strategies/wrapper.py
new file mode 100644
index 000000000..8104f7129
--- /dev/null
+++ b/kernel_tuner/strategies/wrapper.py
@@ -0,0 +1,29 @@
+"""Wrapper intended for user-defined custom optimization methods"""
+
+from kernel_tuner import util
+from kernel_tuner.searchspace import Searchspace
+from kernel_tuner.strategies.common import CostFunc
+
+
+class OptAlgWrapper:
+    """Wrapper class for user-defined optimization algorithms"""
+
+    def __init__(self, optimizer, scaling=True):
+        self.optimizer = optimizer
+        self.scaling = scaling
+
+
+    def tune(self, searchspace: Searchspace, runner, tuning_options):
+        cost_func = CostFunc(searchspace, tuning_options, runner, scaling=self.scaling)
+
+        if self.scaling:
+            # Initialize costfunc for scaling
+            cost_func.get_bounds_x0_eps()
+
+        try:
+            self.optimizer(cost_func)
+        except util.StopCriterionReached as e:
+            if tuning_options.verbose:
+                print(e)
+
+        return cost_func.results
diff --git a/test/test_custom_optimizer.py b/test/test_custom_optimizer.py
new file mode 100644
index 000000000..7c483bad4
--- /dev/null
+++ b/test/test_custom_optimizer.py
@@ -0,0 +1,163 @@
+
+### The following was generating using the LLaMEA prompt and OpenAI o1
+
+import numpy as np
+
+class HybridDELocalRefinement:
+    """
+    A two-phase differential evolution with local refinement, intended for BBOB-type
+    black box optimization problems in [-5,5]^dim.
+
+    One-line idea: A two-phase hybrid DE with local refinement that balances global
+    exploration and local exploitation under a strict function evaluation budget.
+    """
+
+    def __init__(self, budget, dim):
+        """
+        Initialize the optimizer with:
+        - budget: total number of function evaluations allowed.
+        - dim: dimensionality of the search space.
+        """
+        self.budget = budget
+        self.dim = dim
+        # You can adjust these hyperparameters based on experimentation/tuning:
+        self.population_size = min(50, 10 * dim)  # Caps for extremely large dim
+        self.F = 0.8        # Differential weight
+        self.CR = 0.9       # Crossover probability
+        self.local_search_freq = 10  # Local refinement frequency in generations
+
+    def __call__(self, func):
+        """
+        Optimize the black box function `func` in [-5,5]^dim, using
+        at most self.budget function evaluations.
+
+        Returns:
+            best_params: np.ndarray representing the best parameters found
+            best_value: float representing the best objective value found
+        """
+        # Check if we have a non-positive budget
+        if self.budget <= 0:
+            raise ValueError("Budget must be a positive integer.")
+
+        # 1. Initialize population
+        lower_bound, upper_bound = -5.0, 5.0
+        pop = np.random.uniform(lower_bound, upper_bound, (self.population_size, self.dim))
+
+        # Evaluate initial population
+        evaluations = 0
+        fitness = np.empty(self.population_size)
+        for i in range(self.population_size):
+            fitness[i] = func(pop[i])
+            evaluations += 1
+            if evaluations >= self.budget:
+                break
+
+        # Track best solution
+        best_idx = np.argmin(fitness)
+        best_params = pop[best_idx].copy()
+        best_value = fitness[best_idx]
+
+        # 2. Main evolutionary loop
+        gen = 0
+        while evaluations < self.budget:
+            gen += 1
+            for i in range(self.population_size):
+                # DE mutation: pick three distinct indices
+                idxs = np.random.choice(self.population_size, 3, replace=False)
+                a, b, c = pop[idxs]
+                mutant = a + self.F * (b - c)
+
+                # Crossover
+                trial = np.copy(pop[i])
+                crossover_points = np.random.rand(self.dim) < self.CR
+                trial[crossover_points] = mutant[crossover_points]
+
+                # Enforce bounds
+                trial = np.clip(trial, lower_bound, upper_bound)
+
+                # Evaluate trial
+                trial_fitness = func(trial)
+                evaluations += 1
+                if evaluations >= self.budget:
+                    # If out of budget, wrap up
+                    if trial_fitness < fitness[i]:
+                        pop[i] = trial
+                        fitness[i] = trial_fitness
+                        # Update global best
+                        if trial_fitness < best_value:
+                            best_value = trial_fitness
+                            best_params = trial.copy()
+                    break
+
+                # Selection
+                if trial_fitness < fitness[i]:
+                    pop[i] = trial
+                    fitness[i] = trial_fitness
+                    # Update global best
+                    if trial_fitness < best_value:
+                        best_value = trial_fitness
+                        best_params = trial.copy()
+
+            # Periodically refine best solution with a small local neighborhood search
+            if gen % self.local_search_freq == 0 and evaluations < self.budget:
+                best_params, best_value, evaluations = self._local_refinement(
+                    func, best_params, best_value, evaluations, lower_bound, upper_bound
+                )
+
+            if evaluations >= self.budget:
+                break
+
+        return best_params, best_value
+
+    def _local_refinement(self, func, best_params, best_value, evaluations, lb, ub):
+        """
+        Local refinement around the best solution found so far.
+        Uses a quick 'perturb-and-accept' approach in a shrinking neighborhood.
+        """
+        # Neighborhood size shrinks as the budget is consumed
+        frac_budget_used = evaluations / self.budget
+        step_size = 0.2 * (1.0 - frac_budget_used)
+
+        for _ in range(5):  # 5 refinements each time
+            if evaluations >= self.budget:
+                break
+            candidate = best_params + np.random.uniform(-step_size, step_size, self.dim)
+            candidate = np.clip(candidate, lb, ub)
+            cand_value = func(candidate)
+            evaluations += 1
+            if cand_value < best_value:
+                best_value = cand_value
+                best_params = candidate.copy()
+
+        return best_params, best_value, evaluations
+
+
+
+
+### Testing the Optimization Algorithm Wrapper in Kernel Tuner
+import os
+from kernel_tuner import tune_kernel
+from kernel_tuner.strategies.wrapper import OptAlgWrapper
+cache_filename = os.path.dirname(
+
+    os.path.realpath(__file__)) + "/test_cache_file.json"
+
+from .test_runners import env
+
+
+def test_OptAlgWrapper(env):
+    kernel_name, kernel_string, size, args, tune_params = env
+
+    # Instantiate LLaMAE optimization algorithm
+    budget = int(15)
+    dim = len(tune_params)
+    optimizer = HybridDELocalRefinement(budget, dim)
+
+    # Wrap the algorithm class in the OptAlgWrapper
+    # for use in Kernel Tuner
+    strategy = OptAlgWrapper(optimizer)
+
+    # Call the tuner
+    tune_kernel(kernel_name, kernel_string, size, args, tune_params,
+                strategy=strategy, cache=cache_filename,
+                simulation_mode=True, verbose=True)

From d28fdbe4318b05b7b9f5999e63a4e30e48cc294d Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Mon, 10 Mar 2025 17:32:23 +0100
Subject: [PATCH 118/253] Adjusted the test / train sets and number of repeats

---
 kernel_tuner/backends/hypertuner.py | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index ec818b6b3..70921fe4c 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -61,10 +61,7 @@ def compile(self, kernel_instance):
         path.mkdir(exist_ok=True)
 
         # TODO get applications & GPUs args from benchmark
-        # gpus = ["RTX_3090", "RTX_2080_Ti"]
-        # applications = None
-
-        gpus = ["A100", "W6600"]
+        gpus = ["A100", "A4000", "MI250X", "W6600"]
         folder = "../../autotuning_methodology/benchmark_hub/kernels"
         applications = [
             {
@@ -72,21 +69,21 @@ def compile(self, kernel_instance):
                 "folder": folder,
                 "input_file": "dedispersion_milo.json"
             },
-            {
-                "name": "convolution_milo",
-                "folder": folder,
-                "input_file": "convolution_milo.json"
-            },
+            # {
+            #     "name": "convolution_milo",
+            #     "folder": folder,
+            #     "input_file": "convolution_milo.json"
+            # },
             {
                 "name": "hotspot_milo",
                 "folder": folder,
                 "input_file": "hotspot_milo.json"
             },
-            {
-                "name": "gemm_milo",
-                "folder": folder,
-                "input_file": "gemm_milo.json"
-            }
+            # {
+            #     "name": "gemm_milo",
+            #     "folder": folder,
+            #     "input_file": "gemm_milo.json"
+            # }
         ]
 
         # strategy settings
@@ -104,7 +101,7 @@ def compile(self, kernel_instance):
         # any additional settings
         override = { 
             "experimental_groups_defaults": { 
-                "repeats": 10,
+                "repeats": 20,
                 "samples": self.iterations 
             }
         }

From 49fa92f4205bf2df4e94e08f9c4a12efc7978f00 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Mon, 10 Mar 2025 23:31:33 +0100
Subject: [PATCH 119/253] Added simulated_annealing to hyperparameter tuning,
 adjusted greedy_ils parameters

---
 kernel_tuner/hyper.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 6df76370f..88a47a2bb 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -111,8 +111,8 @@ def put_if_not_present(target_dict, key, value):
         hyperparams = {
             'neighbor': ['Hamming', 'adjacent'],
             'restart': [True, False],
-            'no_improvement': [1, 10, 25, 33, 50, 66, 75, 100, 200],
-            'random_walk': [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99]
+            'no_improvement': [10, 25, 50, 75],
+            'random_walk': [0.1, 0.2, 0.3, 0.4, 0.5]
         }
     elif strategy_to_tune.lower() == "dual_annealing":
         hyperparams = {
@@ -142,6 +142,13 @@ def put_if_not_present(target_dict, key, value):
             'restart': [True, False],
             'randomize': [True, False]
         }
+    elif strategy_to_tune.lower() == "simulated_annealing":
+        hyperparams = {
+            'T': [0.5, 1.0, 1.5],
+            'T_min': [0.0001, 0.001, 0.01],
+            'alpha': [0.9925, 0.995, 0.9975],
+            'maxiter': [1, 2, 3]
+        }
     else:
         raise ValueError(f"Invalid argument {strategy_to_tune=}")
 

From 1056269590a8d19c12003581bb2f0c683cb9d526 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 13 Mar 2025 10:21:01 +0100
Subject: [PATCH 120/253] Updated hyperparameters

---
 kernel_tuner/hyper.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 88a47a2bb..ba3e615be 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -103,7 +103,7 @@ def put_if_not_present(target_dict, key, value):
         hyperparams = {
             'popsize': [10, 20, 30],
             'maxiter': [50, 100, 150],
-            'w': [0.25, 0.5, 0.75],
+            # 'w': [0.25, 0.5, 0.75],   # disabled due to low influence according to KW-test (H=0.0215) and mutual information
             'c1': [1.0, 2.0, 3.0],
             'c2': [0.5, 1.0, 1.5]
         }
@@ -127,7 +127,7 @@ def put_if_not_present(target_dict, key, value):
     elif strategy_to_tune.lower() == "basinhopping":
         hyperparams = {
             'method': ["Nelder-Mead", "Powell", "CG", "BFGS", "L-BFGS-B", "TNC", "COBYLA", "SLSQP"],
-            'T': [0.5, 1.0, 1.5],
+            'T': [0.1, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5],
         }
     elif strategy_to_tune.lower() == "genetic_algorithm":
         hyperparams = {

From 7ce2234b0a2c373d9bd15b6349af233b3c5bf4f0 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 13 Mar 2025 10:21:37 +0100
Subject: [PATCH 121/253] Updated search spaces used in hyperparameter tuning
 and number of repeats

---
 kernel_tuner/backends/hypertuner.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index 70921fe4c..a7ee2665e 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -61,7 +61,7 @@ def compile(self, kernel_instance):
         path.mkdir(exist_ok=True)
 
         # TODO get applications & GPUs args from benchmark
-        gpus = ["A100", "A4000", "MI250X", "W6600"]
+        gpus = ["A100", "A4000", "MI250X"]
         folder = "../../autotuning_methodology/benchmark_hub/kernels"
         applications = [
             {
@@ -69,21 +69,21 @@ def compile(self, kernel_instance):
                 "folder": folder,
                 "input_file": "dedispersion_milo.json"
             },
-            # {
-            #     "name": "convolution_milo",
-            #     "folder": folder,
-            #     "input_file": "convolution_milo.json"
-            # },
             {
                 "name": "hotspot_milo",
                 "folder": folder,
                 "input_file": "hotspot_milo.json"
             },
-            # {
-            #     "name": "gemm_milo",
-            #     "folder": folder,
-            #     "input_file": "gemm_milo.json"
-            # }
+            {
+                "name": "convolution_milo",
+                "folder": folder,
+                "input_file": "convolution_milo.json"
+            },
+            {
+                "name": "gemm_milo",
+                "folder": folder,
+                "input_file": "gemm_milo.json"
+            }
         ]
 
         # strategy settings
@@ -101,7 +101,7 @@ def compile(self, kernel_instance):
         # any additional settings
         override = { 
             "experimental_groups_defaults": { 
-                "repeats": 20,
+                "repeats": 25,
                 "samples": self.iterations 
             }
         }

From 1ed18937c8a75245ca5faf004e8fbb0c7fdd7b0e Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Sat, 15 Mar 2025 18:01:02 +0100
Subject: [PATCH 122/253] Added bayes_opt to hyperparamtuning

---
 kernel_tuner/hyper.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index ba3e615be..a831370ef 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -149,6 +149,14 @@ def put_if_not_present(target_dict, key, value):
             'alpha': [0.9925, 0.995, 0.9975],
             'maxiter': [1, 2, 3]
         }
+    elif strategy_to_tune.lower() == "bayes_opt":
+        hyperparams = {
+            # 'covariancekernel': ["constantrbf", "rbf", "matern32", "matern52"],
+            # 'covariancelengthscale': [1.0, 1.5, 2.0],
+            'method': ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast", "multi-ultrafast"],
+            'samplingmethod': ["random", "LHS"],
+            'popsize': [10, 20, 30]
+        }
     else:
         raise ValueError(f"Invalid argument {strategy_to_tune=}")
 

From 1e2532f132ed289a7e14f8a0055f013b0363b595 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 17 Mar 2025 07:49:10 +0100
Subject: [PATCH 123/253] Fixed link with hyperparameter tuning attributes

---
 kernel_tuner/interface.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 45a4bfef9..54b9f9974 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -886,6 +886,16 @@ def tune_kernel_T1(
     problem_size = kernelspec["ProblemSize"]
     device = kernelspec["Device"]["Name"]
     strategy = inputs["Search"]["Name"]
+    if "Attributes" in inputs["Search"]:
+        strategy_options = {}
+        for attribute in inputs["Search"]["Attributes"]:
+            strategy_options[attribute["Name"]] = attribute["Value"]
+    if "Budget" in inputs:
+        budget = inputs["Budget"][0]
+        assert budget["Type"] == "ConfigurationCount"
+        if strategy_options is None:
+            strategy_options = {}
+        strategy_options["max_fevals"] = budget["BudgetValue"]
 
     # set the cache path
     if cache_filepath is None and "SimulationInput" in kernelspec:

From afbf83ea053cc9e2c318a686fb7dc3442f1cd1e6 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 18 Mar 2025 00:47:22 +0100
Subject: [PATCH 124/253] Added support for evaluating T1 strings as a type

---
 kernel_tuner/interface.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 54b9f9974..5f4c1b628 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -918,6 +918,8 @@ def tune_kernel_T1(
                 tune_param = eval(vals)
             else:
                 tune_param = literal_eval(vals)
+        if param["Type"] == "string":
+            tune_param = eval(param["Values"])
         if tune_param is not None:
             tune_params[param["Name"]] = tune_param
         else:

From 84a2b1f0278c9ddbdbca39e1f5a28636a58284e3 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 18 Mar 2025 00:47:57 +0100
Subject: [PATCH 125/253] Added automatic scaling of random sample size if
 necessary

---
 kernel_tuner/strategies/genetic_algorithm.py |  6 ++++-
 kernel_tuner/strategies/pso.py               | 23 ++++++++++++--------
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index c29c150b5..461f655e9 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -1,4 +1,5 @@
 """A simple genetic algorithm for parameter search."""
+
 import random
 
 import numpy as np
@@ -20,6 +21,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
     options = tuning_options.strategy_options
     pop_size, generations, method, mutation_chance = common.get_options(options, _options)
+    pop_size = min(round(searchspace.size / 2), pop_size)
     crossover = supported_methods[method]
 
     best_score = 1e20
@@ -46,7 +48,9 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
         # 'best_score' is used only for printing
         if tuning_options.verbose and cost_func.results:
-            best_score = util.get_best_config(cost_func.results, tuning_options.objective, tuning_options.objective_higher_is_better)[tuning_options.objective]
+            best_score = util.get_best_config(
+                cost_func.results, tuning_options.objective, tuning_options.objective_higher_is_better
+            )[tuning_options.objective]
 
         if tuning_options.verbose:
             print("Generation %d, best_score %f" % (generation, best_score))
diff --git a/kernel_tuner/strategies/pso.py b/kernel_tuner/strategies/pso.py
index 5b0df1429..19ada64ac 100644
--- a/kernel_tuner/strategies/pso.py
+++ b/kernel_tuner/strategies/pso.py
@@ -1,4 +1,5 @@
 """The strategy that uses particle swarm optimization."""
+
 import random
 import sys
 
@@ -9,22 +10,25 @@
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc, scale_from_params
 
-_options = dict(popsize=("Population size", 20),
-                       maxiter=("Maximum number of iterations", 100),
-                       w=("Inertia weight constant", 0.5),
-                       c1=("Cognitive constant", 2.0),
-                       c2=("Social constant", 1.0))
+_options = dict(
+    popsize=("Population size", 20),
+    maxiter=("Maximum number of iterations", 100),
+    w=("Inertia weight constant", 0.5),
+    c1=("Cognitive constant", 2.0),
+    c2=("Social constant", 1.0),
+)
+
 
 def tune(searchspace: Searchspace, runner, tuning_options):
 
-    #scale variables in x because PSO works with velocities to visit different configurations
+    # scale variables in x because PSO works with velocities to visit different configurations
     cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True)
 
-    #using this instead of get_bounds because scaling is used
+    # using this instead of get_bounds because scaling is used
     bounds, _, eps = cost_func.get_bounds_x0_eps()
 
-
     num_particles, maxiter, w, c1, c2 = common.get_options(tuning_options.strategy_options, _options)
+    num_particles = min(round(searchspace.size / 2), num_particles)
 
     best_score_global = sys.float_info.max
     best_position_global = []
@@ -64,7 +68,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
             swarm[j].update_position(bounds)
 
     if tuning_options.verbose:
-        print('Final result:')
+        print("Final result:")
         print(best_position_global)
         print(best_score_global)
 
@@ -73,6 +77,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
 tune.__doc__ = common.get_strategy_docstring("Particle Swarm Optimization (PSO)", _options)
 
+
 class Particle:
     def __init__(self, bounds):
         self.ndim = len(bounds)

From 9e8047944ff33c075aa994986ae988a4b57cfc44 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 18 Mar 2025 00:48:15 +0100
Subject: [PATCH 126/253] Formatting

---
 kernel_tuner/util.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index a4fb42bd3..01cca83a5 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -459,13 +459,13 @@ def get_interval(a: list):
     a_min = min(a)
     a_max = max(a)
     if len(a) <= 2:
-        return (a_min, a_max, a_max-a_min)
+        return (a_min, a_max, a_max - a_min)
     # determine the first step size
-    step = a[1]-a_min
+    step = a[1] - a_min
     # for each element, the step size should be equal to the first step
     for i, e in enumerate(a):
-        if e-a[i-1] != step:
-            return None 
+        if e - a[i - 1] != step:
+            return None
     result = (a_min, a_max, step)
     if not all(isinstance(e, (int, float)) for e in result):
         return None
@@ -1039,12 +1039,12 @@ def to_equality_constraint(
                 return AllDifferentConstraint()
             return ValueError(f"Not possible: comparator should be '==' or '!=', is {comparator}")
         return None
-    
+
     # remove functionally duplicate restrictions (preserves order and whitespace)
     if all(isinstance(r, str) for r in restrictions):
         # clean the restriction strings to functional equivalence
-        restrictions_cleaned = [r.replace(' ', '') for r in restrictions]
-        restrictions_cleaned_unique = list(dict.fromkeys(restrictions_cleaned)) # dict preserves order
+        restrictions_cleaned = [r.replace(" ", "") for r in restrictions]
+        restrictions_cleaned_unique = list(dict.fromkeys(restrictions_cleaned))  # dict preserves order
         # get the indices of the unique restrictions, use these to build a new list of restrictions
         restrictions_unique_indices = [restrictions_cleaned.index(r) for r in restrictions_cleaned_unique]
         restrictions = [restrictions[i] for i in restrictions_unique_indices]
@@ -1107,8 +1107,12 @@ def to_equality_constraint(
             # combine multiple restrictions into one
             for res_tuple in res_dict.values():
                 res, params_used = res_tuple
-                params_used = list(dict.fromkeys(params_used))   # param_used should only contain unique, dict preserves order
-                parsed_restrictions_pyatf.append((f"def r({', '.join(params_used)}): return ({') and ('.join(res)}) \n", params_used))
+                params_used = list(
+                    dict.fromkeys(params_used)
+                )  # param_used should only contain unique, dict preserves order
+                parsed_restrictions_pyatf.append(
+                    (f"def r({', '.join(params_used)}): return ({') and ('.join(res)}) \n", params_used)
+                )
             parsed_restrictions = parsed_restrictions_pyatf
     else:
         # create one monolithic function

From ce552d06de058d402215aec4f6ab313f9da15b37 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Tue, 18 Mar 2025 06:07:05 +0100
Subject: [PATCH 127/253] Minor update to hyperparameter tuning

---
 kernel_tuner/backends/hypertuner.py | 2 +-
 kernel_tuner/hyper.py               | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index a7ee2665e..b15da315a 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -101,7 +101,7 @@ def compile(self, kernel_instance):
         # any additional settings
         override = { 
             "experimental_groups_defaults": { 
-                "repeats": 25,
+                "repeats": 10,
                 "samples": self.iterations 
             }
         }
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index a831370ef..27672cf97 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -136,7 +136,7 @@ def put_if_not_present(target_dict, key, value):
             'maxiter': [50, 100, 150],
             'mutation_chance': [5, 10, 20]
         }
-    elif strategy_to_tune.lower() == "mls":
+    elif strategy_to_tune.lower() == "greedy_mls":
         hyperparams = {
             'neighbor': ["Hamming", "adjacent"],
             'restart': [True, False],

From 2714c2880dbb4e581a248a8438a4e7718214d687 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 18 Mar 2025 06:34:29 +0100
Subject: [PATCH 128/253] Set new default hyperparameters for PSO, dual
 annealing and simulated annealing

---
 kernel_tuner/strategies/dual_annealing.py      | 2 +-
 kernel_tuner/strategies/pso.py                 | 6 +++---
 kernel_tuner/strategies/simulated_annealing.py | 8 ++++----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel_tuner/strategies/dual_annealing.py b/kernel_tuner/strategies/dual_annealing.py
index 0f44bd849..ace532534 100644
--- a/kernel_tuner/strategies/dual_annealing.py
+++ b/kernel_tuner/strategies/dual_annealing.py
@@ -8,7 +8,7 @@
 
 supported_methods = ['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 'BFGS', 'trust-constr']
 
-_options = dict(method=(f"Local optimization method to use, choose any from {supported_methods}", "Powell"))
+_options = dict(method=(f"Local optimization method to use, choose any from {supported_methods}", "COBYLA"))
 
 def tune(searchspace: Searchspace, runner, tuning_options):
 
diff --git a/kernel_tuner/strategies/pso.py b/kernel_tuner/strategies/pso.py
index 19ada64ac..cc6b82d49 100644
--- a/kernel_tuner/strategies/pso.py
+++ b/kernel_tuner/strategies/pso.py
@@ -12,10 +12,10 @@
 
 _options = dict(
     popsize=("Population size", 20),
-    maxiter=("Maximum number of iterations", 100),
+    maxiter=("Maximum number of iterations", 150),
     w=("Inertia weight constant", 0.5),
-    c1=("Cognitive constant", 2.0),
-    c2=("Social constant", 1.0),
+    c1=("Cognitive constant", 3.0),
+    c2=("Social constant", 1.5),
 )
 
 
diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py
index dcb9e3f26..d73c0ad5e 100644
--- a/kernel_tuner/strategies/simulated_annealing.py
+++ b/kernel_tuner/strategies/simulated_annealing.py
@@ -9,10 +9,10 @@
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc
 
-_options = dict(T=("Starting temperature", 1.0),
-                       T_min=("End temperature", 0.001),
-                       alpha=("Alpha parameter", 0.995),
-                       maxiter=("Number of iterations within each annealing step", 1))
+_options = dict(T=("Starting temperature", 0.5),
+                       T_min=("End temperature", 0.0001),
+                       alpha=("Alpha parameter", 0.9975),
+                       maxiter=("Number of iterations within each annealing step", 2))
 
 def tune(searchspace: Searchspace, runner, tuning_options):
     # SA works with real parameter values and does not need scaling

From 25d5202c8689f557f2f19e19488aa20d26a08fe8 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 18 Mar 2025 16:41:23 +0100
Subject: [PATCH 129/253] Set new default hyperparameters for Genetic Algorithm
 and Differential Evolution

---
 kernel_tuner/strategies/diff_evo.py          | 2 +-
 kernel_tuner/strategies/genetic_algorithm.py | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index 62e966f33..cd089ae1e 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -9,7 +9,7 @@
 supported_methods = ["best1bin", "best1exp", "rand1exp", "randtobest1exp", "best2exp", "rand2exp",
                      "randtobest1bin", "best2bin", "rand2bin", "rand1bin"]
 
-_options = dict(method=(f"Creation method for new population, any of {supported_methods}", "best1bin"),
+_options = dict(method=(f"Creation method for new population, any of {supported_methods}", "randtobest1bin"),
                        popsize=("Population size", 20),
                        maxiter=("Number of generations", 100))
 
diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 461f655e9..0ca0f5f75 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -10,10 +10,10 @@
 from kernel_tuner.strategies.common import CostFunc
 
 _options = dict(
-    popsize=("population size", 20),
-    maxiter=("maximum number of generations", 100),
+    popsize=("population size", 30),
+    maxiter=("maximum number of generations", 30),
     method=("crossover method to use, choose any from single_point, two_point, uniform, disruptive_uniform", "uniform"),
-    mutation_chance=("chance to mutate is 1 in mutation_chance", 10),
+    mutation_chance=("chance to mutate is 1 in mutation_chance", 20),
 )
 
 

From 651c42c2fc3e6153e3c4a235125e7ad95e26371f Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 11:28:08 +0100
Subject: [PATCH 130/253] Avoid requesting more random samples than the
 searchspace size

---
 kernel_tuner/searchspace.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 8265c44ab..6331bed33 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -4,6 +4,7 @@
 from pathlib import Path
 from random import choice, shuffle
 from typing import List, Union
+from warnings import warn
 
 import numpy as np
 from constraint import (
@@ -69,9 +70,7 @@ def __init__(
             ), "When `from_cache` is used, the positional arguments must be set to None."
             tune_params = from_cache["tune_params"]
         if from_cache is None:
-            assert (
-                tune_params is not None and max_threads is not None
-            ), "Must specify positional arguments."
+            assert tune_params is not None and max_threads is not None, "Must specify positional arguments."
 
         # set the object attributes using the arguments
         framework_l = framework.lower()
@@ -864,6 +863,11 @@ def get_random_sample_indices(self, num_samples: int) -> np.ndarray:
 
     def get_random_sample(self, num_samples: int) -> List[tuple]:
         """Get the parameter configurations for a random, non-conflicting sample (caution: not unique in consecutive calls)."""
+        if self.size < num_samples:
+            warn(
+                f"Too many samples requested ({num_samples}), reducing the number of samples to the searchspace size ({self.size})"
+            )
+            num_samples = self.size
         return self.get_param_configs_at_indices(self.get_random_sample_indices(num_samples))
 
     def get_neighbors_indices_no_cache(self, param_config: tuple, neighbor_method=None) -> List[int]:

From b953a69af37bcb3beedb0003db7758af0e25cc8d Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 11:29:00 +0100
Subject: [PATCH 131/253] Clearer message when exceeding the stop criterion

---
 kernel_tuner/util.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index 01cca83a5..adaa2b6df 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -193,9 +193,9 @@ def check_argument_list(kernel_name, kernel_string, args):
 def check_stop_criterion(to):
     """Checks if max_fevals is reached or time limit is exceeded."""
     if "max_fevals" in to and len(to.unique_results) >= to.max_fevals:
-        raise StopCriterionReached("max_fevals reached")
+        raise StopCriterionReached(f"max_fevals reached ({len(to.unique_results)} >= {to.max_fevals})")
     if "time_limit" in to and (((time.perf_counter() - to.start_time) + (to.simulated_time * 1e-3)) > to.time_limit):
-        raise StopCriterionReached("time limit exceeded")
+        raise StopCriterionReached(f"time limit ({to.time_limit}) exceeded")
 
 
 def check_tune_params_list(tune_params, observers, simulation_mode=False):

From a401008651bd7765d49988515c5a0848518ec2f7 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 11:30:17 +0100
Subject: [PATCH 132/253] Add soft maximum function evaluations limit to dual
 annealing

---
 kernel_tuner/strategies/dual_annealing.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/strategies/dual_annealing.py b/kernel_tuner/strategies/dual_annealing.py
index ace532534..7d9868c5e 100644
--- a/kernel_tuner/strategies/dual_annealing.py
+++ b/kernel_tuner/strategies/dual_annealing.py
@@ -12,7 +12,8 @@
 
 def tune(searchspace: Searchspace, runner, tuning_options):
 
-    method = common.get_options(tuning_options.strategy_options, _options)[0]
+    _options["max_fevals"] = ("", searchspace.size)
+    method, max_fevals = common.get_options(tuning_options.strategy_options, _options)
 
     #scale variables in x to make 'eps' relevant for multiple variables
     cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True)
@@ -29,7 +30,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
     opt_result = None
     try:
-        opt_result = scipy.optimize.dual_annealing(cost_func, bounds, minimizer_kwargs=minimizer_kwargs, x0=x0)
+        opt_result = scipy.optimize.dual_annealing(cost_func, bounds, minimizer_kwargs=minimizer_kwargs, x0=x0, maxfun=max_fevals)
     except util.StopCriterionReached as e:
         if tuning_options.verbose:
             print(e)

From 425b4f4f1935158fe1a8a0cbf624f2dea1b1e025 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 11:30:43 +0100
Subject: [PATCH 133/253] Improved rounding of encoded parameter values

---
 kernel_tuner/strategies/common.py | 40 ++++++++++++++++++++++++-------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index eb0b81e27..30c53bc10 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -61,8 +61,16 @@ class CostFunc:
     """Class encapsulating the CostFunc method."""
 
     def __init__(
-        self, searchspace: Searchspace, tuning_options, runner, *, 
-        scaling=False, snap=True, encode_non_numeric=False, return_invalid=False, return_raw=None
+        self,
+        searchspace: Searchspace,
+        tuning_options,
+        runner,
+        *,
+        scaling=False,
+        snap=True,
+        encode_non_numeric=False,
+        return_invalid=False,
+        return_raw=None,
     ):
         """An abstract method to handle evaluation of configurations.
 
@@ -75,7 +83,7 @@ def __init__(
             encode_non_numeric: whether to externally encode non-numeric parameter values. Defaults to False.
             return_invalid: whether to return the util.ErrorConfig of an invalid configuration. Defaults to False.
             return_raw: returns (result, results[raw]). Key inferred from objective if set to True. Defaults to None.
-        """        
+        """
         self.runner = runner
         self.snap = snap
         self.scaling = scaling
@@ -100,7 +108,9 @@ def __init__(
             for i, param_values in enumerate(self.searchspace.params_values):
                 encoded_values = param_values
                 if not all(isinstance(v, numbers.Real) for v in param_values):
-                    encoded_values = np.arange(len(param_values))
+                    encoded_values = np.arange(
+                        len(param_values)
+                    )  # NOTE when changing this, adjust the rounding in encoded_to_params
                     self._map_param_to_encoded[i] = dict(zip(param_values, encoded_values))
                     self._map_encoded_to_param[i] = dict(zip(encoded_values, param_values))
                 self.encoded_params_values.append(encoded_values)
@@ -215,17 +225,29 @@ def get_bounds(self):
         for values in self.encoded_params_values if self.encode_non_numeric else self.searchspace.params_values:
             bounds.append((min(values), max(values)))
         return bounds
-    
+
     def encoded_to_params(self, config):
         """Convert from an encoded configuration to the real parameters."""
         if not self.encode_non_numeric:
             raise ValueError("'encode_non_numeric' must be set to true to use this function.")
         params = []
         for i, v in enumerate(config):
-            params.append(self._map_encoded_to_param[i][v] if i in self._map_encoded_to_param else v)
-        assert len(params) == len(config)            
+            # params.append(self._map_encoded_to_param[i][v] if i in self._map_encoded_to_param else v)
+            if i in self._map_encoded_to_param:
+                encoding = self._map_encoded_to_param[i]
+                if v in encoding:
+                    param = encoding[v]
+                elif isinstance(v, float):
+                    # try to resolve a rounding error due to floating point arithmetic / continous solver
+                    param = encoding[round(v)]
+                else:
+                    raise ValueError(f"Encoded value {v} not found in {self._map_encoded_to_param[i]}")
+            else:
+                param = v
+            params.append(param)
+        assert len(params) == len(config)
         return params
-    
+
     def params_to_encoded(self, config):
         """Convert from a parameter configuration to the encoded configuration."""
         if not self.encode_non_numeric:
@@ -233,7 +255,7 @@ def params_to_encoded(self, config):
         encoded = []
         for i, v in enumerate(config):
             encoded.append(self._map_param_to_encoded[i][v] if i in self._map_param_to_encoded else v)
-        assert len(encoded) == len(config)            
+        assert len(encoded) == len(config)
         return encoded
 
 

From 3bba92367743179e9662cbaf8c9c0a28824b01c6 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 11:59:39 +0100
Subject: [PATCH 134/253] Updated pyproject and requirements files

---
 doc/requirements.txt      | 176 ++++++++--------
 doc/requirements_test.txt | 411 +++++++++++++++++++++++++-------------
 pyproject.toml            |   2 +-
 3 files changed, 357 insertions(+), 232 deletions(-)

diff --git a/doc/requirements.txt b/doc/requirements.txt
index 355caa7a6..5f316bb33 100644
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@@ -1,89 +1,87 @@
-alabaster==0.7.16 ; python_version >= "3.9" and python_version < "3.15"
-asttokens==3.0.0 ; python_version >= "3.9" and python_version < "3.15"
-attrs==25.1.0 ; python_version >= "3.9" and python_version < "3.15"
-babel==2.17.0 ; python_version >= "3.9" and python_version < "3.15"
-beautifulsoup4==4.13.3 ; python_version >= "3.9" and python_version < "3.15"
-bleach[css]==6.2.0 ; python_version >= "3.9" and python_version < "3.15"
-certifi==2025.1.31 ; python_version >= "3.9" and python_version < "3.15"
-cffi==1.17.1 ; python_version >= "3.9" and python_version < "3.15" and implementation_name == "pypy"
-charset-normalizer==3.4.1 ; python_version >= "3.9" and python_version < "3.15"
-colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.15" and sys_platform == "win32"
-decorator==5.2.1 ; python_version >= "3.9" and python_version < "3.15"
-defusedxml==0.7.1 ; python_version >= "3.9" and python_version < "3.15"
-docutils==0.20.1 ; python_version >= "3.9" and python_version < "3.15"
-dom-toml==2.0.1 ; python_version >= "3.9" and python_version < "3.15"
-domdf-python-tools==3.10.0 ; python_version >= "3.9" and python_version < "3.15"
-exceptiongroup==1.2.2 ; python_version >= "3.9" and python_version < "3.11"
-executing==2.2.0 ; python_version >= "3.9" and python_version < "3.15"
-fastjsonschema==2.21.1 ; python_version >= "3.9" and python_version < "3.15"
-idna==3.10 ; python_version >= "3.9" and python_version < "3.15"
-imagesize==1.4.1 ; python_version >= "3.9" and python_version < "3.15"
-importlib-metadata==8.6.1 ; python_version >= "3.9" and python_version < "3.10"
-iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "3.15"
-ipython==8.18.1 ; python_version >= "3.9" and python_version < "3.15"
-jedi==0.19.2 ; python_version >= "3.9" and python_version < "3.15"
-jinja2==3.1.6 ; python_version >= "3.9" and python_version < "3.15"
-joblib==1.4.2 ; python_version >= "3.9" and python_version < "3.15"
-jsonschema-specifications==2024.10.1 ; python_version >= "3.9" and python_version < "3.15"
-jsonschema==4.23.0 ; python_version >= "3.9" and python_version < "3.15"
-jupyter-client==8.6.3 ; python_version >= "3.9" and python_version < "3.15"
-jupyter-core==5.7.2 ; python_version >= "3.9" and python_version < "3.15"
-jupyterlab-pygments==0.3.0 ; python_version >= "3.9" and python_version < "3.15"
-markupsafe==2.1.5 ; python_version >= "3.9" and python_version < "3.15"
-matplotlib-inline==0.1.7 ; python_version >= "3.9" and python_version < "3.15"
-mistune==3.1.2 ; python_version >= "3.9" and python_version < "3.15"
-natsort==8.4.0 ; python_version >= "3.9" and python_version < "3.15"
-nbclient==0.10.2 ; python_version >= "3.9" and python_version < "3.15"
-nbconvert==7.16.6 ; python_version >= "3.9" and python_version < "3.15"
-nbformat==5.10.4 ; python_version >= "3.9" and python_version < "3.15"
-nbsphinx==0.9.7 ; python_version >= "3.9" and python_version < "3.15"
-numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.15"
-packaging==24.2 ; python_version >= "3.9" and python_version < "3.15"
-pandas==2.2.3 ; python_version >= "3.9" and python_version < "3.15"
-pandocfilters==1.5.1 ; python_version >= "3.9" and python_version < "3.15"
-parso==0.8.4 ; python_version >= "3.9" and python_version < "3.15"
-pexpect==4.9.0 ; python_version >= "3.9" and python_version < "3.15" and sys_platform != "win32"
-platformdirs==4.3.6 ; python_version >= "3.9" and python_version < "3.15"
-pluggy==1.5.0 ; python_version >= "3.9" and python_version < "3.15"
-prompt-toolkit==3.0.50 ; python_version >= "3.9" and python_version < "3.15"
-ptyprocess==0.7.0 ; python_version >= "3.9" and python_version < "3.15" and sys_platform != "win32"
-pure-eval==0.2.3 ; python_version >= "3.9" and python_version < "3.15"
-pycparser==2.22 ; python_version >= "3.9" and python_version < "3.15" and implementation_name == "pypy"
-pygments==2.19.1 ; python_version >= "3.9" and python_version < "3.15"
-pytest==8.3.5 ; python_version >= "3.9" and python_version < "3.15"
-python-constraint2==2.1.0 ; python_version >= "3.9" and python_version < "3.15"
-python-dateutil==2.9.0.post0 ; python_version >= "3.9" and python_version < "3.15"
-pytz==2025.1 ; python_version >= "3.9" and python_version < "3.15"
-pywin32==308 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.9" and python_version < "3.15"
-pyzmq==26.2.1 ; python_version >= "3.9" and python_version < "3.15"
-referencing==0.36.2 ; python_version >= "3.9" and python_version < "3.15"
-requests==2.32.3 ; python_version >= "3.9" and python_version < "3.15"
-rpds-py==0.23.1 ; python_version >= "3.9" and python_version < "3.15"
-scikit-learn==1.6.1 ; python_version >= "3.9" and python_version < "3.15"
-scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.15"
-six==1.17.0 ; python_version >= "3.9" and python_version < "3.15"
-snowballstemmer==2.2.0 ; python_version >= "3.9" and python_version < "3.15"
-soupsieve==2.6 ; python_version >= "3.9" and python_version < "3.15"
-sphinx-pyproject==0.3.0 ; python_version >= "3.9" and python_version < "3.15"
-sphinx-rtd-theme==2.0.0 ; python_version >= "3.9" and python_version < "3.15"
-sphinx==7.4.7 ; python_version >= "3.9" and python_version < "3.15"
-sphinxcontrib-applehelp==2.0.0 ; python_version >= "3.9" and python_version < "3.15"
-sphinxcontrib-devhelp==2.0.0 ; python_version >= "3.9" and python_version < "3.15"
-sphinxcontrib-htmlhelp==2.1.0 ; python_version >= "3.9" and python_version < "3.15"
-sphinxcontrib-jquery==4.1 ; python_version >= "3.9" and python_version < "3.15"
-sphinxcontrib-jsmath==1.0.1 ; python_version >= "3.9" and python_version < "3.15"
-sphinxcontrib-qthelp==2.0.0 ; python_version >= "3.9" and python_version < "3.15"
-sphinxcontrib-serializinghtml==2.0.0 ; python_version >= "3.9" and python_version < "3.15"
-stack-data==0.6.3 ; python_version >= "3.9" and python_version < "3.15"
-threadpoolctl==3.5.0 ; python_version >= "3.9" and python_version < "3.15"
-tinycss2==1.4.0 ; python_version >= "3.9" and python_version < "3.15"
-tomli==2.2.1 ; python_version >= "3.9" and python_version < "3.15"
-tornado==6.4.2 ; python_version >= "3.9" and python_version < "3.15"
-traitlets==5.14.3 ; python_version >= "3.9" and python_version < "3.15"
-typing-extensions==4.12.2 ; python_version >= "3.9" and python_version < "3.15"
-tzdata==2025.1 ; python_version >= "3.9" and python_version < "3.15"
-urllib3==2.3.0 ; python_version >= "3.9" and python_version < "3.15"
-wcwidth==0.2.13 ; python_version >= "3.9" and python_version < "3.15"
-webencodings==0.5.1 ; python_version >= "3.9" and python_version < "3.15"
-xmltodict==0.14.2 ; python_version >= "3.9" and python_version < "3.15"
-zipp==3.21.0 ; python_version >= "3.9" and python_version < "3.10"
+alabaster==0.7.16 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+asttokens==3.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+attrs==25.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+babel==2.17.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+beautifulsoup4==4.13.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+bleach==6.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+certifi==2025.1.31 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+cffi==1.17.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+charset-normalizer==3.4.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+colorama==0.4.6 ; python_version >= "3.10" and python_version <= "3.11" and sys_platform == "win32" or python_version >= "3.12" and python_version < "3.15" and sys_platform == "win32"
+decorator==5.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+defusedxml==0.7.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+docutils==0.20.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+dom-toml==2.0.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+domdf-python-tools==3.10.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+exceptiongroup==1.2.2 ; python_version >= "3.10" and python_version < "3.11"
+executing==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+fastjsonschema==2.21.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+idna==3.10 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+imagesize==1.4.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+iniconfig==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+ipython==8.34.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+jedi==0.19.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+jinja2==3.1.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+joblib==1.4.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+jsonschema-specifications==2024.10.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+jsonschema==4.23.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+jupyter-client==8.6.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+jupyter-core==5.7.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+jupyterlab-pygments==0.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+markupsafe==2.1.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+matplotlib-inline==0.1.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+mistune==3.1.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+natsort==8.4.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+nbclient==0.10.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+nbconvert==7.16.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+nbformat==5.10.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+nbsphinx==0.9.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+pandocfilters==1.5.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+parso==0.8.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+pexpect==4.9.0 ; python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "3.15" and (sys_platform != "win32" and sys_platform != "emscripten")
+platformdirs==4.3.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+pluggy==1.5.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+prompt-toolkit==3.0.50 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+ptyprocess==0.7.0 ; python_version >= "3.10" and python_version <= "3.11" and os_name != "nt" or python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "3.15" and os_name != "nt" or python_version >= "3.12" and python_version < "3.15" and (sys_platform != "win32" and sys_platform != "emscripten")
+pure-eval==0.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+pycparser==2.22 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+pygments==2.19.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+pytest==8.3.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+python-constraint2==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+pytz==2025.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+pywin32==310 ; python_version >= "3.10" and python_version <= "3.11" and sys_platform == "win32" and platform_python_implementation != "PyPy" or python_version >= "3.12" and python_version < "3.15" and sys_platform == "win32" and platform_python_implementation != "PyPy"
+pyzmq==26.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+referencing==0.36.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+requests==2.32.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+rpds-py==0.23.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+scikit-learn==1.6.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+scipy==1.15.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+six==1.17.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+snowballstemmer==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+soupsieve==2.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+sphinx-pyproject==0.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+sphinx-rtd-theme==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+sphinx==7.4.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+sphinxcontrib-applehelp==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+sphinxcontrib-devhelp==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+sphinxcontrib-htmlhelp==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+sphinxcontrib-jquery==4.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+sphinxcontrib-jsmath==1.0.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+sphinxcontrib-qthelp==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+sphinxcontrib-serializinghtml==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+stack-data==0.6.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+threadpoolctl==3.6.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+tinycss2==1.4.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+tomli==2.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+tornado==6.4.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+traitlets==5.14.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+typing-extensions==4.12.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+tzdata==2025.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+urllib3==2.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+wcwidth==0.2.13 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+webencodings==0.5.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+xmltodict==0.14.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
diff --git a/doc/requirements_test.txt b/doc/requirements_test.txt
index f4f62912c..11ed8518b 100644
--- a/doc/requirements_test.txt
+++ b/doc/requirements_test.txt
@@ -1,116 +1,195 @@
-argcomplete==3.6.0 ; python_version >= "3.9" and python_version < "3.15" \
+argcomplete==3.6.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:2e4e42ec0ba2fff54b0d244d0b1623e86057673e57bafe72dda59c64bd5dee8b \
     --hash=sha256:4e3e4e10beb20e06444dbac0ac8dda650cb6349caeefe980208d3c548708bedd
-attrs==25.1.0 ; python_version >= "3.9" and python_version < "3.15" \
-    --hash=sha256:1c97078a80c814273a76b2a298a932eb681c87415c11dee0a6921de7f1b02c3e \
-    --hash=sha256:c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a
-build==1.2.2.post1 ; python_version >= "3.9" and python_version < "3.15" \
+asttokens==3.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7 \
+    --hash=sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2
+attrs==25.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3 \
+    --hash=sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b
+build==1.2.2.post1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:1d61c0887fa860c01971625baae8bdd338e517b836a2f70dd1f7aa3a6b2fc5b5 \
     --hash=sha256:b36993e92ca9375a219c99e606a122ff365a760a2d4bba0caa09bd5278b608b7
-colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.15" and (sys_platform == "win32" or os_name == "nt") \
+colorama==0.4.6 ; python_version >= "3.10" and python_version <= "3.11" and sys_platform == "win32" or python_version >= "3.10" and python_version <= "3.11" and os_name == "nt" or python_version >= "3.12" and python_version < "3.15" and sys_platform == "win32" or python_version >= "3.12" and python_version < "3.15" and os_name == "nt" \
     --hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \
     --hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6
-colorlog==6.9.0 ; python_version >= "3.9" and python_version < "3.15" \
+colorlog==6.9.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \
     --hash=sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2
-coverage[toml]==7.6.12 ; python_version >= "3.9" and python_version < "3.15" \
-    --hash=sha256:00b2086892cf06c7c2d74983c9595dc511acca00665480b3ddff749ec4fb2a95 \
-    --hash=sha256:0533adc29adf6a69c1baa88c3d7dbcaadcffa21afbed3ca7a225a440e4744bf9 \
-    --hash=sha256:06097c7abfa611c91edb9e6920264e5be1d6ceb374efb4986f38b09eed4cb2fe \
-    --hash=sha256:07e92ae5a289a4bc4c0aae710c0948d3c7892e20fd3588224ebe242039573bf0 \
-    --hash=sha256:0a9d8be07fb0832636a0f72b80d2a652fe665e80e720301fb22b191c3434d924 \
-    --hash=sha256:0e549f54ac5f301e8e04c569dfdb907f7be71b06b88b5063ce9d6953d2d58574 \
-    --hash=sha256:0ef01d70198431719af0b1f5dcbefc557d44a190e749004042927b2a3fed0702 \
-    --hash=sha256:0f16f44025c06792e0fb09571ae454bcc7a3ec75eeb3c36b025eccf501b1a4c3 \
-    --hash=sha256:14d47376a4f445e9743f6c83291e60adb1b127607a3618e3185bbc8091f0467b \
-    --hash=sha256:1a936309a65cc5ca80fa9f20a442ff9e2d06927ec9a4f54bcba9c14c066323f2 \
-    --hash=sha256:1ceeb90c3eda1f2d8c4c578c14167dbd8c674ecd7d38e45647543f19839dd6ea \
-    --hash=sha256:1f7ffa05da41754e20512202c866d0ebfc440bba3b0ed15133070e20bf5aeb5f \
-    --hash=sha256:200e10beb6ddd7c3ded322a4186313d5ca9e63e33d8fab4faa67ef46d3460af3 \
-    --hash=sha256:220fa6c0ad7d9caef57f2c8771918324563ef0d8272c94974717c3909664e674 \
-    --hash=sha256:2251fabcfee0a55a8578a9d29cecfee5f2de02f11530e7d5c5a05859aa85aee9 \
-    --hash=sha256:2458f275944db8129f95d91aee32c828a408481ecde3b30af31d552c2ce284a0 \
-    --hash=sha256:299cf973a7abff87a30609879c10df0b3bfc33d021e1adabc29138a48888841e \
-    --hash=sha256:2b996819ced9f7dbb812c701485d58f261bef08f9b85304d41219b1496b591ef \
-    --hash=sha256:3688b99604a24492bcfe1c106278c45586eb819bf66a654d8a9a1433022fb2eb \
-    --hash=sha256:3a1e465f398c713f1b212400b4e79a09829cd42aebd360362cd89c5bdc44eb87 \
-    --hash=sha256:488c27b3db0ebee97a830e6b5a3ea930c4a6e2c07f27a5e67e1b3532e76b9ef1 \
-    --hash=sha256:48cfc4641d95d34766ad41d9573cc0f22a48aa88d22657a1fe01dca0dbae4de2 \
-    --hash=sha256:4b467a8c56974bf06e543e69ad803c6865249d7a5ccf6980457ed2bc50312703 \
-    --hash=sha256:53c56358d470fa507a2b6e67a68fd002364d23c83741dbc4c2e0680d80ca227e \
-    --hash=sha256:5d1095bbee1851269f79fd8e0c9b5544e4c00c0c24965e66d8cba2eb5bb535fd \
-    --hash=sha256:641dfe0ab73deb7069fb972d4d9725bf11c239c309ce694dd50b1473c0f641c3 \
-    --hash=sha256:64cbb1a3027c79ca6310bf101014614f6e6e18c226474606cf725238cf5bc2d4 \
-    --hash=sha256:66fe626fd7aa5982cdebad23e49e78ef7dbb3e3c2a5960a2b53632f1f703ea45 \
-    --hash=sha256:676f92141e3c5492d2a1596d52287d0d963df21bf5e55c8b03075a60e1ddf8aa \
-    --hash=sha256:69e62c5034291c845fc4df7f8155e8544178b6c774f97a99e2734b05eb5bed31 \
-    --hash=sha256:704c8c8c6ce6569286ae9622e534b4f5b9759b6f2cd643f1c1a61f666d534fe8 \
-    --hash=sha256:78f5243bb6b1060aed6213d5107744c19f9571ec76d54c99cc15938eb69e0e86 \
-    --hash=sha256:79cac3390bfa9836bb795be377395f28410811c9066bc4eefd8015258a7578c6 \
-    --hash=sha256:7ae6eabf519bc7871ce117fb18bf14e0e343eeb96c377667e3e5dd12095e0288 \
-    --hash=sha256:7e39e845c4d764208e7b8f6a21c541ade741e2c41afabdfa1caa28687a3c98cf \
-    --hash=sha256:8161d9fbc7e9fe2326de89cd0abb9f3599bccc1287db0aba285cb68d204ce929 \
-    --hash=sha256:8bec2ac5da793c2685ce5319ca9bcf4eee683b8a1679051f8e6ec04c4f2fd7dc \
-    --hash=sha256:959244a17184515f8c52dcb65fb662808767c0bd233c1d8a166e7cf74c9ea985 \
-    --hash=sha256:9b148068e881faa26d878ff63e79650e208e95cf1c22bd3f77c3ca7b1d9821a3 \
-    --hash=sha256:aa6f302a3a0b5f240ee201297fff0bbfe2fa0d415a94aeb257d8b461032389bd \
-    --hash=sha256:ace9048de91293e467b44bce0f0381345078389814ff6e18dbac8fdbf896360e \
-    --hash=sha256:ad7525bf0241e5502168ae9c643a2f6c219fa0a283001cee4cf23a9b7da75879 \
-    --hash=sha256:b01a840ecc25dce235ae4c1b6a0daefb2a203dba0e6e980637ee9c2f6ee0df57 \
-    --hash=sha256:b076e625396e787448d27a411aefff867db2bffac8ed04e8f7056b07024eed5a \
-    --hash=sha256:b172f8e030e8ef247b3104902cc671e20df80163b60a203653150d2fc204d1ad \
-    --hash=sha256:b1f097878d74fe51e1ddd1be62d8e3682748875b461232cf4b52ddc6e6db0bba \
-    --hash=sha256:b95574d06aa9d2bd6e5cc35a5bbe35696342c96760b69dc4287dbd5abd4ad51d \
-    --hash=sha256:bda1c5f347550c359f841d6614fb8ca42ae5cb0b74d39f8a1e204815ebe25750 \
-    --hash=sha256:cec6b9ce3bd2b7853d4a4563801292bfee40b030c05a3d29555fd2a8ee9bd68c \
-    --hash=sha256:d1a987778b9c71da2fc8948e6f2656da6ef68f59298b7e9786849634c35d2c3c \
-    --hash=sha256:d74c08e9aaef995f8c4ef6d202dbd219c318450fe2a76da624f2ebb9c8ec5d9f \
-    --hash=sha256:e18aafdfb3e9ec0d261c942d35bd7c28d031c5855dadb491d2723ba54f4c3015 \
-    --hash=sha256:e216c5c45f89ef8971373fd1c5d8d1164b81f7f5f06bbf23c37e7908d19e8558 \
-    --hash=sha256:e695df2c58ce526eeab11a2e915448d3eb76f75dffe338ea613c1201b33bab2f \
-    --hash=sha256:e7575ab65ca8399c8c4f9a7d61bbd2d204c8b8e447aab9d355682205c9dd948d \
-    --hash=sha256:e995b3b76ccedc27fe4f477b349b7d64597e53a43fc2961db9d3fbace085d69d \
-    --hash=sha256:ea31689f05043d520113e0552f039603c4dd71fa4c287b64cb3606140c66f425 \
-    --hash=sha256:eb5507795caabd9b2ae3f1adc95f67b1104971c22c624bb354232d65c4fc90b3 \
-    --hash=sha256:eb8668cfbc279a536c633137deeb9435d2962caec279c3f8cf8b91fff6ff8953 \
-    --hash=sha256:ecea0c38c9079570163d663c0433a9af4094a60aafdca491c6a3d248c7432827 \
-    --hash=sha256:f25d8b92a4e31ff1bd873654ec367ae811b3a943583e05432ea29264782dc32c \
-    --hash=sha256:f60a297c3987c6c02ffb29effc70eadcbb412fe76947d394a1091a3615948e2f \
-    --hash=sha256:f973643ef532d4f9be71dd88cf7588936685fdb576d93a79fe9f65bc337d9d73
-distlib==0.3.9 ; python_version >= "3.9" and python_version < "3.15" \
+coverage==7.7.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:056d3017ed67e7ddf266e6f57378ece543755a4c9231e997789ab3bd11392c94 \
+    --hash=sha256:0ce8cf59e09d31a4915ff4c3b94c6514af4c84b22c4cc8ad7c3c546a86150a92 \
+    --hash=sha256:104bf640f408f4e115b85110047c7f27377e1a8b7ba86f7db4fa47aa49dc9a8e \
+    --hash=sha256:1393e5aa9441dafb0162c36c8506c648b89aea9565b31f6bfa351e66c11bcd82 \
+    --hash=sha256:1586ad158523f4133499a4f322b230e2cfef9cc724820dbd58595a5a236186f4 \
+    --hash=sha256:180e3fc68ee4dc5af8b33b6ca4e3bb8aa1abe25eedcb958ba5cff7123071af68 \
+    --hash=sha256:1b336d06af14f8da5b1f391e8dec03634daf54dfcb4d1c4fb6d04c09d83cef90 \
+    --hash=sha256:1c8fbce80b2b8bf135d105aa8f5b36eae0c57d702a1cc3ebdea2a6f03f6cdde5 \
+    --hash=sha256:2d673e3add00048215c2cc507f1228a7523fd8bf34f279ac98334c9b07bd2656 \
+    --hash=sha256:316f29cc3392fa3912493ee4c83afa4a0e2db04ff69600711f8c03997c39baaa \
+    --hash=sha256:33c1394d8407e2771547583b66a85d07ed441ff8fae5a4adb4237ad39ece60db \
+    --hash=sha256:37cbc7b0d93dfd133e33c7ec01123fbb90401dce174c3b6661d8d36fb1e30608 \
+    --hash=sha256:39abcacd1ed54e2c33c54bdc488b310e8ef6705833f7148b6eb9a547199d375d \
+    --hash=sha256:3ab7090f04b12dc6469882ce81244572779d3a4b67eea1c96fb9ecc8c607ef39 \
+    --hash=sha256:3b0e6e54591ae0d7427def8a4d40fca99df6b899d10354bab73cd5609807261c \
+    --hash=sha256:416e2a8845eaff288f97eaf76ab40367deafb9073ffc47bf2a583f26b05e5265 \
+    --hash=sha256:4545485fef7a8a2d8f30e6f79ce719eb154aab7e44217eb444c1d38239af2072 \
+    --hash=sha256:4c124025430249118d018dcedc8b7426f39373527c845093132196f2a483b6dd \
+    --hash=sha256:4fbb7a0c3c21908520149d7751cf5b74eb9b38b54d62997b1e9b3ac19a8ee2fe \
+    --hash=sha256:52fc89602cde411a4196c8c6894afb384f2125f34c031774f82a4f2608c59d7d \
+    --hash=sha256:55143aa13c49491f5606f05b49ed88663446dce3a4d3c5d77baa4e36a16d3573 \
+    --hash=sha256:57f3bd0d29bf2bd9325c0ff9cc532a175110c4bf8f412c05b2405fd35745266d \
+    --hash=sha256:5b2f144444879363ea8834cd7b6869d79ac796cb8f864b0cfdde50296cd95816 \
+    --hash=sha256:5efdeff5f353ed3352c04e6b318ab05c6ce9249c25ed3c2090c6e9cadda1e3b2 \
+    --hash=sha256:60e6347d1ed882b1159ffea172cb8466ee46c665af4ca397edbf10ff53e9ffaf \
+    --hash=sha256:693d921621a0c8043bfdc61f7d4df5ea6d22165fe8b807cac21eb80dd94e4bbd \
+    --hash=sha256:708f0a1105ef2b11c79ed54ed31f17e6325ac936501fc373f24be3e6a578146a \
+    --hash=sha256:70f0925c4e2bfc965369f417e7cc72538fd1ba91639cf1e4ef4b1a6b50439b3b \
+    --hash=sha256:7789e700f33f2b133adae582c9f437523cd5db8de845774988a58c360fc88253 \
+    --hash=sha256:7b6c96d69928a3a6767fab8dc1ce8a02cf0156836ccb1e820c7f45a423570d98 \
+    --hash=sha256:7d2a65876274acf544703e943c010b60bd79404e3623a1e5d52b64a6e2728de5 \
+    --hash=sha256:7f18d47641282664276977c604b5a261e51fefc2980f5271d547d706b06a837f \
+    --hash=sha256:89078312f06237417adda7c021c33f80f7a6d2db8572a5f6c330d89b080061ce \
+    --hash=sha256:8c938c6ae59be67ac19a7204e079efc94b38222cd7d0269f96e45e18cddeaa59 \
+    --hash=sha256:8e336b56301774ace6be0017ff85c3566c556d938359b61b840796a0202f805c \
+    --hash=sha256:a0a207c87a9f743c8072d059b4711f8d13c456eb42dac778a7d2e5d4f3c253a7 \
+    --hash=sha256:a2454b12a3f12cc4698f3508912e6225ec63682e2ca5a96f80a2b93cef9e63f3 \
+    --hash=sha256:a538a23119d1e2e2ce077e902d02ea3d8e0641786ef6e0faf11ce82324743944 \
+    --hash=sha256:aa4dff57fc21a575672176d5ab0ef15a927199e775c5e8a3d75162ab2b0c7705 \
+    --hash=sha256:ad0edaa97cb983d9f2ff48cadddc3e1fb09f24aa558abeb4dc9a0dbacd12cbb4 \
+    --hash=sha256:ae8006772c6b0fa53c33747913473e064985dac4d65f77fd2fdc6474e7cd54e4 \
+    --hash=sha256:b0fac2088ec4aaeb5468b814bd3ff5e5978364bfbce5e567c44c9e2854469f6c \
+    --hash=sha256:b3e212a894d8ae07fde2ca8b43d666a6d49bbbddb10da0f6a74ca7bd31f20054 \
+    --hash=sha256:b54a1ee4c6f1905a436cbaa04b26626d27925a41cbc3a337e2d3ff7038187f07 \
+    --hash=sha256:b667b91f4f714b17af2a18e220015c941d1cf8b07c17f2160033dbe1e64149f0 \
+    --hash=sha256:b8c36093aca722db73633cf2359026ed7782a239eb1c6db2abcff876012dc4cf \
+    --hash=sha256:bb356e7ae7c2da13f404bf8f75be90f743c6df8d4607022e759f5d7d89fe83f8 \
+    --hash=sha256:bce730d484038e97f27ea2dbe5d392ec5c2261f28c319a3bb266f6b213650135 \
+    --hash=sha256:c075d167a6ec99b798c1fdf6e391a1d5a2d054caffe9593ba0f97e3df2c04f0e \
+    --hash=sha256:c4e09534037933bf6eb31d804e72c52ec23219b32c1730f9152feabbd7499463 \
+    --hash=sha256:c5f8a5364fc37b2f172c26a038bc7ec4885f429de4a05fc10fdcb53fb5834c5c \
+    --hash=sha256:cb203c0afffaf1a8f5b9659a013f8f16a1b2cad3a80a8733ceedc968c0cf4c57 \
+    --hash=sha256:cc41374d2f27d81d6558f8a24e5c114580ffefc197fd43eabd7058182f743322 \
+    --hash=sha256:cd879d4646055a573775a1cec863d00c9ff8c55860f8b17f6d8eee9140c06166 \
+    --hash=sha256:d013c07061751ae81861cae6ec3a4fe04e84781b11fd4b6b4201590234b25c7b \
+    --hash=sha256:d8c7524779003d59948c51b4fcbf1ca4e27c26a7d75984f63488f3625c328b9b \
+    --hash=sha256:d9710521f07f526de30ccdead67e6b236fe996d214e1a7fba8b36e2ba2cd8261 \
+    --hash=sha256:e1ffde1d6bc2a92f9c9207d1ad808550873748ac2d4d923c815b866baa343b3f \
+    --hash=sha256:e7f559c36d5cdc448ee13e7e56ed7b6b5d44a40a511d584d388a0f5d940977ba \
+    --hash=sha256:f2a1e18a85bd066c7c556d85277a7adf4651f259b2579113844835ba1a74aafd \
+    --hash=sha256:f32b165bf6dfea0846a9c9c38b7e1d68f313956d60a15cde5d1709fddcaf3bee \
+    --hash=sha256:f5a2f71d6a91238e7628f23538c26aa464d390cbdedf12ee2a7a0fb92a24482a \
+    --hash=sha256:f81fe93dc1b8e5673f33443c0786c14b77e36f1025973b85e07c70353e46882b
+decorator==5.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360 \
+    --hash=sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a
+distlib==0.3.9 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \
     --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403
-exceptiongroup==1.2.2 ; python_version >= "3.9" and python_version < "3.11" \
+exceptiongroup==1.2.2 ; python_version >= "3.10" and python_version < "3.11" \
     --hash=sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b \
     --hash=sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc
-filelock==3.17.0 ; python_version >= "3.9" and python_version < "3.15" \
-    --hash=sha256:533dc2f7ba78dc2f0f531fc6c4940addf7b70a481e269a5a3b93be94ffbe8338 \
-    --hash=sha256:ee4e77401ef576ebb38cd7f13b9b28893194acc20a8e68e18730ba9c0e54660e
-importlib-metadata==8.6.1 ; python_version >= "3.9" and python_full_version < "3.10.2" \
+executing==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa \
+    --hash=sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755
+filelock==3.18.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2 \
+    --hash=sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de
+importlib-metadata==8.6.1 ; python_version >= "3.10" and python_full_version < "3.10.2" \
     --hash=sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e \
     --hash=sha256:310b41d755445d74569f993ccfc22838295d9fe005425094fad953d7f15c8580
-iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "3.15" \
-    --hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \
-    --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374
-joblib==1.4.2 ; python_version >= "3.9" and python_version < "3.15" \
+iniconfig==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7 \
+    --hash=sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760
+ipython==8.34.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:0419883fa46e0baa182c5d50ebb8d6b49df1889fdb70750ad6d8cfe678eda6e3 \
+    --hash=sha256:c31d658e754673ecc6514583e7dda8069e47136eb62458816b7d1e6625948b5a
+jedi==0.19.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0 \
+    --hash=sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9
+joblib==1.4.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6 \
     --hash=sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e
-jsonschema-specifications==2024.10.1 ; python_version >= "3.9" and python_version < "3.15" \
+jsonschema-specifications==2024.10.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:0f38b83639958ce1152d02a7f062902c41c8fd20d558b0c34344292d417ae272 \
     --hash=sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf
-jsonschema==4.23.0 ; python_version >= "3.9" and python_version < "3.15" \
+jsonschema==4.23.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4 \
     --hash=sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566
-mock==5.2.0 ; python_version >= "3.9" and python_version < "3.15" \
+markupsafe==2.1.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf \
+    --hash=sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff \
+    --hash=sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f \
+    --hash=sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3 \
+    --hash=sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532 \
+    --hash=sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f \
+    --hash=sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617 \
+    --hash=sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df \
+    --hash=sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4 \
+    --hash=sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906 \
+    --hash=sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f \
+    --hash=sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4 \
+    --hash=sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8 \
+    --hash=sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371 \
+    --hash=sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2 \
+    --hash=sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465 \
+    --hash=sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52 \
+    --hash=sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6 \
+    --hash=sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169 \
+    --hash=sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad \
+    --hash=sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2 \
+    --hash=sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0 \
+    --hash=sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029 \
+    --hash=sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f \
+    --hash=sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a \
+    --hash=sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced \
+    --hash=sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5 \
+    --hash=sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c \
+    --hash=sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf \
+    --hash=sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9 \
+    --hash=sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb \
+    --hash=sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad \
+    --hash=sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3 \
+    --hash=sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1 \
+    --hash=sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46 \
+    --hash=sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc \
+    --hash=sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a \
+    --hash=sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee \
+    --hash=sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900 \
+    --hash=sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5 \
+    --hash=sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea \
+    --hash=sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f \
+    --hash=sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5 \
+    --hash=sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e \
+    --hash=sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a \
+    --hash=sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f \
+    --hash=sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50 \
+    --hash=sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a \
+    --hash=sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b \
+    --hash=sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4 \
+    --hash=sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff \
+    --hash=sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2 \
+    --hash=sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46 \
+    --hash=sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b \
+    --hash=sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf \
+    --hash=sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5 \
+    --hash=sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5 \
+    --hash=sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab \
+    --hash=sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd \
+    --hash=sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68
+matplotlib-inline==0.1.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90 \
+    --hash=sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca
+mock==5.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:4e460e818629b4b173f32d08bf30d3af8123afbb8e04bb5707a1fd4799e503f0 \
     --hash=sha256:7ba87f72ca0e915175596069dbbcc7c75af7b5e9b9bc107ad6349ede0819982f
-nox-poetry==1.2.0 ; python_version >= "3.9" and python_version < "3.15" \
+nox-poetry==1.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:2531a404e3a21eb73fc1a587a548506a8e2c4c1e6e7ef0c1d0d8d6453b7e5d26 \
     --hash=sha256:266eea7a0ab3cad7f4121ecc05b76945036db3b67e6e347557f05010a18e2682
-nox==2024.10.9 ; python_version >= "3.9" and python_version < "3.15" \
+nox==2024.10.9 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \
     --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95
-numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.15" \
+numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b \
     --hash=sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818 \
     --hash=sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20 \
@@ -147,10 +226,10 @@ numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.15" \
     --hash=sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef \
     --hash=sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3 \
     --hash=sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f
-packaging==24.2 ; python_version >= "3.9" and python_version < "3.15" \
+packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \
     --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f
-pandas==2.2.3 ; python_version >= "3.9" and python_version < "3.15" \
+pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a \
     --hash=sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d \
     --hash=sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5 \
@@ -193,28 +272,46 @@ pandas==2.2.3 ; python_version >= "3.9" and python_version < "3.15" \
     --hash=sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015 \
     --hash=sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24 \
     --hash=sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319
-pep440==0.1.2 ; python_version >= "3.9" and python_version < "3.15" \
+parso==0.8.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18 \
+    --hash=sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d
+pep440==0.1.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:36d6ad73f2b5d07769294cafe183500ac89d848c922a3d3f521b968481880d51 \
     --hash=sha256:58b37246cc2b13fee1ca2a3c092cb3704d21ecf621a5bdbb168e44e697f6d04d
-platformdirs==4.3.6 ; python_version >= "3.9" and python_version < "3.15" \
-    --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \
-    --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb
-pluggy==1.5.0 ; python_version >= "3.9" and python_version < "3.15" \
+pexpect==4.9.0 ; python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "3.15" and (sys_platform != "win32" and sys_platform != "emscripten") \
+    --hash=sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523 \
+    --hash=sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f
+platformdirs==4.3.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:a03875334331946f13c549dbd8f4bac7a13a50a895a0eb1e8c6a8ace80d40a94 \
+    --hash=sha256:eb437d586b6a0986388f0d6f74aa0cde27b48d0e3d66843640bfb6bdcdb6e351
+pluggy==1.5.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \
     --hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669
-pyproject-hooks==1.2.0 ; python_version >= "3.9" and python_version < "3.15" \
+prompt-toolkit==3.0.50 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:544748f3860a2623ca5cd6d2795e7a14f3d0e1c3c9728359013f79877fc89bab \
+    --hash=sha256:9b6427eb19e479d98acff65196a307c555eb567989e6d88ebbb1b509d9779198
+ptyprocess==0.7.0 ; python_version >= "3.10" and python_version <= "3.11" and os_name != "nt" or python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "3.15" and os_name != "nt" or python_version >= "3.12" and python_version < "3.15" and (sys_platform != "win32" and sys_platform != "emscripten") \
+    --hash=sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35 \
+    --hash=sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220
+pure-eval==0.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0 \
+    --hash=sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42
+pygments==2.19.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f \
+    --hash=sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c
+pyproject-hooks==1.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:1e859bd5c40fae9448642dd871adf459e5e2084186e8d2c2a79a824c970da1f8 \
     --hash=sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913
-pytest-cov==5.0.0 ; python_version >= "3.9" and python_version < "3.15" \
+pytest-cov==5.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652 \
     --hash=sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857
-pytest-timeout==2.3.1 ; python_version >= "3.9" and python_version < "3.15" \
+pytest-timeout==2.3.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:12397729125c6ecbdaca01035b9e5239d4db97352320af155b3f5de1ba5165d9 \
     --hash=sha256:68188cb703edfc6a18fad98dc25a3c61e9f24d644b0b70f33af545219fc7813e
-pytest==8.3.5 ; python_version >= "3.9" and python_version < "3.15" \
+pytest==8.3.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820 \
     --hash=sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845
-python-constraint2==2.1.0 ; python_version >= "3.9" and python_version < "3.15" \
+python-constraint2==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:02f46e4a7e8a46048604870287f1c55312eea47c2c15dd58b51057cb7d057bdc \
     --hash=sha256:0e5ece0b4e85ed680af6b9db33ef3497a6f9499b8957cd830cd139f17ac29aef \
     --hash=sha256:0f3a09c1947e6a90b9558cd1651e86dbe10f698aad56247596f2b856307707f0 \
@@ -231,16 +328,16 @@ python-constraint2==2.1.0 ; python_version >= "3.9" and python_version < "3.15"
     --hash=sha256:f28d07eae04d83d454f0e6ba2da0678786a21f2d405998a3eec960b56d809692 \
     --hash=sha256:fbb6ab033a7a4250bce11ca12fdf8958c6c42853e933cf585dbd265e0967dd93 \
     --hash=sha256:fc3cffd0f16cb9b34d2e95bd6d27425dd24044073760477a1341e835fc9c45f4
-python-dateutil==2.9.0.post0 ; python_version >= "3.9" and python_version < "3.15" \
+python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \
     --hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427
-pytz==2025.1 ; python_version >= "3.9" and python_version < "3.15" \
+pytz==2025.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57 \
     --hash=sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e
-referencing==0.36.2 ; python_version >= "3.9" and python_version < "3.15" \
+referencing==0.36.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa \
     --hash=sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0
-rpds-py==0.23.1 ; python_version >= "3.9" and python_version < "3.15" \
+rpds-py==0.23.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:09cd7dbcb673eb60518231e02874df66ec1296c01a4fcd733875755c02014b19 \
     --hash=sha256:0f3288930b947cbebe767f84cf618d2cbe0b13be476e749da0e6a009f986248c \
     --hash=sha256:0fced9fd4a07a1ded1bac7e961ddd9753dd5d8b755ba8e05acba54a21f5f1522 \
@@ -344,7 +441,7 @@ rpds-py==0.23.1 ; python_version >= "3.9" and python_version < "3.15" \
     --hash=sha256:fad784a31869747df4ac968a351e070c06ca377549e4ace94775aaa3ab33ee06 \
     --hash=sha256:fc869af5cba24d45fb0399b0cfdbcefcf6910bf4dee5d74036a57cf5264b3ff4 \
     --hash=sha256:fee513135b5a58f3bb6d89e48326cd5aa308e4bcdf2f7d59f67c861ada482bf8
-ruff==0.4.10 ; python_version >= "3.9" and python_version < "3.15" \
+ruff==0.4.10 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:0f54c481b39a762d48f64d97351048e842861c6662d63ec599f67d515cb417f6 \
     --hash=sha256:18238c80ee3d9100d3535d8eb15a59c4a0753b45cc55f8bf38f38d6a597b9739 \
     --hash=sha256:330421543bd3222cdfec481e8ff3460e8702ed1e58b494cf9d9e4bf90db52b9d \
@@ -362,7 +459,7 @@ ruff==0.4.10 ; python_version >= "3.9" and python_version < "3.15" \
     --hash=sha256:d8f71885bce242da344989cae08e263de29752f094233f932d4f5cfb4ef36a81 \
     --hash=sha256:dd1fcee327c20addac7916ca4e2653fbbf2e8388d8a6477ce5b4e986b68ae6c0 \
     --hash=sha256:ffe3cd2f89cb54561c62e5fa20e8f182c0a444934bf430515a4b422f1ab7b7ca
-scikit-learn==1.6.1 ; python_version >= "3.9" and python_version < "3.15" \
+scikit-learn==1.6.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:0650e730afb87402baa88afbf31c07b84c98272622aaba002559b614600ca691 \
     --hash=sha256:0c8d036eb937dbb568c6242fa598d551d88fb4399c0344d95c001980ec1c7d36 \
     --hash=sha256:1061b7c028a8663fb9a1a1baf9317b64a257fcb036dae5c8752b2abef31d136f \
@@ -393,39 +490,63 @@ scikit-learn==1.6.1 ; python_version >= "3.9" and python_version < "3.15" \
     --hash=sha256:e7be3fa5d2eb9be7d77c3734ff1d599151bb523674be9b834e8da6abe132f44e \
     --hash=sha256:e8ca8cb270fee8f1f76fa9bfd5c3507d60c6438bbee5687f81042e2bb98e5a97 \
     --hash=sha256:fa909b1a36e000a03c382aade0bd2063fd5680ff8b8e501660c0f59f021a6415
-scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.15" \
-    --hash=sha256:017367484ce5498445aade74b1d5ab377acdc65e27095155e448c88497755a5d \
-    --hash=sha256:095a87a0312b08dfd6a6155cbbd310a8c51800fc931b8c0b84003014b874ed3c \
-    --hash=sha256:20335853b85e9a49ff7572ab453794298bcf0354d8068c5f6775a0eabf350aca \
-    --hash=sha256:27e52b09c0d3a1d5b63e1105f24177e544a222b43611aaf5bc44d4a0979e32f9 \
-    --hash=sha256:2831f0dc9c5ea9edd6e51e6e769b655f08ec6db6e2e10f86ef39bd32eb11da54 \
-    --hash=sha256:2ac65fb503dad64218c228e2dc2d0a0193f7904747db43014645ae139c8fad16 \
-    --hash=sha256:392e4ec766654852c25ebad4f64e4e584cf19820b980bc04960bca0b0cd6eaa2 \
-    --hash=sha256:436bbb42a94a8aeef855d755ce5a465479c721e9d684de76bf61a62e7c2b81d5 \
-    --hash=sha256:45484bee6d65633752c490404513b9ef02475b4284c4cfab0ef946def50b3f59 \
-    --hash=sha256:54f430b00f0133e2224c3ba42b805bfd0086fe488835effa33fa291561932326 \
-    --hash=sha256:5713f62f781eebd8d597eb3f88b8bf9274e79eeabf63afb4a737abc6c84ad37b \
-    --hash=sha256:5d72782f39716b2b3509cd7c33cdc08c96f2f4d2b06d51e52fb45a19ca0c86a1 \
-    --hash=sha256:637e98dcf185ba7f8e663e122ebf908c4702420477ae52a04f9908707456ba4d \
-    --hash=sha256:8335549ebbca860c52bf3d02f80784e91a004b71b059e3eea9678ba994796a24 \
-    --hash=sha256:949ae67db5fa78a86e8fa644b9a6b07252f449dcf74247108c50e1d20d2b4627 \
-    --hash=sha256:a014c2b3697bde71724244f63de2476925596c24285c7a637364761f8710891c \
-    --hash=sha256:a78b4b3345f1b6f68a763c6e25c0c9a23a9fd0f39f5f3d200efe8feda560a5fa \
-    --hash=sha256:cdd7dacfb95fea358916410ec61bbc20440f7860333aee6d882bb8046264e949 \
-    --hash=sha256:cfa31f1def5c819b19ecc3a8b52d28ffdcc7ed52bb20c9a7589669dd3c250989 \
-    --hash=sha256:d533654b7d221a6a97304ab63c41c96473ff04459e404b83275b60aa8f4b7004 \
-    --hash=sha256:d605e9c23906d1994f55ace80e0125c587f96c020037ea6aa98d01b4bd2e222f \
-    --hash=sha256:de3ade0e53bc1f21358aa74ff4830235d716211d7d077e340c7349bc3542e884 \
-    --hash=sha256:e89369d27f9e7b0884ae559a3a956e77c02114cc60a6058b4e5011572eea9299 \
-    --hash=sha256:eccfa1906eacc02de42d70ef4aecea45415f5be17e72b61bafcfd329bdc52e94 \
-    --hash=sha256:f26264b282b9da0952a024ae34710c2aff7d27480ee91a2e82b7b7073c24722f
-six==1.17.0 ; python_version >= "3.9" and python_version < "3.15" \
+scipy==1.15.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:01edfac9f0798ad6b46d9c4c9ca0e0ad23dbf0b1eb70e96adb9fa7f525eff0bf \
+    --hash=sha256:03205d57a28e18dfd39f0377d5002725bf1f19a46f444108c29bdb246b6c8a11 \
+    --hash=sha256:08b57a9336b8e79b305a143c3655cc5bdbe6d5ece3378578888d2afbb51c4e37 \
+    --hash=sha256:11e7ad32cf184b74380f43d3c0a706f49358b904fa7d5345f16ddf993609184d \
+    --hash=sha256:28a0d2c2075946346e4408b211240764759e0fabaeb08d871639b5f3b1aca8a0 \
+    --hash=sha256:2b871df1fe1a3ba85d90e22742b93584f8d2b8e6124f8372ab15c71b73e428b8 \
+    --hash=sha256:302093e7dfb120e55515936cb55618ee0b895f8bcaf18ff81eca086c17bd80af \
+    --hash=sha256:42dabaaa798e987c425ed76062794e93a243be8f0f20fff6e7a89f4d61cb3d40 \
+    --hash=sha256:447ce30cee6a9d5d1379087c9e474628dab3db4a67484be1b7dc3196bfb2fac9 \
+    --hash=sha256:4c6676490ad76d1c2894d77f976144b41bd1a4052107902238047fb6a473e971 \
+    --hash=sha256:54c462098484e7466362a9f1672d20888f724911a74c22ae35b61f9c5919183d \
+    --hash=sha256:597a0c7008b21c035831c39927406c6181bcf8f60a73f36219b69d010aa04737 \
+    --hash=sha256:5a6fd6eac1ce74a9f77a7fc724080d507c5812d61e72bd5e4c489b042455865e \
+    --hash=sha256:5ea7ed46d437fc52350b028b1d44e002646e28f3e8ddc714011aaf87330f2f32 \
+    --hash=sha256:601881dfb761311045b03114c5fe718a12634e5608c3b403737ae463c9885d53 \
+    --hash=sha256:62ca1ff3eb513e09ed17a5736929429189adf16d2d740f44e53270cc800ecff1 \
+    --hash=sha256:69ea6e56d00977f355c0f84eba69877b6df084516c602d93a33812aa04d90a3d \
+    --hash=sha256:6a8e34cf4c188b6dd004654f88586d78f95639e48a25dfae9c5e34a6dc34547e \
+    --hash=sha256:6d0194c37037707b2afa7a2f2a924cf7bac3dc292d51b6a925e5fcb89bc5c776 \
+    --hash=sha256:6f223753c6ea76983af380787611ae1291e3ceb23917393079dcc746ba60cfb5 \
+    --hash=sha256:6f5e296ec63c5da6ba6fa0343ea73fd51b8b3e1a300b0a8cae3ed4b1122c7462 \
+    --hash=sha256:7cd5b77413e1855351cdde594eca99c1f4a588c2d63711388b6a1f1c01f62274 \
+    --hash=sha256:869269b767d5ee7ea6991ed7e22b3ca1f22de73ab9a49c44bad338b725603301 \
+    --hash=sha256:87994da02e73549dfecaed9e09a4f9d58a045a053865679aeb8d6d43747d4df3 \
+    --hash=sha256:888307125ea0c4466287191e5606a2c910963405ce9671448ff9c81c53f85f58 \
+    --hash=sha256:92233b2df6938147be6fa8824b8136f29a18f016ecde986666be5f4d686a91a4 \
+    --hash=sha256:9412f5e408b397ff5641080ed1e798623dbe1ec0d78e72c9eca8992976fa65aa \
+    --hash=sha256:9b18aa747da280664642997e65aab1dd19d0c3d17068a04b3fe34e2559196cb9 \
+    --hash=sha256:9de9d1416b3d9e7df9923ab23cd2fe714244af10b763975bea9e4f2e81cebd27 \
+    --hash=sha256:a2ec871edaa863e8213ea5df811cd600734f6400b4af272e1c011e69401218e9 \
+    --hash=sha256:a5080a79dfb9b78b768cebf3c9dcbc7b665c5875793569f48bf0e2b1d7f68f6f \
+    --hash=sha256:a8bf5cb4a25046ac61d38f8d3c3426ec11ebc350246a4642f2f315fe95bda655 \
+    --hash=sha256:b09ae80010f52efddb15551025f9016c910296cf70adbf03ce2a8704f3a5ad20 \
+    --hash=sha256:b5e025e903b4f166ea03b109bb241355b9c42c279ea694d8864d033727205e65 \
+    --hash=sha256:bad78d580270a4d32470563ea86c6590b465cb98f83d760ff5b0990cb5518a93 \
+    --hash=sha256:bae43364d600fdc3ac327db99659dcb79e6e7ecd279a75fe1266669d9a652828 \
+    --hash=sha256:c4697a10da8f8765bb7c83e24a470da5797e37041edfd77fd95ba3811a47c4fd \
+    --hash=sha256:c90ebe8aaa4397eaefa8455a8182b164a6cc1d59ad53f79943f266d99f68687f \
+    --hash=sha256:cd58a314d92838f7e6f755c8a2167ead4f27e1fd5c1251fd54289569ef3495ec \
+    --hash=sha256:cf72ff559a53a6a6d77bd8eefd12a17995ffa44ad86c77a5df96f533d4e6c6bb \
+    --hash=sha256:def751dd08243934c884a3221156d63e15234a3155cf25978b0a668409d45eb6 \
+    --hash=sha256:e7c68b6a43259ba0aab737237876e5c2c549a031ddb7abc28c7b47f22e202ded \
+    --hash=sha256:ecf797d2d798cf7c838c6d98321061eb3e72a74710e6c40540f0e8087e3b499e \
+    --hash=sha256:f031846580d9acccd0044efd1a90e6f4df3a6e12b4b6bd694a7bc03a89892b28 \
+    --hash=sha256:fb530e4794fc8ea76a4a21ccb67dea33e5e0e60f07fc38a49e821e1eae3b71a0 \
+    --hash=sha256:fe8a9eb875d430d81755472c5ba75e84acc980e4a8f6204d402849234d3017db
+six==1.17.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \
     --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81
-threadpoolctl==3.5.0 ; python_version >= "3.9" and python_version < "3.15" \
-    --hash=sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107 \
-    --hash=sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467
-tomli==2.2.1 ; python_version >= "3.9" and python_version < "3.15" \
+stack-data==0.6.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9 \
+    --hash=sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695
+threadpoolctl==3.6.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb \
+    --hash=sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e
+tomli==2.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \
     --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \
     --hash=sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c \
@@ -458,21 +579,27 @@ tomli==2.2.1 ; python_version >= "3.9" and python_version < "3.15" \
     --hash=sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272 \
     --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \
     --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7
-tomlkit==0.13.2 ; python_version >= "3.9" and python_version < "3.15" \
+tomlkit==0.13.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde \
     --hash=sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79
-typing-extensions==4.12.2 ; python_version >= "3.9" and python_version < "3.13" \
+traitlets==5.14.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7 \
+    --hash=sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f
+typing-extensions==4.12.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \
     --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8
-tzdata==2025.1 ; python_version >= "3.9" and python_version < "3.15" \
+tzdata==2025.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694 \
     --hash=sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639
-virtualenv==20.29.3 ; python_version >= "3.9" and python_version < "3.15" \
+virtualenv==20.29.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:3e3d00f5807e83b234dfb6122bf37cfadf4be216c53a49ac059d02414f819170 \
     --hash=sha256:95e39403fcf3940ac45bc717597dba16110b74506131845d9b687d5e73d947ac
-xmltodict==0.14.2 ; python_version >= "3.9" and python_version < "3.15" \
+wcwidth==0.2.13 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859 \
+    --hash=sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5
+xmltodict==0.14.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:201e7c28bb210e374999d1dde6382923ab0ed1a8a5faeece48ab525b7810a553 \
     --hash=sha256:20cc7d723ed729276e808f26fb6b3599f786cbc37e06c65e192ba77c40f20aac
-zipp==3.21.0 ; python_version >= "3.9" and python_full_version < "3.10.2" \
+zipp==3.21.0 ; python_version >= "3.10" and python_full_version < "3.10.2" \
     --hash=sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4 \
     --hash=sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931
diff --git a/pyproject.toml b/pyproject.toml
index c62cb66e6..2529c7adc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -109,7 +109,7 @@ markupsafe = "^2.0.1"       # TODO why do we need markupsafe here?
 # sphinx-autodoc-typehints = "^1.24.0"
 
 # ATTENTION: if anything is changed here, run `poetry update`
-# Please also run `poetry export -f requirements.txt --output docs/requirements_test.txt --with test`
+# Please also run `poetry export -f requirements.txt --output doc/requirements_test.txt --with test`
 [tool.poetry.group.test]
 optional = true
 [tool.poetry.group.test.dependencies]

From 64dfd95f3a4da7b65031a72d0d7c58d18b92852c Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 12:37:21 +0100
Subject: [PATCH 135/253] Improved assertion error message

---
 test/test_runners.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_runners.py b/test/test_runners.py
index 527c1d252..13a398d3e 100644
--- a/test/test_runners.py
+++ b/test/test_runners.py
@@ -158,7 +158,7 @@ def test_time_keeping(env):
                               answer=answer)
     max_time = (time.perf_counter() - start) * 1e3  # ms
 
-    assert len(result) >= 10
+    assert len(result) >= 10, f"{len(result)=} < 10 for {kernel_name=} with {tune_params=}"
 
     timings = [
         'total_framework_time', 'total_strategy_time', 'total_compile_time',

From 5a83d3659aef4737238422e32acaee5b0e97bdde Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 12:38:46 +0100
Subject: [PATCH 136/253] Added logging in case default block size restriction
 is added

---
 kernel_tuner/searchspace.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 6331bed33..8b285f5ad 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -236,6 +236,7 @@ def __build_searchspace_bruteforce(self, block_size_names: list, max_threads: in
                     isinstance(self._modified_restrictions, list)
                     and block_size_restriction_spaced not in self._modified_restrictions
                 ):
+                    print(f"added default block size restriction '{block_size_restriction_spaced}'")
                     self._modified_restrictions.append(block_size_restriction_spaced)
                     if isinstance(self.restrictions, list):
                         self.restrictions.append(block_size_restriction_spaced)

From 5e3512b748b0027ea122904a8cd262dba1a06373 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 12:39:06 +0100
Subject: [PATCH 137/253] Adjusted path to benchmarking kernels

---
 kernel_tuner/backends/hypertuner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index b15da315a..6348cc56d 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -62,7 +62,7 @@ def compile(self, kernel_instance):
 
         # TODO get applications & GPUs args from benchmark
         gpus = ["A100", "A4000", "MI250X"]
-        folder = "../../autotuning_methodology/benchmark_hub/kernels"
+        folder = "../autotuning_methodology/benchmark_hub/kernels"
         applications = [
             {
                 "name": "dedispersion_milo",

From bff6d7b820300bf2805508b8a34add82917f056e Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 12:39:46 +0100
Subject: [PATCH 138/253] Automatically adjust genetic algorithm popsize for
 smaller search spaces

---
 kernel_tuner/strategies/genetic_algorithm.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 0ca0f5f75..6a8565118 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -21,9 +21,15 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
     options = tuning_options.strategy_options
     pop_size, generations, method, mutation_chance = common.get_options(options, _options)
-    pop_size = min(round(searchspace.size / 2), pop_size)
     crossover = supported_methods[method]
 
+    # if left to the default, adjust the popsize to a sensible value for small search spaces
+    if pop_size == _options["popsize"][1]:
+        pop_size = min(round(searchspace.size / 2), pop_size)
+    else:
+        # otherwise, just make sure it doesn't exceed the search space size
+        pop_size = min(searchspace.size, pop_size)
+
     best_score = 1e20
     cost_func = CostFunc(searchspace, tuning_options, runner)
 

From 8ddce18916187c894c0bc94cac9b5bb740aec289 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 13:46:50 +0100
Subject: [PATCH 139/253] Updated poetry configuration fields to project
 configuration fields, updated dependencies

---
 doc/requirements.txt      |  2 +-
 doc/requirements_test.txt | 93 +++++++++++++++++++++++----------------
 pyproject.toml            | 90 +++++++++++++++++--------------------
 3 files changed, 98 insertions(+), 87 deletions(-)

diff --git a/doc/requirements.txt b/doc/requirements.txt
index 5f316bb33..fd92b26ff 100644
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@@ -36,7 +36,7 @@ nbclient==0.10.2 ; python_version >= "3.10" and python_version <= "3.11" or pyth
 nbconvert==7.16.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 nbformat==5.10.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 nbsphinx==0.9.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+numpy==2.2.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 pandocfilters==1.5.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
diff --git a/doc/requirements_test.txt b/doc/requirements_test.txt
index 11ed8518b..b5a5c1443 100644
--- a/doc/requirements_test.txt
+++ b/doc/requirements_test.txt
@@ -189,43 +189,62 @@ nox-poetry==1.2.0 ; python_version >= "3.10" and python_version <= "3.11" or pyt
 nox==2024.10.9 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \
     --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95
-numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
-    --hash=sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b \
-    --hash=sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818 \
-    --hash=sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20 \
-    --hash=sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0 \
-    --hash=sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010 \
-    --hash=sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a \
-    --hash=sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea \
-    --hash=sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c \
-    --hash=sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71 \
-    --hash=sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110 \
-    --hash=sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be \
-    --hash=sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a \
-    --hash=sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a \
-    --hash=sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5 \
-    --hash=sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed \
-    --hash=sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd \
-    --hash=sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c \
-    --hash=sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e \
-    --hash=sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0 \
-    --hash=sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c \
-    --hash=sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a \
-    --hash=sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b \
-    --hash=sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0 \
-    --hash=sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6 \
-    --hash=sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2 \
-    --hash=sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a \
-    --hash=sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30 \
-    --hash=sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218 \
-    --hash=sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5 \
-    --hash=sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07 \
-    --hash=sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2 \
-    --hash=sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4 \
-    --hash=sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764 \
-    --hash=sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef \
-    --hash=sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3 \
-    --hash=sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f
+numpy==2.2.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:05c076d531e9998e7e694c36e8b349969c56eadd2cdcd07242958489d79a7286 \
+    --hash=sha256:0d54974f9cf14acf49c60f0f7f4084b6579d24d439453d5fc5805d46a165b542 \
+    --hash=sha256:11c43995255eb4127115956495f43e9343736edb7fcdb0d973defd9de14cd84f \
+    --hash=sha256:188dcbca89834cc2e14eb2f106c96d6d46f200fe0200310fc29089657379c58d \
+    --hash=sha256:1974afec0b479e50438fc3648974268f972e2d908ddb6d7fb634598cdb8260a0 \
+    --hash=sha256:1cf4e5c6a278d620dee9ddeb487dc6a860f9b199eadeecc567f777daace1e9e7 \
+    --hash=sha256:207a2b8441cc8b6a2a78c9ddc64d00d20c303d79fba08c577752f080c4007ee3 \
+    --hash=sha256:218f061d2faa73621fa23d6359442b0fc658d5b9a70801373625d958259eaca3 \
+    --hash=sha256:2aad3c17ed2ff455b8eaafe06bcdae0062a1db77cb99f4b9cbb5f4ecb13c5146 \
+    --hash=sha256:2fa8fa7697ad1646b5c93de1719965844e004fcad23c91228aca1cf0800044a1 \
+    --hash=sha256:31504f970f563d99f71a3512d0c01a645b692b12a63630d6aafa0939e52361e6 \
+    --hash=sha256:3387dd7232804b341165cedcb90694565a6015433ee076c6754775e85d86f1fc \
+    --hash=sha256:4ba5054787e89c59c593a4169830ab362ac2bee8a969249dc56e5d7d20ff8df9 \
+    --hash=sha256:4f92084defa704deadd4e0a5ab1dc52d8ac9e8a8ef617f3fbb853e79b0ea3592 \
+    --hash=sha256:65ef3468b53269eb5fdb3a5c09508c032b793da03251d5f8722b1194f1790c00 \
+    --hash=sha256:6f527d8fdb0286fd2fd97a2a96c6be17ba4232da346931d967a0630050dfd298 \
+    --hash=sha256:7051ee569db5fbac144335e0f3b9c2337e0c8d5c9fee015f259a5bd70772b7e8 \
+    --hash=sha256:7716e4a9b7af82c06a2543c53ca476fa0b57e4d760481273e09da04b74ee6ee2 \
+    --hash=sha256:79bd5f0a02aa16808fcbc79a9a376a147cc1045f7dfe44c6e7d53fa8b8a79392 \
+    --hash=sha256:7a4e84a6283b36632e2a5b56e121961f6542ab886bc9e12f8f9818b3c266bfbb \
+    --hash=sha256:8120575cb4882318c791f839a4fd66161a6fa46f3f0a5e613071aae35b5dd8f8 \
+    --hash=sha256:81413336ef121a6ba746892fad881a83351ee3e1e4011f52e97fba79233611fd \
+    --hash=sha256:8146f3550d627252269ac42ae660281d673eb6f8b32f113538e0cc2a9aed42b9 \
+    --hash=sha256:879cf3a9a2b53a4672a168c21375166171bc3932b7e21f622201811c43cdd3b0 \
+    --hash=sha256:892c10d6a73e0f14935c31229e03325a7b3093fafd6ce0af704be7f894d95687 \
+    --hash=sha256:92bda934a791c01d6d9d8e038363c50918ef7c40601552a58ac84c9613a665bc \
+    --hash=sha256:9ba03692a45d3eef66559efe1d1096c4b9b75c0986b5dff5530c378fb8331d4f \
+    --hash=sha256:9eeea959168ea555e556b8188da5fa7831e21d91ce031e95ce23747b7609f8a4 \
+    --hash=sha256:a0258ad1f44f138b791327961caedffbf9612bfa504ab9597157806faa95194a \
+    --hash=sha256:a761ba0fa886a7bb33c6c8f6f20213735cb19642c580a931c625ee377ee8bd39 \
+    --hash=sha256:a7b9084668aa0f64e64bd00d27ba5146ef1c3a8835f3bd912e7a9e01326804c4 \
+    --hash=sha256:a84eda42bd12edc36eb5b53bbcc9b406820d3353f1994b6cfe453a33ff101775 \
+    --hash=sha256:ab2939cd5bec30a7430cbdb2287b63151b77cf9624de0532d629c9a1c59b1d5c \
+    --hash=sha256:ac0280f1ba4a4bfff363a99a6aceed4f8e123f8a9b234c89140f5e894e452ecd \
+    --hash=sha256:adf8c1d66f432ce577d0197dceaac2ac00c0759f573f28516246351c58a85020 \
+    --hash=sha256:b4adfbbc64014976d2f91084915ca4e626fbf2057fb81af209c1a6d776d23e3d \
+    --hash=sha256:bb649f8b207ab07caebba230d851b579a3c8711a851d29efe15008e31bb4de24 \
+    --hash=sha256:bce43e386c16898b91e162e5baaad90c4b06f9dcbe36282490032cec98dc8ae7 \
+    --hash=sha256:bd3ad3b0a40e713fc68f99ecfd07124195333f1e689387c180813f0e94309d6f \
+    --hash=sha256:c3f7ac96b16955634e223b579a3e5798df59007ca43e8d451a0e6a50f6bfdfba \
+    --hash=sha256:cf28633d64294969c019c6df4ff37f5698e8326db68cc2b66576a51fad634880 \
+    --hash=sha256:d0f35b19894a9e08639fd60a1ec1978cb7f5f7f1eace62f38dd36be8aecdef4d \
+    --hash=sha256:db1f1c22173ac1c58db249ae48aa7ead29f534b9a948bc56828337aa84a32ed6 \
+    --hash=sha256:dbe512c511956b893d2dacd007d955a3f03d555ae05cfa3ff1c1ff6df8851854 \
+    --hash=sha256:df2f57871a96bbc1b69733cd4c51dc33bea66146b8c63cacbfed73eec0883017 \
+    --hash=sha256:e2f085ce2e813a50dfd0e01fbfc0c12bbe5d2063d99f8b29da30e544fb6483b8 \
+    --hash=sha256:e642d86b8f956098b564a45e6f6ce68a22c2c97a04f5acd3f221f57b8cb850ae \
+    --hash=sha256:e9e0a277bb2eb5d8a7407e14688b85fd8ad628ee4e0c7930415687b6564207a4 \
+    --hash=sha256:ea2bb7e2ae9e37d96835b3576a4fa4b3a97592fbea8ef7c3587078b0068b8f09 \
+    --hash=sha256:ee4d528022f4c5ff67332469e10efe06a267e32f4067dc76bb7e2cddf3cd25ff \
+    --hash=sha256:f05d4198c1bacc9124018109c5fba2f3201dbe7ab6e92ff100494f236209c960 \
+    --hash=sha256:f34dc300df798742b3d06515aa2a0aee20941c13579d7a2f2e10af01ae4901ee \
+    --hash=sha256:f4162988a360a29af158aeb4a2f4f09ffed6a969c9776f8f3bdee9b06a8ab7e5 \
+    --hash=sha256:f486038e44caa08dbd97275a9a35a283a8f1d2f0ee60ac260a1790e76660833c \
+    --hash=sha256:f7de08cbe5551911886d1ab60de58448c6df0f67d9feb7d1fb21e9875ef95e91
 packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \
     --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f
diff --git a/pyproject.toml b/pyproject.toml
index 2529c7adc..02e70089f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,24 +2,22 @@
 requires = ["poetry-core>=1.7.0", "setuptools>=67.7.2"]
 build-backend = "poetry.core.masonry.api"
 
-[tool.poetry]
+[project]
 name = "kernel_tuner"
-packages = [{ include = "kernel_tuner", from = "." }]
 description = "An easy to use CUDA/OpenCL kernel tuner in Python"
 version = "1.0" # adhere to PEP440 versioning: https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#id55
+readme = "README.md"
 license = "Apache-2.0"
 authors = [
-    "Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>",
-    "Alessio Sclocco <a.sclocco@esciencecenter.nl>",
-    "Stijn Heldens <s.heldens@esciencecenter.nl>",
-    "Floris-Jan Willemsen <f.j.Willemsen@esciencecenter.nl>",
-    "Willem-Jan Palenstijn <w.j.palenstijn@liacs.leidenuniv.nl>",
-    "Bram Veenboer <veenboer@astron.nl>",
-    "Richard Schoonhoven <Richard.Schoonhoven@cwi.nl>",
-    "Leon Oostrum <l.oostrum@esciencecenter.nl",
+    { name = "Ben van Werkhoven", email = "b.vanwerkhoven@esciencecenter.nl"},
+    { name = "Alessio Sclocco", email = "a.sclocco@esciencecenter.nl" },
+    { name = "Stijn Heldens", email = "s.heldens@esciencecenter.nl" },
+    { name = "Floris-Jan Willemsen", email = "f.j.Willemsen@esciencecenter.nl" },
+    { name = "Willem-Jan Palenstijn", email = "w.j.palenstijn@liacs.leidenuniv.nl" },
+    { name = "Bram Veenboer", email = "veenboer@astron.nl" },
+    { name = "Richard Schoonhoven", email = "Richard.Schoonhoven@cwi.nl"  },
+    { name = "Leon Oostrum", email = "l.oostrum@esciencecenter.nl" },
 ]
-
-readme = "README.md"
 keywords = [
     "auto-tuning",
     "gpu",
@@ -44,55 +42,49 @@ classifiers = [
     "Topic :: Software Development",
     "Topic :: System :: Distributed Computing",
 ]
-include = [
-    { path = "test" },
-] # this ensures that people won't have to clone the whole repo to include notebooks, they can just do `pip install kernel_tuner[tutorial,cuda]`
+
+# ATTENTION: if anything is changed here, run `poetry update`
+requires-python = ">=3.10,<3.15"  # NOTE when changing the Python versions, also change the test versions in the Noxfile and GitHub Actions
+dependencies = [
+    "numpy>=1.26.0",    # Python 3.12 requires numpy at least 1.26
+    "scipy>=1.14.1",
+    "packaging",        # required by file_utils
+    "jsonschema",
+    "python-constraint2>=2.1.0",
+    "xmltodict",
+    "pandas>=2.0.0",
+    "scikit-learn>=1.0.2",
+]
+# NOTE Torch can be used with Kernel Tuner, but is not a dependency, should be up to the user to use it
+
+[project.urls]
 homepage = "https://KernelTuner.github.io/kernel_tuner/"
 documentation = "https://KernelTuner.github.io/kernel_tuner/"
 repository = "https://github.com/KernelTuner/kernel_tuner"
-[tool.poetry.urls]
-"Tracker" = "https://github.com/KernelTuner/kernel_tuner/issues"
-[tool.poetry.build]
-generate-setup-file = false
-[tool.poetry.scripts]
+changelog = "https://github.com/KernelTuner/kernel_tuner/blob/master/CHANGELOG.md"
+issues = "https://github.com/KernelTuner/kernel_tuner/issues"
+
+[project.scripts]
 kernel_tuner = "kernel_tuner.interface:entry_point"
 
-# ATTENTION: if anything is changed here, run `poetry update`
-[tool.poetry.dependencies]
-python = ">=3.10,<3.15"    # NOTE when changing the supported Python versions, also change the test versions in the noxfile
-numpy = "^1.26.0"          # Python 3.12 requires numpy at least 1.26
-scipy = ">=1.14.1"
-packaging = "*"                 # required by file_utils
-jsonschema = "*"
-python-constraint2 = "^2.1.0"
-xmltodict = "*"
-pandas = ">=2.0.0"
-scikit-learn = ">=1.0.2"
-# Torch can be used with Kernel Tuner, but is not a dependency, should be up to the user to use it
+[tool.poetry]
+packages = [{ include = "kernel_tuner", from = "." }]
+include = [
+    { path = "test" },
+] # this ensures that people won't have to clone the whole repo to include notebooks, they can just do `pip install kernel_tuner[tutorial,cuda]`
 
 # List of optional dependencies for user installation, e.g. `pip install kernel_tuner[cuda]`, used in the below `extras`.
 # Please note that this is different from the dependency groups below, e.g. `docs` and `test`, those are for development.
 # CUDA
-pycuda = { version = "^2024.1", optional = true }           # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile
-nvidia-ml-py = { version = "^12.535.108", optional = true }
-pynvml = { version = "^11.4.1", optional = true }
-# cupy-cuda11x = { version = "*", optional = true }    # Note: these are completely optional dependencies as described in CONTRIBUTING.rst
+# cupy-cuda11x = { version = "*", optional = true }    # NOTE: these are completely optional dependencies as described in CONTRIBUTING.rst
 # cupy-cuda12x = { version = "*", optional = true }
 # cuda-python = { version = "*", optional = true }
-# OpenCL
-pyopencl = { version = "*", optional = true } # Attention: if pyopencl is changed here, also change `session.install("pyopencl")` in the Noxfile
-# HIP
-hip-python-fork = { version = "*", optional = true }
-# Tutorial (for the notebooks used in the examples)
-jupyter = { version = "^1.0.0", optional = true }
-matplotlib = { version = "^3.5.0", optional = true }
-
-[tool.poetry.extras]
-cuda = ["pycuda", "nvidia-ml-py", "pynvml"]
-opencl = ["pyopencl"]
-cuda_opencl = ["pycuda", "pyopencl"]
+[project.optional-dependencies]
+cuda = ["pycuda>=2024.1", "nvidia-ml-py>=12.535.108", "pynvml>=11.4.1"] # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile
+opencl = ["pyopencl"]                                                   # Attention: if pyopencl is changed here, also change `session.install("pyopencl")` in the Noxfile
+cuda_opencl = ["pycuda>=2024.1", "pyopencl"]                            # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile
 hip = ["hip-python-fork"]
-tutorial = ["jupyter", "matplotlib", "nvidia-ml-py"]
+tutorial = ["jupyter>=1.0.0", "matplotlib>=3.5.0", "nvidia-ml-py>=12.535.108"]
 
 # ATTENTION: if anything is changed here, run `poetry update` and `poetry export --with docs --without-hashes --format=requirements.txt --output doc/requirements.txt`
 # Please note that there is overlap with the `dev` group

From 19470e440f40f5a90a725dc403425a86e52898a1 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 14:47:41 +0100
Subject: [PATCH 140/253] Removed not yet fully implemented bayesian
 optimization references, made tests work

---
 kernel_tuner/interface.py                     |   12 +-
 kernel_tuner/strategies/bayes_opt.py          |    6 +-
 kernel_tuner/strategies/bayes_opt_BOTorch.py  |  245 ----
 kernel_tuner/strategies/bayes_opt_GPyTorch.py |  926 --------------
 .../strategies/bayes_opt_GPyTorch_lean.py     | 1084 -----------------
 .../strategies/bayes_opt_alt_BOTorch.py       |   75 --
 kernel_tuner/strategies/bayes_opt_ax.py       |   29 -
 kernel_tuner/strategies/bayes_opt_old.py      |    2 +-
 test/strategies/test_bayesian_optimization.py |    2 +-
 test/test_searchspace.py                      |   16 -
 10 files changed, 6 insertions(+), 2391 deletions(-)
 delete mode 100644 kernel_tuner/strategies/bayes_opt_BOTorch.py
 delete mode 100644 kernel_tuner/strategies/bayes_opt_GPyTorch.py
 delete mode 100644 kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
 delete mode 100644 kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
 delete mode 100644 kernel_tuner/strategies/bayes_opt_ax.py

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 5f4c1b628..9741bd1d8 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -50,11 +50,6 @@
 from kernel_tuner.strategies import (
     basinhopping,
     bayes_opt,
-    bayes_opt_alt_BOTorch,
-    bayes_opt_BOTorch,
-    bayes_opt_GPyTorch,
-    bayes_opt_GPyTorch_lean,
-    bayes_opt_old,
     brute_force,
     diff_evo,
     dual_annealing,
@@ -85,12 +80,7 @@
     "pso": pso,
     "simulated_annealing": simulated_annealing,
     "firefly_algorithm": firefly_algorithm,
-    "bayes_opt": bayes_opt,
-    "bayes_opt_old": bayes_opt_old,
-    "bayes_opt_GPyTorch": bayes_opt_GPyTorch,
-    "bayes_opt_GPyTorch_lean": bayes_opt_GPyTorch_lean,
-    "bayes_opt_BOTorch": bayes_opt_BOTorch,
-    "bayes_opt_BOTorch_alt": bayes_opt_alt_BOTorch,
+    "bayes_opt": bayes_opt
 }
 
 
diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
index e4c9c52a2..775e4193a 100644
--- a/kernel_tuner/strategies/bayes_opt.py
+++ b/kernel_tuner/strategies/bayes_opt.py
@@ -238,7 +238,7 @@ def get_hyperparam(name: str, default, supported_values=list()):
         self.invalid_value = 1e20
         self.opt_direction = opt_direction
         if opt_direction == "min":
-            self.worst_value = np.PINF
+            self.worst_value = np.inf
             self.argopt = np.argmin
         elif opt_direction == "max":
             self.worst_value = np.NINF
@@ -265,7 +265,7 @@ def get_hyperparam(name: str, default, supported_values=list()):
         self.__visited_num = 0
         self.__visited_valid_num = 0
         self.__visited_searchspace_indices = [False] * self.searchspace_size
-        self.__observations = [np.NaN] * self.searchspace_size
+        self.__observations = [np.nan] * self.searchspace_size
         self.__valid_observation_indices = [False] * self.searchspace_size
         self.__valid_params = list()
         self.__valid_observations = list()
@@ -314,7 +314,7 @@ def is_not_visited(self, index: int) -> bool:
 
     def is_valid(self, observation: float) -> bool:
         """Returns whether an observation is valid."""
-        return not (observation is None or observation == self.invalid_value or observation == np.NaN)
+        return not (observation is None or observation == self.invalid_value or observation == np.nan)
 
     def get_af_by_name(self, name: str):
         """Get the basic acquisition functions by their name."""
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
deleted file mode 100644
index 5ee2854dc..000000000
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ /dev/null
@@ -1,245 +0,0 @@
-"""Bayesian Optimization implementation using BO Torch."""
-
-from math import ceil, sqrt
-
-import numpy as np
-
-try:
-    import torch
-    from botorch import fit_gpytorch_mll
-    from botorch.acquisition import (
-        LogExpectedImprovement,
-        ProbabilityOfImprovement,
-        qExpectedUtilityOfBestOption,
-        qLogExpectedImprovement,
-        qLowerBoundMaxValueEntropy,
-    )
-    from botorch.models import MixedSingleTaskGP, SingleTaskGP, SingleTaskVariationalGP
-    from botorch.models.transforms import Normalize, Standardize
-    from botorch.optim import optimize_acqf_discrete
-    from botorch.optim.fit import fit_gpytorch_mll_torch
-    from gpytorch.mlls import ExactMarginalLogLikelihood, VariationalELBO
-    from torch import Tensor
-    bayes_opt_present = True
-except ImportError:
-    bayes_opt_present = False
-
-import gpytorch.settings as gp_settings
-import linear_operator.settings as linop_settings
-
-from kernel_tuner.searchspace import Searchspace
-from kernel_tuner.strategies.common import CostFunc
-from kernel_tuner.util import ErrorConfig, StopCriterionReached
-
-# set gpytorch to approximate mode for faster fitting
-linop_settings._fast_covar_root_decomposition._default = True
-linop_settings._fast_log_prob._default = True
-linop_settings._fast_solves._default = True
-linop_settings.cholesky_max_tries._global_value = 6
-linop_settings.max_cholesky_size._global_value = 800
-gp_settings.max_eager_kernel_size._global_value = 800
-
-
-def tune(searchspace: Searchspace, runner, tuning_options):
-    """The entry function for tuning a searchspace using this algorithm."""
-    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
-    bo = BayesianOptimization(searchspace, runner, tuning_options)
-    return bo.run(max_fevals)
-
-class BayesianOptimization():
-    """Bayesian Optimization class."""
-
-    def __init__(self, searchspace: Searchspace, runner, tuning_options):
-        """Initialization of the Bayesian Optimization class. Does not evaluate configurations."""
-        self.initial_sample_taken = False
-        self.initial_sample_size: int = tuning_options.strategy_options.get("popsize", 20)
-        self.tuning_options = tuning_options
-        self.cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, return_invalid=True, return_raw=True)
-        self.maximize = tuning_options['objective_higher_is_better']
-
-        # select the device to use (CUDA or Apple Silicon MPS if available)
-        # TODO keep an eye on Apple Silicon support. Currently `linalg_cholesky` is not yet implemented for MPS (issue reported: https://github.com/pytorch/pytorch/issues/77764).
-        self.tensor_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-        # set up conversion to tensors
-        self.searchspace = searchspace
-        self.searchspace.initialize_tensorspace(dtype=torch.float32, device=self.tensor_device)
-        self.searchspace_tensors = searchspace.get_tensorspace()
-        self.train_X = torch.empty(0, **self.searchspace.tensor_kwargs) # TODO implement continuing from cache
-        self.train_Y = torch.empty(0, **self.searchspace.tensor_kwargs)
-        self.train_Yvar = torch.empty(0, **self.searchspace.tensor_kwargs)
-
-    def is_valid_result(self, result, results=None):
-        """Returns whether the result is valid."""
-        if results is None:
-            results = []
-        return not isinstance(result, ErrorConfig) and not np.isnan(result) and not any(np.isnan(results))
-
-    def run_config(self, config: tuple):
-        """Run a single configuration. Returns the result and whether it is valid."""
-        result, results = self.cost_func(config)
-        results = np.array(results)
-        var = np.nan
-        valid = self.is_valid_result(result, results)
-        if not valid:
-            result = np.nan
-        elif not self.maximize:
-            result = -result
-            results = -results
-        if valid:
-            var = np.var(results)
-        return [result], [var], valid
-
-    def evaluate_configs(self, X: Tensor):
-        """Evaluate a tensor of one or multiple configurations. Modifies train_X and train_Y accordingly."""
-        if isinstance(X, Tensor):
-            valid_configs = []
-            valid_results = []
-            valid_vars = []
-            if X.dim() == 1:
-                X = [X]
-            for config in X:
-                assert isinstance(config, Tensor), f"Config must be a Tensor, but is of type {type(config)} ({config})"
-                param_config = self.searchspace.tensor_to_param_config(config)
-                res, var, valid = self.run_config(param_config)
-                if valid:
-                    valid_configs.append(config)
-                    valid_results.append(res)
-                    valid_vars.append(var)
-                
-                # remove evaluated configurations from the full searchspace
-                index = self.searchspace.get_param_config_index(param_config)
-                self.searchspace_tensors = torch.cat((self.searchspace_tensors[:index], 
-                                                      self.searchspace_tensors[index+1:]))
-
-            # add valid results to the training set
-            if len(valid_configs) > 0 and len(valid_results) > 0 and len(valid_vars) > 0:
-                self.train_X = torch.cat([self.train_X, torch.stack(valid_configs)])
-                self.train_Y = torch.cat([self.train_Y, torch.tensor(valid_results, **self.searchspace.tensor_kwargs)])
-                self.train_Yvar = torch.cat([self.train_Yvar, torch.tensor(valid_vars, **self.searchspace.tensor_kwargs)])
-            return valid_results
-        else:
-            raise NotImplementedError(f"Evaluation has not been implemented for type {type(X)}")
-        
-    def initial_sample(self):
-        """Take an initial sample."""
-        self.initial_sample_taken = True
-        if self.initial_sample_size > 0:
-            sample_indices = torch.from_numpy(self.searchspace.get_random_sample_indices(self.initial_sample_size)).to(self.tensor_device)
-            sample_configs = self.searchspace_tensors.index_select(0, sample_indices)
-            self.evaluate_configs(sample_configs)
-
-    def get_model_and_likelihood(self, searchspace: Searchspace, train_X: Tensor, train_Y: Tensor, train_Yvar: Tensor=None, state_dict=None, exact=True):
-        """Initialize a model and likelihood, possibly with a state dict for faster fitting."""
-        bounds, bounds_indices = searchspace.get_tensorspace_bounds()
-        transforms = dict(
-            input_transform=Normalize(d=train_X.shape[-1], indices=bounds_indices, bounds=bounds),
-            outcome_transform=Standardize(m=train_Y.shape[-1], batch_shape=train_X.shape[:-2])
-        )
-
-        # initialize the model
-        if exact:
-            catdims = searchspace.get_tensorspace_categorical_dimensions()
-            if len(catdims) == 0:
-                model = SingleTaskGP(train_X, train_Y, train_Yvar=train_Yvar, **transforms)
-            else:
-                model = MixedSingleTaskGP(train_X, train_Y, train_Yvar=train_Yvar, cat_dims=catdims, **transforms)
-        else:
-            model = SingleTaskVariationalGP(train_X, train_Y, **transforms)
-
-        # load the previous state
-        if exact and state_dict is not None:
-            model.load_state_dict(state_dict)
-
-        # initialize the likelihood
-        if exact:
-            mll = ExactMarginalLogLikelihood(model.likelihood, model)
-        else:
-            mll = VariationalELBO(model.likelihood, model.model, num_data=train_Y.size(0))
-        return model, mll
-    
-    def fit(self, mll):
-        """Fit a Marginal Log Likelihood."""
-        return fit_gpytorch_mll(mll, optimizer=fit_gpytorch_mll_torch)
-
-    def run(self, max_fevals: int, max_batch_size=2048):
-        """Run the Bayesian Optimization loop for at most `max_fevals`."""
-        try:
-            if not self.initial_sample_taken:
-                self.initial_sample()
-            model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
-            fevals_left = max_fevals - self.initial_sample_size
-
-            # create array to gradually reduce number of optimization spaces as fewer fevals are left
-            tensorspace_size = self.searchspace_tensors.size(0)
-            reserve_final_loops = min(3, fevals_left)   # reserve some loops at the end that are never split
-            fevals_left -= reserve_final_loops
-            num_loops = min(max(round(sqrt(fevals_left*2)), 3), fevals_left)  # set the number of loops for the array
-            avg_optimization_spaces = max(round(sqrt(tensorspace_size / max_batch_size)), 1)  # set the average number of optimization spaces
-            numspace = np.geomspace(start=avg_optimization_spaces, stop=0.1, num=num_loops)
-            nums_optimization_spaces = np.clip(np.round(numspace * (fevals_left / numspace.sum())), a_min=1, a_max=None)
-            # if there's a discrepency, add or subtract the difference from the first number
-            if np.sum(nums_optimization_spaces) != fevals_left:
-                nums_optimization_spaces[0] += fevals_left - np.sum(nums_optimization_spaces)
-            nums_optimization_spaces = np.concatenate([nums_optimization_spaces, np.full(reserve_final_loops, 1)])
-            fevals_left += reserve_final_loops
-
-            # Bayesian optimization loop
-            for loop_i, num_optimization_spaces in enumerate(nums_optimization_spaces):
-                num_optimization_spaces = min(num_optimization_spaces, fevals_left)
-
-                # fit on a Gaussian Process model
-                mll = self.fit(mll)
-                
-                # define the acquisition function
-                acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
-                # acqf = NoisyExpectedImprovement(model=model, , maximize=True)
-                # acqf = ProbabilityOfImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
-                # acqf = qLowerBoundMaxValueEntropy(model=model, candidate_set=self.searchspace_tensors, maximize=True)
-                # acqf = qLogExpectedImprovement(model=model, best_f=self.train_Y.max())
-                # acqf = qExpectedUtilityOfBestOption(pref_model=model)
-
-                # divide the optimization space into random chuncks
-                tensorspace_size = self.searchspace_tensors.size(0)
-                if num_optimization_spaces <= 1:
-                    optimization_spaces = [self.searchspace_tensors]
-                else:
-                    # shuffle the searchspace
-                    shuffled_indices = torch.randperm(tensorspace_size)
-                    tensorspace = self.searchspace_tensors[shuffled_indices]
-                    optimization_spaces = tensorspace.split(ceil(tensorspace_size / num_optimization_spaces))
-                
-                # optimize acquisition function to find the next evaluation point
-                for optimization_space in optimization_spaces:
-
-                    # NOTE optimize_acqf_discrete_local_search does not work with variable optimization_space size
-                    # optimize over a lattice if the space is too large
-                    # if len(optimization_spaces) == 1 and max_batch_size < optimization_space.size(0):
-                    #     candidate, _ = optimize_acqf_discrete_local_search(
-                    #         acqf,
-                    #         q=1,
-                    #         discrete_choices=optimization_space,
-                    #         max_batch_size=max_batch_size,
-                    #         num_restarts=5,
-                    #         raw_samples=1024
-                    #     )
-                    # else:
-                    candidate, _ = optimize_acqf_discrete(
-                        acqf, 
-                        q=1, 
-                        choices=optimization_space,
-                        max_batch_size=max_batch_size
-                    )
-                    
-                    # evaluate the new candidate
-                    self.evaluate_configs(candidate)
-                    fevals_left -= 1
-
-                # reinitialize the models so they are ready for fitting on next iteration
-                if loop_i < len(nums_optimization_spaces) - 1:
-                    model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar, state_dict=model.state_dict())
-        except StopCriterionReached as e:
-            if self.tuning_options.verbose:
-                print(e)
-
-        return self.cost_func.results
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch.py b/kernel_tuner/strategies/bayes_opt_GPyTorch.py
deleted file mode 100644
index 39da1c30d..000000000
--- a/kernel_tuner/strategies/bayes_opt_GPyTorch.py
+++ /dev/null
@@ -1,926 +0,0 @@
-"""Bayesian Optimization implementation from the thesis by Willemsen."""
-import itertools
-import time
-from copy import deepcopy
-from random import randint, shuffle
-from typing import Tuple
-
-import numpy as np
-from scipy.stats import norm
-
-# BO imports
-try:
-    import gpytorch
-    import torch
-    from sklearn.exceptions import ConvergenceWarning
-    from sklearn.gaussian_process.kernels import RBF, ConstantKernel, Matern
-    from skopt.sampler import Lhs
-    bayes_opt_present = True
-
-    class ExactGPModel(gpytorch.models.ExactGP):
-        """Very simple exact Gaussian Process model."""
-
-        def __init__(self, train_x, train_y, likelihood):
-            super(gpytorch.models.ExactGP, self).__init__(train_x, train_y, likelihood)
-            self.mean_module = gpytorch.means.ZeroMean()    # TODO maybe try ConstantMean or LinearMean
-            self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=1.5))    # TODO maybe try ScaleKernel(MaternKernel)
-
-        def forward(self, x):
-            mean_x = self.mean_module(x)
-            covar_x = self.covar_module(x)
-            return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
-except ImportError:
-    bayes_opt_present = False
-
-    class ExactGPModel():
-        def __init__(self, train_x, train_y, likelihood):
-            raise ImportError("GPyTorch not imported")
-        def forward(self, x):
-            raise ImportError("GPyTorch not imported")
-
-from kernel_tuner import util
-from kernel_tuner.strategies import minimize
-
-supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast"]
-
-
-def generate_normalized_param_dicts(tune_params: dict, eps: float) -> Tuple[dict, dict]:
-    """Generates normalization and denormalization dictionaries."""
-    original_to_normalized = dict()
-    normalized_to_original = dict()
-    for param_name in tune_params.keys():
-        original_to_normalized_dict = dict()
-        normalized_to_original_dict = dict()
-        for value_index, value in enumerate(tune_params[param_name]):
-            normalized_value = eps * value_index + 0.5 * eps
-            normalized_to_original_dict[normalized_value] = value
-            original_to_normalized_dict[value] = normalized_value
-        original_to_normalized[param_name] = original_to_normalized_dict
-        normalized_to_original[param_name] = normalized_to_original_dict
-    return original_to_normalized, normalized_to_original
-
-
-def normalize_parameter_space(param_space: list, tune_params: dict, normalized: dict) -> list:
-    """Normalize the parameter space given a normalization dictionary."""
-    keys = list(tune_params.keys())
-    param_space_normalized = list(tuple(normalized[keys[i]][v] for i, v in enumerate(params)) for params in param_space)
-    return param_space_normalized
-
-
-def prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict: dict, max_threads: int):
-    """Pruning of the parameter space to remove dimensions that have a constant parameter."""
-    pruned_tune_params_mask = list()
-    removed_tune_params = list()
-    param_names = list(tune_params.keys())
-    for index, key in enumerate(tune_params.keys()):
-        pruned_tune_params_mask.append(len(tune_params[key]) > 1)
-        if len(tune_params[key]) > 1:
-            removed_tune_params.append(None)
-        else:
-            value = tune_params[key][0]
-            normalized = normalize_dict[param_names[index]][value]
-            removed_tune_params.append(normalized)
-    if 'verbose' in tuning_options and tuning_options.verbose is True and len(tune_params.keys()) != sum(pruned_tune_params_mask):
-        print(f"Number of parameters (dimensions): {len(tune_params.keys())}, after pruning: {sum(pruned_tune_params_mask)}")
-    # TODO check whether the number of pruned parameters is correct
-    # print(
-    #     f"Number of parameters (dimensions): {len(tune_params.keys())}, after pruning: {sum(pruned_tune_params_mask)}, by util: {util.get_number_of_valid_configs(tuning_options, max_threads)}"
-    # )
-    parameter_space = list(tuple(itertools.compress(param_config, pruned_tune_params_mask)) for param_config in parameter_space)
-    return parameter_space, removed_tune_params
-
-
-def tune(runner, kernel_options, device_options, tuning_options):
-    """Find the best performing kernel configuration in the parameter space.
-
-    :params runner: A runner from kernel_tuner.runners
-    :type runner: kernel_tuner.runner
-
-    :param kernel_options: A dictionary with all options for the kernel.
-    :type kernel_options: kernel_tuner.interface.Options
-
-    :param device_options: A dictionary with all options for the device
-        on which the kernel should be tuned.
-    :type device_options: kernel_tuner.interface.Options
-
-    :param tuning_options: A dictionary with all options regarding the tuning
-        process. Allows setting hyperparameters via the strategy_options key.
-    :type tuning_options: kernel_tuner.interface.Options
-
-    :returns: A list of dictionaries for executed kernel configurations and their
-        execution times. And a dictionary that contains a information
-        about the hardware/software environment on which the tuning took place.
-    :rtype: list(dict()), dict()
-
-    """
-    if not bayes_opt_present:
-        raise ImportError(
-            "Error: optional dependencies for Bayesian Optimization not installed, please install torch and gpytorch"
-        )
-
-    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
-    prune_parameterspace = tuning_options.strategy_options.get("pruneparameterspace", True)
-    if not bayes_opt_present:
-        raise ImportError("Error: optional dependencies for Bayesian Optimization not installed, please install scikit-learn and scikit-optimize")
-
-    # epsilon for scaling should be the evenly spaced distance between the largest set of parameter options in an interval [0,1]
-    tune_params = tuning_options.tune_params
-    tuning_options["scaling"] = True
-    _, _, eps = minimize.get_bounds_x0_eps(tuning_options)
-
-    # compute cartesian product of all tunable parameters
-    parameter_space = itertools.product(*tune_params.values())
-
-    # check for search space restrictions
-    if tuning_options.restrictions is not None:
-        tuning_options.verbose = False
-    parameter_space = filter(lambda p: util.config_valid(p, tuning_options, runner.dev.max_threads), parameter_space)
-    parameter_space = list(parameter_space)
-    if len(parameter_space) < 1:
-        raise ValueError("Empty parameterspace after restrictionscheck. Restrictionscheck is possibly too strict.")
-    if len(parameter_space) == 1:
-        raise ValueError(f"Only one configuration after restrictionscheck. Restrictionscheck is possibly too strict. Configuration: {parameter_space[0]}")
-
-    # normalize search space to [0,1]
-    normalize_dict, denormalize_dict = generate_normalized_param_dicts(tune_params, eps)
-    parameter_space = normalize_parameter_space(parameter_space, tune_params, normalize_dict)
-
-    # prune the parameter space to remove dimensions that have a constant parameter
-    if prune_parameterspace:
-        parameter_space, removed_tune_params = prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict, runner.dev.max_threads)
-    else:
-        parameter_space = list(parameter_space)
-        removed_tune_params = [None] * len(tune_params.keys())
-
-    # initialize and optimize
-    bo = BayesianOptimization(parameter_space, removed_tune_params, kernel_options, tuning_options, normalize_dict, denormalize_dict, runner)
-    results = bo.optimize(max_fevals)
-
-    return results, runner.dev.get_environment()
-
-
-class BayesianOptimization():
-
-    def __init__(self, searchspace: list, removed_tune_params: list, kernel_options: dict, tuning_options: dict, normalize_dict: dict, denormalize_dict: dict,
-                 runner, opt_direction='min'):
-        time_start = time.perf_counter_ns()
-
-        # supported hyperparameter values
-        self.supported_cov_kernels = ["constantrbf", "rbf", "matern32", "matern52"]
-        self.supported_methods = supported_methods
-        self.supported_sampling_methods = ["random", "lhs"]
-        self.supported_sampling_criterion = ["correlation", "ratio", "maximin", None]
-
-        def get_hyperparam(name: str, default, supported_values=list()):
-            value = tuning_options.strategy_options.get(name, default)
-            if len(supported_values) > 0 and value not in supported_values:
-                raise ValueError(f"'{name}' is set to {value}, but must be one of {supported_values}")
-            return value
-
-        # get hyperparameters
-        get_hyperparam("covariancekernel", "matern32", self.supported_cov_kernels)
-        get_hyperparam("covariancelengthscale", 1.5)
-        acquisition_function = get_hyperparam("method", "multi-advanced", self.supported_methods)
-        acq = acquisition_function
-        acq_params = get_hyperparam("methodparams", {})
-        multi_af_names = get_hyperparam("multi_af_names", ['ei', 'poi', 'lcb'])
-        self.multi_afs_discount_factor = get_hyperparam("multi_af_discount_factor", 0.65 if acq == 'multi' else 0.95)
-        self.multi_afs_required_improvement_factor = get_hyperparam("multi_afs_required_improvement_factor", 0.15 if acq == 'multi-advanced-precise' else 0.1)
-        self.training_iter = get_hyperparam("training_iter", 10)
-        self.num_initial_samples = get_hyperparam("popsize", 20)
-        self.sampling_method = get_hyperparam("samplingmethod", "lhs", self.supported_sampling_methods)
-        self.sampling_crit = get_hyperparam("samplingcriterion", 'maximin', self.supported_sampling_criterion)
-        self.sampling_iter = get_hyperparam("samplingiterations", 1000)
-
-        # set acquisition function hyperparameter defaults where missing
-        if 'explorationfactor' not in acq_params:
-            acq_params['explorationfactor'] = 'CV'
-        if 'zeta' not in acq_params:
-            acq_params['zeta'] = 1
-        if 'skip_duplicate_after' not in acq_params:
-            acq_params['skip_duplicate_after'] = 5
-
-        # set arguments
-        self.kernel_options = kernel_options
-        self.tuning_options = tuning_options
-        self.tune_params = tuning_options.tune_params
-        self.param_names = list(self.tune_params.keys())
-        self.normalized_dict = normalize_dict
-        self.denormalized_dict = denormalize_dict
-        self.runner = runner
-        self.max_threads = runner.dev.max_threads
-        self.log_timings = False
-
-        # set optimization constants
-        self.invalid_value = 1e20
-        self.opt_direction = opt_direction
-        if opt_direction == 'min':
-            self.worst_value = np.PINF
-            self.argopt = np.argmin
-        elif opt_direction == 'max':
-            self.worst_value = np.NINF
-            self.argopt = np.argmax
-        else:
-            raise ValueError("Invalid optimization direction '{}'".format(opt_direction))
-
-        # set the acquisition function and surrogate model
-        self.optimize = self.__optimize
-        self.af_name = acquisition_function
-        self.af_params = acq_params
-        self.multi_afs = list(self.get_af_by_name(af_name) for af_name in multi_af_names)
-        self.set_acquisition_function(acquisition_function)
-        # self.set_surrogate_model(cov_kernel_name, cov_kernel_lengthscale)
-
-        # set remaining values
-        self.results = []
-        self.__searchspace = searchspace
-        self.removed_tune_params = removed_tune_params
-        self.searchspace_size = len(self.searchspace)
-        self.hyperparams = {
-            'loss': np.nan,
-            'lengthscale': np.nan,
-            'noise': np.nan,
-        }
-        self.num_dimensions = len(self.dimensions())
-        self.__current_optimum = self.worst_value
-        self.cv_norm_maximum = None
-        self.fevals = 0
-        self.__visited_num = 0
-        self.__visited_valid_num = 0
-        self.__visited_searchspace_indices = [False] * self.searchspace_size
-        self.__observations = [np.NaN] * self.searchspace_size
-        self.__valid_observation_indices = [False] * self.searchspace_size
-        self.__valid_params = list()
-        self.__valid_observations = list()
-        self.unvisited_cache = self.unvisited()
-        time_setup = time.perf_counter_ns()
-        self.error_message_searchspace_fully_observed = "The search space has been fully observed"
-
-        # take initial sample
-        self.initial_sample()
-        time_initial_sample = time.perf_counter_ns()
-
-        # print the timings
-        if self.log_timings:
-            time_taken_setup = round(time_setup - time_start, 3) / 1000
-            time_taken_initial_sample = round(time_initial_sample - time_setup, 3) / 1000
-            time_taken_total = round(time_initial_sample - time_start, 3) / 1000
-            print(f"Initialization | total time: {time_taken_total} | Setup: {time_taken_setup} | Initial sample: {time_taken_initial_sample}", flush=True)
-
-    @property
-    def searchspace(self):
-        return self.__searchspace
-
-    @property
-    def observations(self):
-        return self.__observations
-
-    @property
-    def current_optimum(self):
-        return self.__current_optimum
-
-    @current_optimum.setter
-    def current_optimum(self, value: float):
-        self.__current_optimum = value
-
-    def is_better_than(self, a: float, b: float) -> bool:
-        """Determines which one is better depending on optimization direction."""
-        return a < b if self.opt_direction == 'min' else a > b
-
-    def is_not_visited(self, index: int) -> bool:
-        """Returns whether a searchspace index has not been visited."""
-        return not self.__visited_searchspace_indices[index]
-
-    def is_valid(self, observation: float) -> bool:
-        """Returns whether an observation is valid."""
-        return not (observation is None or observation == self.invalid_value or observation == np.NaN)
-
-    def get_af_by_name(self, name: str):
-        """Get the basic acquisition functions by their name."""
-        basic_af_names = ['ei', 'poi', 'lcb']
-        if name == 'ei':
-            return self.af_expected_improvement
-        elif name == 'poi':
-            return self.af_probability_of_improvement
-        elif name == 'lcb':
-            return self.af_lower_confidence_bound
-        raise ValueError(f"{name} not in {basic_af_names}")
-
-    def set_acquisition_function(self, acquisition_function: str):
-        """Set the acquisition function."""
-        if acquisition_function == 'poi':
-            self.__af = self.af_probability_of_improvement
-        elif acquisition_function == 'ei':
-            self.__af = self.af_expected_improvement
-        elif acquisition_function == 'lcb':
-            self.__af = self.af_lower_confidence_bound
-        elif acquisition_function == 'lcb-srinivas':
-            self.__af = self.af_lower_confidence_bound_srinivas
-        elif acquisition_function == 'random':
-            self.__af = self.af_random
-        elif acquisition_function == 'multi':
-            self.optimize = self.__optimize_multi
-        elif acquisition_function == 'multi-advanced':
-            self.optimize = self.__optimize_multi_advanced
-        elif acquisition_function == 'multi-fast':
-            self.optimize = self.__optimize_multi_fast
-        else:
-            raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
-
-    def set_surrogate_model(self, cov_kernel_name: str, cov_kernel_lengthscale: float):
-        """Set the surrogate model with a covariance function and lengthscale."""
-        # TODO remove or adapt this
-        if cov_kernel_name == "constantrbf":
-            ConstantKernel(1.0, constant_value_bounds="fixed") * RBF(cov_kernel_lengthscale, length_scale_bounds="fixed")
-        elif cov_kernel_name == "rbf":
-            RBF(length_scale=cov_kernel_lengthscale, length_scale_bounds="fixed")
-        elif cov_kernel_name == "matern32":
-            Matern(length_scale=cov_kernel_lengthscale, nu=1.5, length_scale_bounds="fixed")
-        elif cov_kernel_name == "matern52":
-            Matern(length_scale=cov_kernel_lengthscale, nu=2.5, length_scale_bounds="fixed")
-        else:
-            raise ValueError(f"Acquisition function must be one of {self.supported_cov_kernels}, is {cov_kernel_name}")
-        likelihood = gpytorch.likelihoods.GaussianLikelihood()
-        self.__model = ExactGPModel(train_x, train_y, likelihood)
-        # self.__model = GaussianProcessRegressor(kernel=kernel, alpha=1e-10, normalize_y=True)    # maybe change alpha to a higher value such as 1e-5?
-
-    def valid_params_observations(self) -> Tuple[list, list]:
-        """Returns a list of valid observations and their parameter configurations."""
-        # if you do this every iteration, better keep it as cache and update in update_after_evaluation
-        params = list()
-        observations = list()
-        for index, valid in enumerate(self.__valid_observation_indices):
-            if valid is True:
-                params.append(self.searchspace[index])
-                observations.append(self.observations[index])
-        return params, observations
-
-    def unvisited(self) -> list:
-        """Returns a list of unvisited parameter configurations - attention: cached version exists!"""
-        params = list(self.searchspace[index] for index, visited in enumerate(self.__visited_searchspace_indices) if visited is False)
-        return params
-
-    def find_param_config_index(self, param_config: tuple) -> int:
-        """Find a parameter config index in the search space if it exists."""
-        return self.searchspace.index(param_config)
-
-    def find_param_config_unvisited_index(self, param_config: tuple) -> int:
-        """Find a parameter config index in the unvisited cache if it exists."""
-        return self.unvisited_cache.index(param_config)
-
-    def normalize_param_config(self, param_config: tuple) -> tuple:
-        """Normalizes a parameter configuration."""
-        normalized = tuple(self.normalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
-        return normalized
-
-    def denormalize_param_config(self, param_config: tuple) -> tuple:
-        """Denormalizes a parameter configuration."""
-        denormalized = tuple(self.denormalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
-        return denormalized
-
-    def unprune_param_config(self, param_config: tuple) -> tuple:
-        """In case of pruned dimensions, adds the removed dimensions back in the param config."""
-        unpruned = list()
-        pruned_count = 0
-        for removed in self.removed_tune_params:
-            if removed is not None:
-                unpruned.append(removed)
-            else:
-                unpruned.append(param_config[pruned_count])
-                pruned_count += 1
-        return tuple(unpruned)
-
-    def update_after_evaluation(self, observation: float, index: int, param_config: tuple):
-        """Adjust the visited and valid index records accordingly."""
-        validity = self.is_valid(observation)
-        self.__visited_num += 1
-        self.__observations[index] = observation
-        self.__visited_searchspace_indices[index] = True
-        del self.unvisited_cache[self.find_param_config_unvisited_index(param_config)]
-        self.__valid_observation_indices[index] = validity
-        if validity is True:
-            self.__visited_valid_num += 1
-            self.__valid_params.append(param_config)
-            self.__valid_observations.append(observation)
-            if self.is_better_than(observation, self.current_optimum):
-                self.current_optimum = observation
-
-    def predict(self, x) -> Tuple[float, float]:
-        """Returns a mean and standard deviation predicted by the surrogate model for the parameter configuration."""
-        return self.__model.predict([x], return_std=True)
-
-    def predict_list(self, lst: list) -> Tuple[np.ndarray, np.ndarray]:
-        """Returns a list of means and standard deviations predicted by the surrogate model for the parameter configurations, and separate lists of means and standard deviations."""
-        with torch.no_grad(), gpytorch.settings.fast_pred_var():
-            # TODO use torch.cuda for GPU
-            test_x = torch.Tensor(lst)
-            observed_pred = self.__likelihood(self.__model(test_x))
-            mu = observed_pred.mean
-            std = observed_pred.variance
-            return mu.numpy(), std.numpy()
-
-    def evaluate_objective_function(self, param_config: tuple) -> float:
-        """Evaluates the objective function."""
-        param_config = self.unprune_param_config(param_config)
-        denormalized_param_config = self.denormalize_param_config(param_config)
-        if not util.config_valid(denormalized_param_config, self.tuning_options, self.max_threads):
-            return self.invalid_value
-        val = minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.results)
-        self.fevals += 1
-        self.add_model_hyperparams_to_result(denormalized_param_config)
-        return val
-
-    def add_model_hyperparams_to_result(self, param_config: tuple):
-        """Add the model parameters (loss and noise) to the results dict at the last result."""
-        # assert that the results index corresponds to the last index
-        assert self.find_config_index_in_results(param_config) == len(self.results) - 1
-
-        for key, value in self.hyperparams.items():
-            # print(f"{key}: {value}")
-            self.results[-1][key] = value
-
-    def find_config_index_in_results(self, param_config: tuple):
-        """Find the index of a parameter configuration in the results. Beware that this can be very slow!"""
-        found_indices = list()
-        for results_index, result_dict in enumerate(self.results):
-            keys = list(result_dict.keys())
-            found = True
-            for index, value in enumerate(param_config):
-                if result_dict[keys[index]] != value:
-                    found = False
-            if found is True:
-                found_indices.append(results_index)
-        assert len(found_indices) == 1
-        return found_indices[0]
-
-    def dimensions(self) -> list:
-        """List of parameter values per parameter."""
-        return self.tune_params.values()
-
-    def draw_random_sample(self) -> Tuple[list, int]:
-        """Draw a random sample from the unvisited parameter configurations."""
-        if len(self.unvisited_cache) < 1:
-            raise ValueError("Searchspace exhausted during random sample draw as no valid configurations were found")
-        index = randint(0, len(self.unvisited_cache) - 1)    # NOSONAR
-        param_config = self.unvisited_cache[index]
-        actual_index = self.find_param_config_index(param_config)
-        return param_config, actual_index
-
-    def draw_latin_hypercube_samples(self, num_samples: int) -> list:
-        """Draws an LHS-distributed sample from the search space."""
-        if self.searchspace_size < num_samples:
-            raise ValueError("Can't sample more than the size of the search space")
-        if self.sampling_crit is None:
-            lhs = Lhs(lhs_type="centered", criterion=None)
-        else:
-            lhs = Lhs(lhs_type="classic", criterion=self.sampling_crit, iterations=self.sampling_iter)
-        param_configs = lhs.generate(self.dimensions(), num_samples)
-        indices = list()
-        normalized_param_configs = list()
-        for i in range(len(param_configs) - 1):
-            try:
-                param_config = self.normalize_param_config(param_configs[i])
-                index = self.find_param_config_index(param_config)
-                indices.append(index)
-                normalized_param_configs.append(param_config)
-            except ValueError:
-                """ Due to search space restrictions, the search space may not be an exact cartesian product of the tunable parameter values.
-                It is thus possible for LHS to generate a parameter combination that is not in the actual searchspace, which must be skipped. """
-                continue
-        return list(zip(normalized_param_configs, indices))
-
-    def train_model_hyperparams(self):
-        """Train the model and likelihood hyperparameters."""
-        # set to training modes
-        self.__model.train()
-        self.__likelihood.train()
-
-        # Use the adam optimizer
-        optimizer = torch.optim.Adam(self.__model.parameters(), lr=0.1)    # Includes GaussianLikelihood parameters
-
-        # "Loss" for GPs - the marginal log likelihood
-        mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.__likelihood, self.__model)
-
-        loss = 0
-        for i in range(self.training_iter):
-            # Zero gradients from previous iteration
-            optimizer.zero_grad()
-            # Output from model
-            output = self.__model(self.__tparams)
-            # Calc loss and backprop gradients
-            loss = -mll(output, self.__tobservations)
-            loss.backward()
-            # print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' %
-            #       (i + 1, self.training_iter, loss.item(), self.__model.covar_module.base_kernel.lengthscale.item(), self.__model.likelihood.noise.item()))
-            optimizer.step()
-
-        # set to prediction mode
-        self.__model.eval()
-        self.__likelihood.eval()
-
-        # set the hyperparameters globally for reference
-        self.hyperparams = {
-            'loss': loss.item(),
-            'lengthscale': self.__model.covar_module.base_kernel.lengthscale.item(),
-            'noise': self.__model.likelihood.noise.item(),
-        }
-        # print(f"Loss: {self.hyperparams['loss']}, lengthscale: {self.hyperparams['lengthscale']}, noise: {self.hyperparams['noise']}")
-
-    def initial_sample(self):
-        """Draws an initial sample using random sampling."""
-        if self.num_initial_samples <= 0:
-            raise ValueError("At least one initial sample is required")
-        if self.sampling_method == 'lhs':
-            samples = self.draw_latin_hypercube_samples(self.num_initial_samples)
-        elif self.sampling_method == 'random':
-            samples = list()
-        else:
-            raise ValueError("Sampling method must be one of {}, is {}".format(self.supported_sampling_methods, self.sampling_method))
-        # collect the samples
-        collected_samples = 0
-        for params, index in samples:
-            observation = self.evaluate_objective_function(params)
-            self.update_after_evaluation(observation, index, params)
-            if self.is_valid(observation):
-                collected_samples += 1
-        # collect the remainder of the samples
-        while collected_samples < self.num_initial_samples:
-            params, index = self.draw_random_sample()
-            observation = self.evaluate_objective_function(params)
-            self.update_after_evaluation(observation, index, params)
-            # check for validity to avoid having no actual initial samples
-            if self.is_valid(observation):
-                collected_samples += 1
-
-        # instantiate the model with the initial sample
-        self.__likelihood = gpytorch.likelihoods.GaussianLikelihood()
-        self.__tparams = torch.Tensor(self.__valid_params)
-        self.__tobservations = torch.Tensor(self.__valid_observations)
-        self.__model = ExactGPModel(self.__tparams, self.__tobservations, self.__likelihood)
-        self.train_model_hyperparams()
-
-        # extract the predictions
-        _, std = self.predict_list(self.unvisited_cache)
-        self.initial_sample_mean = np.mean(self.__valid_observations)
-        # Alternatively:
-        # self.initial_sample_std = np.std(self.__valid_observations)
-        # self.initial_sample_mean = np.mean(predictions)
-        self.initial_std = np.mean(std)
-        self.cv_norm_maximum = self.initial_std
-
-    def contextual_variance(self, std: list):
-        """Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018)."""
-        if not self.af_params['explorationfactor'] == 'CV':
-            return None
-        if self.opt_direction == 'min':
-            if self.current_optimum == self.worst_value:
-                return 0.01
-            if self.current_optimum <= 0:
-                # doesn't work well for minimization beyond 0, should that even be a thing?
-                return abs(np.mean(std) / self.current_optimum)
-            improvement_over_initial_sample = self.initial_sample_mean / self.current_optimum
-            cv = np.mean(std) / improvement_over_initial_sample
-            # normalize if available
-            if self.cv_norm_maximum:
-                cv = cv / self.cv_norm_maximum
-            return cv
-        return np.mean(std) / self.current_optimum
-
-    def __optimize(self, max_fevals):
-        """Find the next best candidate configuration(s), evaluate those and update the model accordingly."""
-        while self.fevals < max_fevals:
-            if self.__visited_num >= self.searchspace_size:
-                raise ValueError(self.error_message_searchspace_fully_observed)
-            predictions = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(predictions[1])
-            list_of_acquisition_values = self.__af(predictions, hyperparam)
-            # afterwards select the best AF value
-            best_af = self.argopt(list_of_acquisition_values)
-            candidate_params = self.unvisited_cache[best_af]
-            candidate_index = self.find_param_config_index(candidate_params)
-            observation = self.evaluate_objective_function(candidate_params)
-            self.update_after_evaluation(observation, candidate_index, candidate_params)
-            self.train_model_hyperparams()
-        return self.results
-
-    def __optimize_multi(self, max_fevals):
-        """Optimize with a portfolio of multiple acquisition functions. Predictions are always only taken once. Skips AFs if they suggest X/max_evals duplicates in a row, prefers AF with best discounted average."""
-        if self.opt_direction != 'min':
-            raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
-        # calculate how many times an AF can suggest a duplicate candidate before the AF is skipped
-        # skip_duplicates_fraction = self.af_params['skip_duplicates_fraction']
-        # skip_if_duplicate_n_times = int(min(max(round(skip_duplicates_fraction * max_fevals), 3), max_fevals))
-        skip_if_duplicate_n_times = self.af_params['skip_duplicate_after']
-        discount_factor = self.multi_afs_discount_factor
-        # setup the registration of duplicates and runtimes
-        duplicate_count_template = [0 for _ in range(skip_if_duplicate_n_times)]
-        duplicate_candidate_af_count = list(deepcopy(duplicate_count_template) for _ in range(3))
-        skip_af_index = list()
-        af_runtimes = [0, 0, 0]
-        af_observations = [list(), list(), list()]
-        initial_sample_mean = np.mean(self.__valid_observations)
-        while self.fevals < max_fevals:
-            time_start = time.perf_counter_ns()
-            # the first acquisition function is never skipped, so that should be the best for the endgame (EI)
-            aqfs = self.multi_afs
-            predictions = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(predictions[1])
-            if self.__visited_num >= self.searchspace_size:
-                raise ValueError(self.error_message_searchspace_fully_observed)
-            time_predictions = time.perf_counter_ns()
-            actual_candidate_params = list()
-            actual_candidate_indices = list()
-            actual_candidate_af_indices = list()
-            duplicate_candidate_af_indices = list()
-            duplicate_candidate_original_af_indices = list()
-            for af_index, af in enumerate(aqfs):
-                if af_index in skip_af_index:
-                    continue
-                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
-                    break
-                timer_start = time.perf_counter()
-                list_of_acquisition_values = af(predictions, hyperparam)
-                best_af = self.argopt(list_of_acquisition_values)
-                time_taken = time.perf_counter() - timer_start
-                af_runtimes[af_index] += time_taken
-                is_duplicate = best_af in actual_candidate_indices
-                if not is_duplicate:
-                    candidate_params = self.unvisited_cache[best_af]
-                    actual_candidate_params.append(candidate_params)
-                    actual_candidate_indices.append(best_af)
-                    actual_candidate_af_indices.append(af_index)
-                # register whether the AF suggested a duplicate candidate
-                duplicate_candidate_af_count[af_index].pop(0)
-                duplicate_candidate_af_count[af_index].append(1 if is_duplicate else 0)
-                if is_duplicate:
-                    # find the index of the AF that first registered the duplicate
-                    original_duplicate_af_index = actual_candidate_af_indices[actual_candidate_indices.index(best_af)]
-                    # register that AF as duplicate as well
-                    duplicate_candidate_af_count[original_duplicate_af_index][-1] = 1
-                    duplicate_candidate_af_indices.append(af_index)
-                    duplicate_candidate_original_af_indices.append(original_duplicate_af_index)
-            time_afs = time.perf_counter_ns()
-            # evaluate the non-duplicate candidates
-            for index, af_index in enumerate(actual_candidate_af_indices):
-                candidate_params = actual_candidate_params[index]
-                candidate_index = self.find_param_config_index(candidate_params)
-                observation = self.evaluate_objective_function(candidate_params)
-                self.update_after_evaluation(observation, candidate_index, candidate_params)
-                if observation != self.invalid_value:
-                    # we use the registered observations for maximization of the discounted reward
-                    reg_observation = observation if self.opt_direction == 'min' else -1 * observation
-                    af_observations[actual_candidate_af_indices[index]].append(reg_observation)
-                else:
-                    reg_invalid_observation = initial_sample_mean if self.opt_direction == 'min' else -1 * initial_sample_mean
-                    af_observations[actual_candidate_af_indices[index]].append(reg_invalid_observation)
-            for index, af_index in enumerate(duplicate_candidate_af_indices):
-                original_observation = af_observations[duplicate_candidate_original_af_indices[index]][-1]
-                af_observations[af_index].append(original_observation)
-            self.train_model_hyperparams()
-            time_eval = time.perf_counter_ns()
-            # assert that all observation lists of non-skipped acquisition functions are of the same length
-            non_skipped_af_indices = list(af_index for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index)
-            assert all(len(af_observations[non_skipped_af_indices[0]]) == len(af_observations[af_index]) for af_index in non_skipped_af_indices)
-            # find the AFs elligble for being skipped
-            candidates_for_skip = list()
-            for af_index, count in enumerate(duplicate_candidate_af_count):
-                if sum(count) >= skip_if_duplicate_n_times and af_index not in skip_af_index:
-                    candidates_for_skip.append(af_index)
-            # do not skip the AF with the lowest runtime
-            if len(candidates_for_skip) > 1:
-                candidates_for_skip_discounted = list(
-                    sum(list(obs * discount_factor**(len(observations) - 1 - i) for i, obs in enumerate(observations)))
-                    for af_index, observations in enumerate(af_observations) if af_index in candidates_for_skip)
-                af_not_to_skip = candidates_for_skip[np.argmin(candidates_for_skip_discounted)]
-                for af_index in candidates_for_skip:
-                    if af_index == af_not_to_skip:
-                        # do not skip the AF with the lowest runtime and give it a clean slate
-                        duplicate_candidate_af_count[af_index] = deepcopy(duplicate_count_template)
-                        continue
-                    skip_af_index.append(af_index)
-                    if len(skip_af_index) >= len(aqfs):
-                        raise ValueError("There are no acquisition functions left! This should not happen...")
-            time_af_selection = time.perf_counter_ns()
-
-            # printing timings
-            if self.log_timings:
-                time_taken_predictions = round(time_predictions - time_start, 3) / 1000
-                time_taken_afs = round(time_afs - time_predictions, 3) / 1000
-                time_taken_eval = round(time_eval - time_afs, 3) / 1000
-                time_taken_af_selection = round(time_af_selection - time_eval, 3) / 1000
-                time_taken_total = round(time_af_selection - time_start, 3) / 1000
-                print(
-                    f"({self.fevals}/{max_fevals}) Total time: {time_taken_total} | Predictions: {time_taken_predictions} | AFs: {time_taken_afs} | Eval: {time_taken_eval} | AF selection: {time_taken_af_selection}",
-                    flush=True)
-        return self.results
-
-    def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
-        """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once, unless increase_precision is true. Skips AFs if they are consistently worse than the mean of discounted observations, promotes AFs if they are consistently better than this mean."""
-        if self.opt_direction != 'min':
-            raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
-        aqfs = self.multi_afs
-        discount_factor = self.multi_afs_discount_factor
-        required_improvement_factor = self.multi_afs_required_improvement_factor
-        required_improvement_worse = 1 + required_improvement_factor
-        required_improvement_better = 1 - required_improvement_factor
-        min_required_count = self.af_params['skip_duplicate_after']
-        skip_af_index = list()
-        single_af = len(aqfs) <= len(skip_af_index) + 1
-        af_observations = [list(), list(), list()]
-        af_performs_worse_count = [0, 0, 0]
-        af_performs_better_count = [0, 0, 0]
-        while self.fevals < max_fevals:
-            if single_af:
-                return self.__optimize(max_fevals)
-            if self.__visited_num >= self.searchspace_size:
-                raise ValueError(self.error_message_searchspace_fully_observed)
-            observations_median = np.median(self.__valid_observations)
-            if increase_precision is False:
-                predictions = self.predict_list(self.unvisited_cache)
-                hyperparam = self.contextual_variance(predictions[1])
-            for af_index, af in enumerate(aqfs):
-                if af_index in skip_af_index:
-                    continue
-                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
-                    break
-                if increase_precision is True:
-                    predictions = self.predict_list(self.unvisited_cache)
-                    hyperparam = self.contextual_variance(predictions[1])
-                list_of_acquisition_values = af(predictions, hyperparam)
-                best_af = self.argopt(list_of_acquisition_values)
-                # to avoid going out of bounds on the next iteration, remove the best_af
-                predictions = (np.delete(predictions[0], best_af), np.delete(predictions[1], best_af))
-                candidate_params = self.unvisited_cache[best_af]
-                candidate_index = self.find_param_config_index(candidate_params)
-                observation = self.evaluate_objective_function(candidate_params)
-                self.update_after_evaluation(observation, candidate_index, candidate_params)
-                if increase_precision is True:
-                    self.train_model_hyperparams()
-                # we use the registered observations for maximization of the discounted reward
-                if observation != self.invalid_value:
-                    reg_observation = observation if self.opt_direction == 'min' else -1 * observation
-                    af_observations[af_index].append(reg_observation)
-                else:
-                    # if the observation is invalid, use the median of all valid observations to avoid skewing the discounted observations
-                    reg_invalid_observation = observations_median if self.opt_direction == 'min' else -1 * observations_median
-                    af_observations[af_index].append(reg_invalid_observation)
-            if increase_precision is False:
-                self.train_model_hyperparams()
-
-            # calculate the mean of discounted observations over the remaining acquisition functions
-            discounted_obs = list(
-                sum(list(obs * discount_factor**(len(observations) - 1 - i) for i, obs in enumerate(observations))) for observations in af_observations)
-            disc_obs_mean = np.mean(list(discounted_obs[af_index] for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index))
-
-            # register which AFs perform more than 10% better than average and which more than 10% worse than average
-            for af_index, discounted_observation in enumerate(discounted_obs):
-                if discounted_observation > disc_obs_mean * required_improvement_worse:
-                    af_performs_worse_count[af_index] += 1
-                elif discounted_observation < disc_obs_mean * required_improvement_better:
-                    af_performs_better_count[af_index] += 1
-
-            # find the worst AF, discounted observations is leading for a draw
-            worst_count = max(list(count for af_index, count in enumerate(af_performs_worse_count) if af_index not in skip_af_index))
-            af_index_worst = -1
-            if worst_count >= min_required_count:
-                for af_index, count in enumerate(af_performs_worse_count):
-                    if af_index not in skip_af_index and count == worst_count and (af_index_worst == -1
-                                                                                   or discounted_obs[af_index] > discounted_obs[af_index_worst]):
-                        af_index_worst = af_index
-
-            # skip the worst AF
-            if af_index_worst > -1:
-                skip_af_index.append(af_index_worst)
-                # reset the counts to even the playing field for the remaining AFs
-                af_performs_worse_count = [0, 0, 0]
-                af_performs_better_count = [0, 0, 0]
-                # if there is only one AF left, register as single AF
-                if len(aqfs) <= len(skip_af_index) + 1:
-                    single_af = True
-                    af_indices_left = list(af_index for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index)
-                    assert len(af_indices_left) == 1
-                    self.__af = aqfs[af_indices_left[0]]
-            else:
-                # find the best AF, discounted observations is leading for a draw
-                best_count = max(list(count for af_index, count in enumerate(af_performs_better_count) if af_index not in skip_af_index))
-                af_index_best = -1
-                if best_count >= min_required_count:
-                    for af_index, count in enumerate(af_performs_better_count):
-                        if af_index not in skip_af_index and count == best_count and (af_index_best == -1
-                                                                                      or discounted_obs[af_index] < discounted_obs[af_index_best]):
-                            af_index_best = af_index
-                # make the best AF single
-                if af_index_best > -1:
-                    single_af = True
-                    self.__af = aqfs[af_index_best]
-
-        return self.results
-
-    def __optimize_multi_fast(self, max_fevals):
-        """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once."""
-        while self.fevals < max_fevals:
-            aqfs = self.multi_afs
-            # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
-            predictions = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(predictions[1])
-            if self.__visited_num >= self.searchspace_size:
-                raise ValueError(self.error_message_searchspace_fully_observed)
-            for af in aqfs:
-                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
-                    break
-                list_of_acquisition_values = af(predictions, hyperparam)
-                best_af = self.argopt(list_of_acquisition_values)
-                del predictions[0][best_af]    # to avoid going out of bounds
-                del predictions[1][best_af]
-                candidate_params = self.unvisited_cache[best_af]
-                candidate_index = self.find_param_config_index(candidate_params)
-                observation = self.evaluate_objective_function(candidate_params)
-                self.update_after_evaluation(observation, candidate_index, candidate_params)
-            self.train_model_hyperparams()
-        return self.results
-
-    def af_random(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function returning a randomly shuffled list for comparison."""
-        list_random = range(len(self.unvisited_cache))
-        shuffle(list_random)
-        return list_random
-
-    def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function Probability of Improvement (PI)."""
-        # prefetch required data
-        x_mu, x_std = predictions
-        if hyperparam is None:
-            hyperparam = self.af_params['explorationfactor']
-        fplus = self.current_optimum - hyperparam
-
-        # precompute difference of improvement
-        list_diff_improvement = -((fplus - x_mu) / (x_std + 1E-9))
-
-        # compute probability of improvement with CDF in bulk
-        list_prob_improvement = norm.cdf(list_diff_improvement)
-        return list_prob_improvement
-
-    def af_expected_improvement(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function Expected Improvement (EI)."""
-        # prefetch required data
-        x_mu, x_std = predictions
-        if hyperparam is None:
-            hyperparam = self.af_params['explorationfactor']
-        fplus = self.current_optimum - hyperparam
-
-        # precompute difference of improvement, CDF and PDF in bulk
-        list_diff_improvement = (fplus - x_mu) / (x_std + 1E-9)
-        list_cdf = norm.cdf(list_diff_improvement)
-        list_pdf = norm.pdf(list_diff_improvement)
-
-        # compute expected improvement in bulk
-        list_exp_improvement = -((fplus - x_mu) * list_cdf + x_std * list_pdf)
-        return list_exp_improvement
-
-    def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function Lower Confidence Bound (LCB)."""
-        x_mu, x_std = predictions
-        if hyperparam is None:
-            hyperparam = self.af_params['explorationfactor']
-        beta = hyperparam
-
-        # compute LCB in bulk
-        list_lower_confidence_bound = (x_mu - beta * x_std)
-        return list_lower_confidence_bound
-
-    def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010."""
-        # prefetch required data
-        x_mu, x_std = predictions
-        if hyperparam is None:
-            hyperparam = self.af_params['explorationfactor']
-
-        # precompute beta parameter
-        zeta = self.af_params['zeta']
-        t = self.fevals
-        d = self.num_dimensions
-        delta = hyperparam
-        beta = np.sqrt(zeta * (2 * np.log((t**(d / 2. + 2)) * (np.pi**2) / (3. * delta))))
-
-        # compute UCB in bulk
-        list_lower_confidence_bound = (x_mu - beta * x_std)
-        return list_lower_confidence_bound
-
-    def visualize_after_opt(self):
-        """Visualize the model after the optimization."""
-        print(self.__model.kernel_.get_params())
-        print(self.__model.log_marginal_likelihood())
-        import matplotlib.pyplot as plt
-        mu, std = self.predict_list(self.searchspace)
-        brute_force_observations = list()
-        for param_config in self.searchspace:
-            obs = minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.results)
-            if obs == self.invalid_value:
-                obs = None
-            brute_force_observations.append(obs)
-        x_axis = range(len(mu))
-        plt.fill_between(x_axis, mu - std, mu + std, alpha=0.2, antialiased=True)
-        plt.plot(x_axis, mu, label="predictions", linestyle=' ', marker='.')
-        plt.plot(x_axis, brute_force_observations, label="actual", linestyle=' ', marker='.')
-        plt.legend()
-        plt.show()
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
deleted file mode 100644
index d584c0e3b..000000000
--- a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
+++ /dev/null
@@ -1,1084 +0,0 @@
-"""Lean implementation of Bayesian Optimization with GPyTorch."""
-# python
-import ast  # for casting strings to dict
-import warnings
-from copy import deepcopy
-from math import ceil
-from random import choice, randint, shuffle
-from typing import Tuple
-
-# external
-import numpy as np
-from numpy.random import default_rng
-
-from kernel_tuner.runners.runner import Runner
-from kernel_tuner.searchspace import Searchspace
-
-# optional
-try:
-    import gpytorch
-    import torch
-    # import arviz as az
-    bayes_opt_present = True
-
-    from torch import Tensor
-
-    class ExactGPModel(gpytorch.models.ExactGP):
-        def __init__(self, train_x, train_y, likelihood, cov_kernel_name: str, cov_kernel_lengthscale: float):
-            super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
-            self.mean_module = gpytorch.means.ZeroMean()
-            if cov_kernel_name == 'matern':
-                self.covar_module = gpytorch.kernels.MaternKernel(nu=cov_kernel_lengthscale)
-            elif cov_kernel_name == 'matern_scalekernel':
-                self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=cov_kernel_lengthscale))
-
-        def forward(self, x):
-            mean_x = self.mean_module(x)
-            covar_x = self.covar_module(x)
-            return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
-except ImportError:
-    bayes_opt_present = False
-
-    class Tensor():
-        pass
-
-    class ExactGPModel():
-        def __init__(self, train_x, train_y, likelihood):
-            raise ImportError("GPyTorch not imported")
-        def forward(self, x):
-            raise ImportError("GPyTorch not imported")
-
-
-# set supported hyperparameter values
-supported_precisions = ['float', 'double']
-supported_initial_sample_methods = ['lhs', 'index', 'minmax','random']
-supported_methods = ['ei', 'poi', 'random']
-supported_cov_kernels = ['matern', 'matern_scalekernel']
-supported_likelihoods = ['Gaussian', 'GaussianPrior', 'FixedNoise']
-supported_optimizers = ['LBFGS', 'Adam', 'AdamW', 'Adagrad', 'ASGD']
-
-
-# set complex hyperparameter defaults
-def default_optimizer_learningrates(key):
-    defaults = {
-        'LBFGS': 1,
-        'Adam': 0.001,
-        'AdamW': 0.001,
-        'ASGD': 0.01,
-        'Adagrad': 0.01
-    }
-    return defaults[key]
-
-
-def tune(searchspace: Searchspace, runner: Runner, tuning_options):
-    """Find the best performing kernel configuration in the parameter space.
-
-    :params runner: A runner from kernel_tuner.runners
-    :type runner: kernel_tuner.runner
-
-    :param kernel_options: A dictionary with all options for the kernel.
-    :type kernel_options: kernel_tuner.interface.Options
-
-    :param device_options: A dictionary with all options for the device
-        on which the kernel should be tuned.
-    :type device_options: kernel_tuner.interface.Options
-
-    :param tuning_options: A dictionary with all options regarding the tuning
-        process.
-    :type tuning_options: kernel_tuner.interface.Options
-
-    :returns: A list of dictionaries for executed kernel configurations and their
-        execution times. And a dictionary that contains a information
-        about the hardware/software environment on which the tuning took place.
-    :rtype: list(dict()), dict()
-
-    """
-    if not bayes_opt_present:
-        raise ImportError(
-            "Error: optional dependencies for Bayesian Optimization not installed, please install torch and gpytorch"
-        )
-
-    # set CUDA availability
-    use_cuda = False
-    cuda_available = torch.cuda.is_available() and use_cuda
-    device = torch.device("cuda:0" if cuda_available else "cpu")
-    if cuda_available:
-        print(f"CUDA is available, device: {torch.cuda.get_device_name(device)}")
-
-    # retrieve options with defaults
-    options = tuning_options.strategy_options
-    optimization_direction = options.get("optimization_direction", 'min')
-    num_initial_samples = int(options.get("popsize", 20))
-    max_fevals = int(options.get("max_fevals", 220))
-
-    # enabling scaling will unscale and snap inputs on evaluation, more efficient to scale all at once and keep unscaled values
-    tuning_options["snap"] = False
-    tuning_options["scaling"] = False
-
-    # prune the search space using restrictions
-    parameter_space = searchspace.list.copy()
-
-    # limit max_fevals to max size of the parameter space
-    max_fevals = min(len(parameter_space), max_fevals)
-    if max_fevals < num_initial_samples:
-        raise ValueError(
-            f"Maximum number of function evaluations ({max_fevals}) can not be lower than or equal to the number of initial samples ({num_initial_samples}), you might as well brute-force."
-        )
-
-    # execute Bayesian Optimization
-    BO = BayesianOptimization(parameter_space, tuning_options, runner, num_initial_samples, optimization_direction, device)
-    all_results = BO.optimize(max_fevals)
-
-    return all_results, runner.dev.get_environment()
-
-
-class BayesianOptimization:
-
-    def __init__(self, parameter_space: list, tuning_options, runner: Runner, num_initial_samples: int, optimization_direction: str,
-                 device) -> None:
-        self.animate = False    # TODO remove
-
-        # set defaults
-        self.num_initial_samples = num_initial_samples
-        self.fevals = 0
-        self.all_results = []
-        self.unique_results = {}
-        self.current_optimal_config = None
-
-        # set Kernel Tuner data
-        self.tuning_options = tuning_options
-        self.runner = runner
-        self.max_threads = runner.dev.max_threads
-
-        # get precision options
-        self.dtype = torch.float if self.get_hyperparam("precision", "float", supported_precisions) == "float" else torch.double
-        self.min_std = self.get_hyperparam("minimum_std", 1e-6, type=float)
-
-        # get tuning options
-        self.initial_sample_method = self.get_hyperparam("initialsamplemethod", "lhs", supported_initial_sample_methods)
-        self.initial_sample_random_offset_factor = self.get_hyperparam("initialsamplerandomoffsetfactor", 0.1, type=float)    # 0.1
-        self.initial_training_iter = self.get_hyperparam("initialtrainingiter", 5, type=int)    # 5
-        self.training_after_iter = self.get_hyperparam("trainingafteriter", 1, type=int)    # 1
-        self.cov_kernel_name = self.get_hyperparam("covariancekernel", "matern_scalekernel", supported_cov_kernels)
-        self.cov_kernel_lengthscale = self.get_hyperparam("covariancelengthscale", 1.5, type=float)
-        self.likelihood_name = self.get_hyperparam("likelihood", "Gaussian", supported_likelihoods)
-        self.optimizer_name = self.get_hyperparam("optimizer", "LBFGS", supported_optimizers)
-        self.optimizer_learningrate = self.get_hyperparam("optimizer_learningrate", self.optimizer_name, type=float, cast=default_optimizer_learningrates)
-        acquisition_function_name = self.get_hyperparam("method", "ei", supported_methods)
-        af_params = self.get_hyperparam("methodparams", {}, type=dict, cast=ast.literal_eval)
-
-        # set acquisition function options
-        self.set_acquisition_function(acquisition_function_name)
-        if 'explorationfactor' not in af_params:
-            af_params['explorationfactor'] = 0.1    # 0.1
-        self.af_params = af_params
-
-        # set Tensors
-        self.device: torch.device = device
-        self.out_device = torch.device("cpu")
-        self.size = len(parameter_space)
-        self.index_counter = torch.arange(self.size)
-        # the unvisited_configs and valid_configs are to be used as boolean masks on the other tensors, more efficient than adding to / removing from tensors
-        self.unvisited_configs = torch.ones(self.size, dtype=torch.bool).to(device)
-        self.valid_configs = torch.zeros(self.size, dtype=torch.bool).to(device)
-        self.inital_sample_configs = torch.zeros(self.size, dtype=torch.bool).to(device)
-        self.results = torch.zeros(self.size, dtype=self.dtype).to(device) * np.nan    # x (param configs) and y (results) must be the same type
-        self.results_std = torch.ones(self.size, dtype=self.dtype).to(device)    # only a valid assumption if outputs are normalized
-
-        # transform non-numerical parameters to numerical, keep true_param_configs for evaluation function
-        self.param_configs, self.tune_params = self.transform_nonnumerical_params(parameter_space)
-        self.true_param_configs = parameter_space
-
-        # set scaling
-        self.scaled_input = True
-        self.scaled_output = True
-        if not self.scaled_input:
-            self.param_configs_scaled = self.param_configs
-        else:
-            self.apply_scaling_to_inputs()
-
-        # set optimization settings
-        self.invalid_value = 1e20
-        self.optimization_direction = optimization_direction
-        if self.optimization_direction == 'min':
-            self.is_better_than = lambda a, b: a < b
-            self.inf_value = np.PINF
-            self.opt = torch.min
-            self.argopt = torch.argmin
-        elif self.optimization_direction == 'max':
-            self.is_better_than = lambda a, b: a > b
-            self.inf_value = np.NINF
-            self.opt = torch.max
-            self.argopt = torch.argmax
-        else:
-            raise ValueError(f"Invalid optimization direction {self.optimization_direction}")
-
-        # set the model
-        self.current_optimum = self.inf_value
-        self.hyperparams = {
-            'loss': np.nan,
-            'lengthscale': np.nan,
-            'noise': np.nan,
-        }
-        self.hyperparams_means = {
-            'loss': np.array([]),
-            'lengthscale': np.array([]),
-            'noise': np.array([]),
-        }
-
-        # initialize the model
-        if not self.runner.simulation_mode:
-            self.import_cached_evaluations()
-        self.initialize_model()
-
-    @property
-    def train_x(self):
-        """Get the valid parameter configurations."""
-        return self.param_configs_scaled[self.valid_configs].to(self.device)
-
-    @property
-    def train_y(self):
-        """Get the valid results."""
-        outputs = self.results[self.valid_configs]
-        if self.scaled_output:
-            # z-score, remove mean and make unit variance to scale it to N(0,1)
-            # alternatively, first min-max the outputs between -1 and +1 and apply a Fisher transformation (np.arctanh)
-            outputs = (outputs - outputs.mean()) / outputs.std()
-        return outputs
-
-    @property
-    def train_y_err(self):
-        """Get the error on the valid results."""
-        std = self.results_std[self.valid_configs]
-        if self.scaled_output and std.std() > 0.0:
-            std = (std - std.mean()) / std.std()    # use z-score to get normalized variability
-        return std
-
-    @property
-    def test_x(self):
-        """Get the not yet visited parameter configurations."""
-        return self.param_configs_scaled[self.unvisited_configs].to(self.device)
-
-    @property
-    def test_x_unscaled(self):
-        """Get the unscaled, not yet visited parameter configurations."""
-        return self.param_configs[self.unvisited_configs]
-
-    @property
-    def test_y_err(self):
-        """Get the expected error on the test set."""
-        train_y_err = self.train_y_err
-        return torch.full((self.size - len(train_y_err), ), torch.mean(train_y_err))
-
-    @property
-    def invalid_x(self):
-        """Get the invalid parameter configurations by checking which visited configs are not valid (equivalent to checking which unvisited configs are valid)."""
-        invalid_mask = (self.unvisited_configs == self.valid_configs)
-        return self.param_configs[invalid_mask]
-
-    def true_param_config_index(self, target_index: int) -> int:
-        """The index required to get the true config param index when dealing with test_x."""
-        # get the index of the #index-th True (for example the 9th+1 True could be index 13 because there are 4 Falses in between)
-        masked_counter = self.index_counter[self.unvisited_configs]
-        return masked_counter[target_index]
-
-    def true_param_config_indices(self, target_indices: Tensor) -> Tensor:
-        """Same as true_param_config_index, but for an array of targets instead."""
-        masked_counter = self.index_counter[self.unvisited_configs]
-        return masked_counter.index_select(0, target_indices)
-
-    def initialize_model(self, take_initial_sample=True, train_hyperparams=True):
-        """Initialize the surrogate model."""
-        # self.initial_sample_std = self.min_std
-        if take_initial_sample:
-            self.initial_sample()
-
-        # create the model
-        if self.likelihood_name == 'Gaussian':
-            self.likelihood = gpytorch.likelihoods.GaussianLikelihood()
-        elif self.likelihood_name == 'GaussianPrior':
-            raise NotImplementedError("Gaussian Prior likelihood has not been implemented yet")
-        elif self.likelihood_name == 'FixedNoise':
-            self.likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(noise=self.train_y_err.clamp(min=self.min_std), learn_additional_noise=True)
-        self.likelihood = self.likelihood.to(self.device)
-        self.model = ExactGPModel(self.train_x, self.train_y, self.likelihood, self.cov_kernel_name, self.cov_kernel_lengthscale)
-
-        # Find optimal model hyperparameters
-        self.model.train()
-        self.likelihood.train()
-        model_parameters = filter(lambda p: p.requires_grad, self.model.parameters())
-
-        # set the optimizer
-        # LBFGS is probably better as Adam is first-order
-        if self.optimizer_name == 'LBFGS':
-            self.optimizer = torch.optim.LBFGS(model_parameters, lr=self.optimizer_learningrate)
-        elif self.optimizer_name == 'Adam':
-            self.optimizer = torch.optim.Adam(model_parameters, lr=self.optimizer_learningrate)
-        elif self.optimizer_name == 'AdamW':
-            self.optimizer = torch.optim.AdamW(model_parameters, lr=self.optimizer_learningrate)
-        elif self.optimizer_name == 'ASGD':
-            self.optimizer = torch.optim.ASGD(model_parameters, lr=self.optimizer_learningrate)
-        elif self.optimizer_name == 'Adagrad':
-            self.optimizer = torch.optim.Adagrad(model_parameters, lr=self.optimizer_learningrate)
-
-        self.mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model).to(self.device)
-        if train_hyperparams:
-            self.train_hyperparams(self.initial_training_iter)
-        else:
-            self.train_hyperparams(0)
-
-    def import_cached_evaluations(self):
-        """Import the previously evaluated configurations into this run."""
-        # make strings of all the parameter configurations in the search space
-        param_config_strings = list()
-        for param_config in self.true_param_configs:
-            param_config_strings.append(",".join([str(v) for v in param_config]))
-
-        # load the results from the cache into the run
-        cache = self.tuning_options.cache
-        if len(cache.keys()) > 0:
-            print("Previous cachefile found while not in simulation mode, importing previous evaluations.")
-        for param_config_string, result in cache.items():
-            # get the index of the string in the search space
-            param_config_index = param_config_strings.index(param_config_string)
-            time = self.evaluate_config(param_config_index)
-            assert time == result['time']
-        print(f"Imported {len(self.all_results)} previously evaluated configurations.")
-
-    def initial_sample(self):
-        """Take an initial sample of the parameter space."""
-        list_param_config_indices = list(self.index_counter[~self.unvisited_configs])
-
-        # generate a random offset from a normal distribution to add to the sample indices
-        rng = default_rng()
-        if self.initial_sample_random_offset_factor > 0.5:
-            raise ValueError("Random offset factor should not be greater than 0.5 to avoid overlapping index offsets")
-        random_offset_size = (self.size / self.num_initial_samples) * self.initial_sample_random_offset_factor
-        random_offsets = np.round(rng.standard_normal(self.num_initial_samples) * random_offset_size)
-
-        # first apply the initial sampling method
-        if self.initial_sample_method == 'lhs' and self.num_initial_samples - self.fevals > 1:
-            indices = self.get_lhs_samples(random_offsets)
-            for param_config_index in indices.tolist():
-                if param_config_index in list_param_config_indices:
-                    continue
-                list_param_config_indices.append(param_config_index)
-                self.evaluate_config(param_config_index)
-        elif self.initial_sample_method == 'random':
-            while self.fevals < self.num_initial_samples:
-                param_config_index = randint(0, self.size - 1)
-                if param_config_index in list_param_config_indices:
-                    continue
-                list_param_config_indices.append(param_config_index)
-                self.evaluate_config(param_config_index)
-        elif self.initial_sample_method == 'minmax':
-            list_param_config_indices += self.take_min_max_initial_samples(list_param_config_indices)
-
-        # then take index-spaced samples until all samples are valid
-        while self.fevals < self.num_initial_samples:
-            least_evaluated_region_index = self.get_middle_index_of_least_evaluated_region()
-            param_config_index = min(max(int(least_evaluated_region_index + random_offsets[self.fevals].item()), 0), self.size - 1)
-            if param_config_index in list_param_config_indices:
-                warnings.warn(
-                    f"An already evaluated configuration ({param_config_index}) was selected for index-spaced sampling. " +
-                    "If this happens regularly, reduce the initial sample random offset factor.", AlreadyEvaluatedConflict)
-                param_config_index = least_evaluated_region_index
-            list_param_config_indices.append(param_config_index)
-            self.evaluate_config(param_config_index)
-
-        # set the current optimum, initial sample mean and initial sample std
-        self.current_optimum = self.opt(self.train_y).item()
-        self.initial_sample_mean = self.train_y.mean().item()
-        self.initial_sample_std = self.train_y.std().item()
-        # self.initial_sample_std = self.min_std    # temporary until the predictive posterior has been taken
-
-        # save a boolean mask of the initial samples
-        self.inital_sample_configs = self.valid_configs.detach().clone()
-
-    def get_lhs_samples(self, random_offsets: np.ndarray) -> Tensor:
-        """Get a centered Latin Hypercube Sample with a random offset."""
-        n_samples = self.num_initial_samples - self.fevals
-
-        # first get the seperate parameter values to make possibly fictional distributed parameter configurations
-        temp_param_configs = [[] for _ in range(n_samples)]
-        for param_values in self.tune_params.values():
-            l = len(param_values)
-
-            # determine the interval and offset
-            interval = l / n_samples
-            offset = 0
-            if l > n_samples:
-                # take the difference between the last index and the end of the list, and the first index and the start of the list
-                offset = ((l - 1 - interval * n_samples) - interval) / 2
-
-            # assemble the parameter configurations
-            for i in range(n_samples):
-                index = ceil(offset + interval * (i + 1)) - 1
-                temp_param_configs[i].append(param_values[index])
-
-        # create a tensor of the possibly fictional parameter configurations
-        param_configs = torch.tensor(list(tuple(param_config) for param_config in temp_param_configs), dtype=self.dtype).to(self.device)
-        param_configs = param_configs.unique(dim=0)    # remove duplicates
-        n_samples_unique = len(param_configs)
-
-        # get the indices of the parameter configurations
-        num_params = len(self.param_configs[0])
-        minimum_required_num_matching_params = round(num_params *
-                                                     0.75)    # set the number of parameter matches allowed to be dropped before the search is stopped
-        param_configs_indices = torch.full((n_samples_unique, ), -1, dtype=torch.int)
-        for selected_index, selected_param_config in enumerate(param_configs):
-            # for each parameter configuration, count the number of matching parameters
-            required_num_matching_params = num_params
-            matching_params = torch.count_nonzero(self.param_configs == selected_param_config, -1)
-            match_mask = (matching_params == required_num_matching_params)
-            # if there is not at least one matching parameter configuration, lower the required number of matching parameters
-            found_num_matching_param_configs = match_mask.count_nonzero()
-            while found_num_matching_param_configs < 1 and required_num_matching_params > minimum_required_num_matching_params:
-                required_num_matching_params -= 1
-                match_mask = (matching_params == required_num_matching_params)
-                found_num_matching_param_configs = match_mask.count_nonzero()
-
-            # if more than one possible parameter configuration has been found, pick a random one
-            if found_num_matching_param_configs > 1:
-                index = choice(self.index_counter[match_mask])
-            elif found_num_matching_param_configs == 1:
-                index = self.index_counter[match_mask].item()
-            else:
-                # if no matching parameter configurations were found
-                continue
-
-            # set the selected index
-            param_configs_indices[selected_index] = min(max(int(index + random_offsets[selected_index].item()), 0), self.size - 1)
-
-        # filter -1 indices and duplicates that occurred because of the random offset
-        param_configs_indices = param_configs_indices[param_configs_indices >= 0]
-        param_configs_indices = param_configs_indices.unique().type(torch.int)
-        if len(param_configs_indices) < n_samples / 2:
-            warnings.warn(
-                str(f"{n_samples - len(param_configs_indices)} out of the {n_samples} LHS samples were duplicates or -1." +
-                    f"This might be because you have few initial samples ({n_samples}) relative to the number of parameters ({num_params})." +
-                    "Perhaps try something other than LHS."))
-        return param_configs_indices
-
-    def take_min_max_initial_samples(self, list_param_config_indices: list, samples_per_parameter=1) -> list:
-        """Take the minimum parameters and the maximum for each parameter to establish the effect of individual parameters."""
-        # number of samples required is at least (samples_per_parameter) * (number of parameters) + 1
-
-        # first get the individual parameter values and sort them
-        params_values = list(self.tune_params.values())
-        for param_values in params_values:
-            param_values.sort()
-
-        number_of_params = len(params_values)
-        if self.num_initial_samples - self.fevals < samples_per_parameter * number_of_params + 1:
-            raise ValueError(f"There are not enough initial samples available ({self.num_initial_samples - self.fevals}) to do minmax initial sampling. At least {samples_per_parameter * number_of_params + 1} samples are required.")
-
-        # then take the minimum parameter configuration using BFS, this is used as the base
-        # instead of BFS, you could also search for the minimal sum of indices
-        minimum_index = None
-        param_level = 0
-        param_moving_index = -1
-        while minimum_index is None and self.num_initial_samples - self.fevals:
-            # create the minimum base configuration and find it in the search space
-            selected_param_config = torch.tensor(tuple(param_values[param_level+1] if param_index == param_moving_index else param_values[min(param_level, len(param_values)-1)] for param_index, param_values in enumerate(params_values)), dtype=self.dtype).to(self.device)
-            matching_params = torch.count_nonzero(self.param_configs == selected_param_config, -1)
-            match_mask = (matching_params == number_of_params)
-            found_num_matching_param_configs = match_mask.count_nonzero()
-            temp_index = self.index_counter[match_mask]
-            # check if the configuration exists and is succesfully evaluated
-            if found_num_matching_param_configs == 1 and (temp_index.item() in list_param_config_indices or self.evaluate_config(temp_index.item()) < self.invalid_value):
-                minimum_index = temp_index.item()
-                minimum_config = self.param_configs[minimum_index]
-                if minimum_index not in list_param_config_indices:
-                    list_param_config_indices.append(minimum_index)
-            # if it doesn't exist and evaluate, do a breadth-first search for the minimum configuration
-            else:
-                proceed = False
-                while not proceed:
-                    # first look at the current level
-                    if param_moving_index < len(params_values) - 1:
-                        param_moving_index += 1
-                        # if the param_level + 1 exceeds the number of parameters, try the next parameter
-                        if len(params_values[param_moving_index]) <= param_level + 1:
-                            param_moving_index += 1
-                        else:
-                            proceed = True
-                    # if nothing is found, proceed to the next level
-                    else:
-                        param_level += 1
-                        param_moving_index = -1
-                        proceed = True
-        if minimum_index is None:
-            raise ValueError(f"Could not evaluate the minimum base configuration in {self.num_initial_samples} samples.")
-
-        # next take the maximum for each individual parameter using DFS
-        for param_index, param_values in enumerate(params_values):
-            if len(param_values) <= 1:
-                continue
-            maximum_index = None
-            param_moving_level = len(param_values) - 1
-            while maximum_index is None and self.num_initial_samples - self.fevals > 0:
-                # take the minimum configuration as base
-                selected_param_config = minimum_config.clone()
-                # change only the currently selected parameter and look up the configuration in the search space
-                selected_param_config[param_index] = param_values[param_moving_level]
-                matching_params = torch.count_nonzero(self.param_configs == selected_param_config, -1)
-                match_mask = (matching_params == number_of_params)
-                found_num_matching_param_configs = match_mask.count_nonzero()
-                temp_index = self.index_counter[match_mask]
-                if found_num_matching_param_configs == 1 and (temp_index.item() in list_param_config_indices or self.evaluate_config(temp_index.item()) < self.invalid_value):
-                    maximum_index = temp_index.item()
-                    if maximum_index not in list_param_config_indices:
-                        list_param_config_indices.append(maximum_index)
-                # if it doesn't exist and evaluate, move one parameter value down
-                else:
-                    param_moving_level -= 1
-                    if param_moving_level < 0:
-                        raise ValueError(f"No instance of parameter {param_index} is present in the search space and succesfully evaluated")
-            if maximum_index is None:
-                raise ValueError(f"Could not evaluate the maximum configuration for {param_index+1} out of {len(params_values)} within {self.num_initial_samples} samples.")
-
-        return list_param_config_indices
-
-    def get_middle_index_of_least_evaluated_region(self) -> int:
-        """Get the middle index of the region of parameter configurations that is the least visited."""
-        # This uses the largest distance between visited parameter configurations. That means it does not properly take the parameters into account, only the index of the parameter configurations, whereas LHS does.
-        distance_tensor = torch.arange(self.size)
-
-        # first get the indices that were visited (must be in ascending order)
-        indices_visited = self.index_counter[~self.unvisited_configs]
-
-        # then reset the range after the visited index
-        for index_visited in indices_visited:
-            distance_tensor[index_visited:] = torch.arange(self.size - index_visited)
-
-        biggest_distance_index = distance_tensor.argmax()
-        biggest_distance = distance_tensor[biggest_distance_index].item()
-        middle_index = biggest_distance_index - round(biggest_distance / 2)
-        # print(f"Max distance {biggest_distance}, index: {middle_index}, between: {biggest_distance_index-biggest_distance}-{biggest_distance_index}")
-        return middle_index
-
-    def train_hyperparams(self, training_iter: int):
-        """Optimize the surrogate model hyperparameters iteratively."""
-        self.model.train()
-        self.likelihood.train()
-
-        def closure():
-            self.optimizer.zero_grad()
-            output = self.model(self.train_x)    # get model output
-            try:
-                loss = -self.mll(output, self.train_y)    # calculate loss and backprop gradients
-                loss.backward()
-                # large sudden increase in loss signals numerical instability
-                with warnings.catch_warnings():
-                    warnings.simplefilter("ignore", category=RuntimeWarning)
-                    no_nan_losses = self.hyperparams_means['loss'][~np.isnan(self.hyperparams_means['loss'])]
-                    if len(no_nan_losses) > 1 and loss.item() > np.mean(no_nan_losses) * 2:
-                        warnings.warn("Avoiding loss surge, aborting training", AvoidedLossSurgeWarning)
-                        return np.nan
-                return loss
-            except gpytorch.utils.errors.NotPSDError:
-                warnings.warn("Matrix not positive definite during training", NotPSDTrainingWarning)
-                return np.nan
-            except RuntimeError as e:
-                warnings.warn(str(e), RuntimeWarning)
-
-        loss = None
-        for _ in range(training_iter):
-            try:
-                _loss = self.optimizer.step(closure)
-                if _loss is np.nan:
-                    break
-                loss = _loss
-            except gpytorch.utils.errors.NanError:
-                warnings.warn("PSD_safe_Cholesky failed due to too many NaN", NaNTrainingWarning)
-                break
-            except TypeError as e:
-                warnings.warn(str(e), RuntimeWarning)
-                break
-
-        # set the hyperparams to the new values
-        try:
-            lengthscale = float(self.model.covar_module.lengthscale.item())
-        except AttributeError:
-            lengthscale = float(self.model.covar_module.base_kernel.lengthscale.item())
-        loss = float(loss.item()) if loss is not None else np.nan
-        noise = float(self.model.likelihood.noise.mean().detach())
-        self.hyperparams = {
-            'loss': loss,
-            'lengthscale': lengthscale,
-            'noise': noise,
-        }
-        self.hyperparams_means['loss'] = np.append(self.hyperparams_means['loss'], loss)
-        self.hyperparams_means['lengthscale'] = np.append(self.hyperparams_means['lengthscale'], lengthscale)
-        self.hyperparams_means['noise'] = np.append(self.hyperparams_means['noise'], noise)
-
-        # get into evaluation (predictive posterior) mode
-        self.model.eval()
-        self.likelihood.eval()
-
-    def optimize(self, max_fevals: int) -> Tuple[tuple, float]:    #NOSONAR
-        """Optimize the objective."""
-        predictions_tuple = None
-        short_param_config_index = None
-        last_invalid = False
-        report_multiple_minima = ceil(round(self.size / 10))    # if more than 10% of the space is minima, print a warning
-        use_contextual_variance = self.af_params['explorationfactor'] == 'CV'
-        while self.fevals < max_fevals:
-            if last_invalid:
-                # TODO no need to get the predictions again as the predictions are unchanged, just set the invalid param config mean to the worst non-NAN value and the std to 0
-                # predictions_tuple[0][short_param_config_index] = torch.nanmean(predictions_tuple[0])
-                # predictions_tuple[1][short_param_config_index] = 0
-                predictions_tuple = self.remove_from_predict_list(predictions_tuple, short_param_config_index)
-            else:
-                predictions_tuple = self.predict_list()
-                # if self.initial_sample_std <= self.min_std:
-                # self.initial_sample_std = min(max(predictions_tuple[1].mean().item(), self.min_std), 10.0)
-            # if there are NaN or all of the predicted std are the same, take from the least evaluated region
-            mean_has_NaN = bool(torch.any(torch.isnan(predictions_tuple[0])).item())
-            std_has_NaN = bool(torch.any(torch.isnan(predictions_tuple[1])).item())
-            if mean_has_NaN or std_has_NaN or torch.all(predictions_tuple[1] == predictions_tuple[1][0]):
-                least_evaluated_region_index = self.get_middle_index_of_least_evaluated_region()
-                param_config_index = least_evaluated_region_index
-                short_param_config_index = -1
-                if mean_has_NaN:
-                    warning_reason = "there were NaN in the predicted mean"
-                elif std_has_NaN:
-                    warning_reason = "there were NaN in the predicted std"
-                else:
-                    warning_reason = "all STDs were the same"
-                warnings.warn(
-                    f"After {self.fevals}/{max_fevals} fevals, {warning_reason}, picking one from the least evaluated region and resetting the surrogate model",
-                    ResetModelWarning)
-                self.initialize_model(take_initial_sample=False, train_hyperparams=True)
-            else:
-                # otherwise, optimize the acquisition function to find the next candidate
-                hyperparam = self.contextual_variance(predictions_tuple[0], predictions_tuple[1]) if use_contextual_variance else None
-                acquisition_values = self.acquisition_function(predictions_tuple, hyperparam)
-                short_param_config_index = self.argopt(acquisition_values)
-                param_config_index = self.true_param_config_index(short_param_config_index)
-
-                # if there are multiple minima in the acquisition function values, we want to take one from the least evaluated region
-                min_acquisition_function_value = acquisition_values[short_param_config_index]
-                indices_where_min = (acquisition_values <= min_acquisition_function_value).nonzero(as_tuple=True)[0]
-                if len(indices_where_min) > 1:
-                    # first get the true index for the minima
-                    true_indices_where_min = self.true_param_config_indices(indices_where_min)
-                    # then get the index of the least evaluated region
-                    least_evaluated_region_index = self.get_middle_index_of_least_evaluated_region()
-                    # now find the minima closest to the least evaluated region
-                    param_config_index = self.find_nearest(least_evaluated_region_index, true_indices_where_min)
-                    short_param_config_index = -1    # invalidate the short_param_config_index because we bypassed it
-                    if len(indices_where_min) > report_multiple_minima:
-                        warnings.warn(
-                            f"After {self.fevals}/{max_fevals} fevals, there were multiple minima in the acquisition values ({len(indices_where_min)}), picking one based on the least evaluated region",
-                            MultipleMinimaWarning)
-
-            # evaluate and register the result
-            result = self.evaluate_config(param_config_index)
-            if result == self.invalid_value and short_param_config_index > -1:
-                # can't use last_invalid if short_param_config_index is not set
-                last_invalid = True
-            else:
-                last_invalid = False
-                self.model.set_train_data(self.train_x, self.train_y, strict=False)
-                # do not train if there are multiple minima, because it introduces numerical instability or insolvability
-                if self.training_after_iter > 0 and (self.fevals % self.training_after_iter == 0):
-                    self.train_hyperparams(training_iter=1)    # TODO experiment with other training iter
-                # set the current optimum
-                self.current_optimum = self.opt(self.train_y).item()
-            # print(f"Valid: {len(self.train_x)}, unvisited: {len(self.test_x)}, invalid: {len(self.invalid_x)}, last invalid: {last_invalid}")
-            if self.animate:
-                self.visualize()
-
-        return self.all_results
-
-    def objective_function(self, param_config: tuple) -> float:
-        return self.runner.run([param_config], self.tuning_options)
-
-    def evaluate_config(self, param_config_index: int) -> float:
-        """Evaluates a parameter configuration, returns the time."""
-        param_config = self.true_param_configs[param_config_index]
-        time = self.objective_function(param_config)
-        self.register_result(time, param_config_index)
-        self.update_unique_results()
-        self.fevals = len(self.unique_results)
-        return time
-
-    def register_result(self, result: float, param_config_index: int):
-        """Registers the result to the Tensors and adds the hyperparameters to the results dict."""
-        # set the unvisited Tensors
-        if self.unvisited_configs[param_config_index] is False:
-            raise ValueError(f"The param config index {param_config_index} was already set to False!")
-        self.unvisited_configs[param_config_index] = False
-
-        # set the results Tensors
-        last_result = self.all_results[-1]
-        if result != self.invalid_value:
-            self.valid_configs[param_config_index] = True
-            self.results[param_config_index] = result
-            # assert last_result['time'] == result TODO remove
-            self.results_std[param_config_index] = max(np.std(last_result['times']), self.min_std)
-
-        # add the current model parameters to the last entry of the results dict
-        if len(self.all_results) < 1:
-            return
-        for key, value in self.hyperparams.items():
-            last_result["hyperparam_" + key] = value
-        self.all_results[-1] = last_result
-        # TODO check if it is possible to write the results with hyperparameters to the cache if not in simulation mode, maybe with observer?
-
-    def update_unique_results(self):
-        """Updates the unique results dictionary."""
-        record = self.all_results[-1]
-        # make a unique string by taking every value in a result, if it already exists, it is overwritten
-        self.unique_results.update({",".join([str(v) for k, v in record.items() if k in self.tuning_options.tune_params]): record["time"]})
-
-    def predict_list(self) -> Tuple[Tensor, Tensor]:
-        """Returns the means and standard deviations predicted by the surrogate model for the unvisited parameter configurations."""
-        with torch.no_grad(), gpytorch.settings.fast_pred_samples(), gpytorch.settings.fast_pred_var():
-            try:
-                observed_pred = self.likelihood(self.model(self.test_x))
-                mu = observed_pred.mean
-                std = observed_pred.variance.clamp(min=self.min_std)    # TODO .sqrt() or not? looks like without is better
-                return mu, std
-            except gpytorch.utils.errors.NanError:
-                warnings.warn("NaN error during predictions", NaNPredictionWarning)
-                return torch.ones_like(self.test_x), torch.zeros_like(self.test_x)
-            except gpytorch.utils.errors.NotPSDError:
-                warnings.warn("NotPSD error during predictions", NotPSDPredictionWarning)
-                return torch.ones_like(self.test_x), torch.zeros_like(self.test_x)
-            except RuntimeError as e:
-                warnings.warn(str(e), RuntimeWarning)
-                return torch.ones_like(self.test_x), torch.zeros_like(self.test_x)
-
-    def get_diff_improvement(self, y_mu, y_std, fplus) -> Tensor:
-        """Compute probability of improvement by assuming normality on the difference in improvement."""
-        diff_improvement = (y_mu - fplus) / y_std    # y_std can be very small, causing diff_improvement to be very large
-        diff_improvement = (diff_improvement - diff_improvement.mean()) / max(diff_improvement.std(), self.min_std)    # force to N(0,1) with z-score
-        if self.optimization_direction == 'max':
-            diff_improvement = -diff_improvement
-        return diff_improvement
-
-    def contextual_variance(self, mean: Tensor, std: Tensor):
-        """Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018)."""
-        if not self.af_params['explorationfactor'] == 'CV':
-            raise ValueError(f"Contextual Variance was called, but is not set as the exploration factor ({self.af_params['explorationfactor']})")
-        if self.optimization_direction == 'max':
-            raise NotImplementedError("Contextual Variance has not yet been implemented for maximisation")
-        if self.current_optimum == self.inf_value:
-            return 0.01
-        if self.scaled_output:
-            improvement_over_initial_sample = (abs(self.current_optimum) - self.initial_sample_mean) / self.initial_sample_std
-            improvement_over_current_sample = (abs(self.current_optimum) - self.train_y.mean().item()) / std.mean().item()
-            improvement_diff = improvement_over_current_sample - improvement_over_initial_sample
-            # the closer the improvement over the current sample is to the improvement over the initial sample, the greater the exploration
-            # x = 1 - max(max(1 - improvement_diff, 0.2), 0.0)
-            x = 1 - max(min(improvement_diff, 1) * 0.2, 0.0)
-            # the smaller the difference between the initial sample error and current sample error, the greater the exploration
-            # x = 1 - min(max(self.initial_sample_std - std.mean().item(), 1.0), 0.8)
-            # print(self.initial_sample_std, std.mean().item())
-            cv = np.log10(x) + 0.1    # at x=0.0, y=0.1; at x=0.2, y=0.003; at x=0.2057, y=0.0.
-            return cv
-        else:
-            raise NotImplementedError("Contextual Variance has not yet been implemented for non-scaled outputs")
-
-    def af_random(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function returning a randomly shuffled list for comparison."""
-        list_random = list(range(len(self.unvisited_param_configs)))
-        shuffle(list_random)
-        return list_random
-
-    def af_probability_of_improvement_tensor(self, predictions: Tuple[Tensor, Tensor], hyperparam=None) -> Tensor:
-        """Acquisition function Probability of Improvement (PoI) tensor-based."""
-        # prefetch required data
-        y_mu, y_std = predictions
-        if hyperparam is None:
-            hyperparam = self.af_params['explorationfactor']
-        fplus = self.current_optimum - hyperparam
-
-        diff_improvement = self.get_diff_improvement(y_mu, y_std, fplus)
-        normal = torch.distributions.Normal(torch.zeros_like(diff_improvement), torch.ones_like(diff_improvement))
-        cdf = normal.cdf(diff_improvement)
-
-        # # sanity check
-        # if torch.all(cdf == cdf[0]):
-        #     raise FloatingPointError("You need to scale the diff_improvement-values!")
-        return cdf
-
-    def af_expected_improvement_tensor(self, predictions: Tuple[Tensor, Tensor], hyperparam=None) -> Tensor:
-        """Acquisition function Expected Improvement (EI) tensor-based."""
-        # prefetch required data
-        y_mu, y_std = predictions
-        if hyperparam is None:
-            hyperparam = self.af_params['explorationfactor']
-        fplus = self.current_optimum - hyperparam
-
-        diff_improvement = self.get_diff_improvement(y_mu, y_std, fplus)
-        normal = torch.distributions.Normal(torch.zeros_like(diff_improvement), torch.ones_like(diff_improvement))
-        cdf = normal.cdf(diff_improvement)
-        pdf = torch.exp(normal.log_prob(diff_improvement))
-
-        # # sanity check
-        # if torch.all(cdf == cdf[0]) and torch.all(pdf == pdf[0]):
-        #     raise FloatingPointError("You need to scale the diff_improvement-values!")
-
-        # compute expected improvement in bulk
-        exp_improvement = (pdf + diff_improvement + y_std * cdf)
-        # alternative exp_improvement = y_std * (pdf + diff_improvement * cdf)
-        # alternative exp_improvement = -((fplus - y_mu) * cdf + y_std * pdf)
-        return exp_improvement
-
-    """                  """
-    """ Helper functions """
-    """                  """
-
-    def apply_scaling_to_inputs(self):
-        """Scale the inputs using min-max normalization (0-1) and remove constant parameters."""
-        param_configs_scaled = torch.zeros_like(self.param_configs)
-
-        # first get the scaling factors of each parameter
-        v_min_list = list()
-        v_diff_list = list()
-        unchanging_params_list = list()
-        for param_values in self.tune_params.values():
-            v_min = min(param_values)
-            v_max = max(param_values)
-            v_min_list.append(v_min)
-            v_diff_list.append(v_max - v_min)
-            unchanging_params_list.append(v_min == v_max)
-
-        # then set each parameter value to the scaled value
-        for param_index in range(len(self.param_configs[0])):
-            v_min = v_min_list[param_index]
-            v_diff = v_diff_list[param_index]
-            param_configs_scaled[:, param_index] = torch.sub(self.param_configs[:, param_index], v_min).div(v_diff)
-
-        # finally remove parameters that are constant by applying a mask
-        unchanging_params_tensor = ~torch.tensor(unchanging_params_list, dtype=torch.bool)
-        # if torch.all(unchanging_params_tensor == False):
-        # raise ValueError(f"All of the parameter configurations ({self.size}) are the same: {self.param_configs[0]}, nothing to optimize")
-        nonstatic_param_count = torch.count_nonzero(unchanging_params_tensor)
-        self.param_configs_scaled = torch.zeros([len(param_configs_scaled), nonstatic_param_count], dtype=self.dtype)
-        for param_config_index, param_config in enumerate(param_configs_scaled):
-            self.param_configs_scaled[param_config_index] = param_config[unchanging_params_tensor]
-        self.nonstatic_params = unchanging_params_tensor
-
-    def find_nearest(self, value, array: Tensor):
-        """Find the value nearest to the given value in the array."""
-        index = (torch.abs(array - value)).argmin()
-        return array[index]
-
-    def get_hyperparam(self, name: str, default, supported_values=list(), type=None, cast=None):
-        """Retrieve the value of a hyperparameter based on the name - beware that cast can be a reference to any function."""
-        value = self.tuning_options.strategy_options.get(name, default)
-
-        # check with predifined value list
-        if len(supported_values) > 0 and value not in supported_values:
-            raise ValueError(f"'{name}' is set to {value}, but must be one of {supported_values}")
-        # cast to type if provided
-        if type and not isinstance(value, type):
-            if cast:
-                value = cast(value)
-            else:
-                value = type(value)
-
-        # exceptions with more complex types
-        if value == 'methodparams' and 'explorationfactor' in value and value['explorationfactor'] != 'CV':
-            value = float(value)
-        return value
-
-    def remove_from_predict_list(self, p: Tuple[Tensor, Tensor], i: int) -> Tuple[Tensor, Tensor]:
-        """Remove an index from a tuple of predictions."""
-        return torch.cat([p[0][:i], p[0][i + 1:]]), torch.cat([p[1][:i], p[1][i + 1:]])
-
-    def set_acquisition_function(self, acquisition_function: str):
-        """Set the acquisition function based on the name."""
-        if acquisition_function not in supported_methods:
-            raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
-
-        if acquisition_function == 'poi':
-            self.acquisition_function = self.af_probability_of_improvement_tensor
-        elif acquisition_function == 'ei':
-            self.acquisition_function = self.af_expected_improvement_tensor
-        elif acquisition_function == 'random':
-            self.acquisition_function = self.af_random
-
-    def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[Tensor, dict]:
-        """Transform non-numerical or mixed-type parameters to numerical Tensor, also return new tune_params."""
-        parameter_space = deepcopy(parameter_space)
-        number_of_params = len(parameter_space[0])
-
-        # find out which parameters have nonnumerical or mixed types, and create a range of integers instead
-        nonnumericals_exist = False
-        nonnumerical_type = torch.zeros(number_of_params, dtype=torch.bool)
-        nonnumerical_values = [[] for _ in range(number_of_params)]
-        tune_params = deepcopy(self.tuning_options.tune_params)
-        for param_index, (param_key, param_values) in enumerate(self.tuning_options.tune_params.items()):
-            if not all(isinstance(v, (int, float, complex)) for v in param_values):
-                nonnumericals_exist = True
-                nonnumerical_type[param_index] = True
-                nonnumerical_values[param_index] = param_values
-                tune_params[param_key] = range(len(param_values))
-
-        # overwrite the nonnumerical parameters with numerical parameters
-        if nonnumericals_exist:
-            self.tuning_options["snap"] = False    # snapping is only possible with numerical values
-            for param_config_index, param_config in enumerate(parameter_space):
-                parameter_space[param_config_index] = list(param_config)
-                for param_index, param_value in enumerate(param_config):
-                    if nonnumerical_type[param_index]:
-                        # just use the index of the non-numerical value instead of the value
-                        new_value = nonnumerical_values[param_index].index(param_value)
-                        parameter_space[param_config_index][param_index] = new_value
-
-        return torch.tensor(parameter_space, dtype=self.dtype).to(self.device), tune_params
-
-    def visualize(self):
-        """Visualize the surrogate model and observations in a plot."""
-        if self.fevals < 220:
-            return None
-        from matplotlib import pyplot as plt
-        with torch.no_grad(), gpytorch.settings.fast_pred_var():
-            # Initialize plot
-            f = plt.figure(constrained_layout=True, figsize=(10, 8))
-            subfigures = f.subfigures(2, 1)
-            ax = subfigures[0].subplots(1, 1)
-            axes2 = subfigures[1].subplots(1, 3)
-            ax.set_ylabel('Value')
-            ax.set_xlabel('Parameter')
-
-            param_configs = self.true_param_configs
-
-            # get true function
-            objective_results = np.array([])
-            for param_config in param_configs:
-                result = self.objective_function(tuple(param_config))
-                if result == self.invalid_value:
-                    result = np.nan
-                objective_results = np.append(objective_results, result)
-            if self.scaled_output:
-                objective_results = (objective_results - objective_results.mean()) / objective_results.std()
-
-            if len(param_configs[0]) == 1:
-                ax.plot(np.linspace(param_configs[0], param_configs[-1], self.size), objective_results, 'r')
-            else:
-                ax.plot(range(self.size), objective_results, 'r')
-
-            # take the parameter values for 1D, otherwise the indices
-            if len(param_configs[0]) == 1:
-                x_axis_param_configs = param_configs
-                test_x_x_axis = self.test_x_unscaled.squeeze().to(self.out_device).numpy()
-            else:
-                x_axis_param_configs = torch.arange(self.size)
-                test_x_x_axis = x_axis_param_configs[self.unvisited_configs].to(self.out_device)
-
-            # Get upper and lower confidence bounds
-            observed_pred = self.likelihood(self.model(self.test_x))
-            lower, upper = observed_pred.confidence_region()
-            lower, upper = lower.to(self.out_device), upper.to(self.out_device)
-
-            # Plot initial sample as green stars
-            initial_sample_x_axis = x_axis_param_configs[self.inital_sample_configs].to(self.out_device)
-            initial_sample_y_axis = self.results[self.inital_sample_configs].to(self.out_device)
-            ax.plot(initial_sample_x_axis.numpy(), initial_sample_y_axis.numpy(), 'g*')
-
-            # Plot training data as black stars
-            mask_training_data_no_initial_sample = ~self.inital_sample_configs == self.valid_configs
-            training_x_axis = x_axis_param_configs[mask_training_data_no_initial_sample].to(self.out_device)
-            training_y_axis = self.results[mask_training_data_no_initial_sample].to(self.out_device)
-            ax.plot(training_x_axis.numpy(), training_y_axis.numpy(), 'k*')
-
-            # Plot predictive means as blue line
-            test_x_y_axis = observed_pred.mean.to(self.out_device)
-            ax.plot(test_x_x_axis, test_x_y_axis.numpy(), 'b')
-
-            # Shade between the lower and upper confidence bounds
-            ax.fill_between(test_x_x_axis, lower.numpy(), upper.numpy(), alpha=0.5)
-
-            # set the limits and legend
-            # ax.set_ylim(min(objective_results), max(filter(lambda x: x != self.invalid_value, objective_results)))
-            ax.legend(['Objective Function', 'Initial Sample', 'Observed Data', 'Mean', 'Confidence'])
-
-            # draw the hyperparameter plots
-            # loss
-            axes2[0].plot(self.hyperparams_means['loss'])
-            axes2[0].set_ylabel('Loss')
-            axes2[0].set_xlabel('Number of evaluations')
-            # lengthscale
-            axes2[1].plot(self.hyperparams_means['lengthscale'])
-            axes2[1].set_ylabel('Lengthscale')
-            axes2[1].set_xlabel('Number of evaluations')
-            # noise
-            axes2[2].plot(self.hyperparams_means['noise'])
-            axes2[2].set_ylabel('Noise')
-            axes2[2].set_xlabel('Number of evaluations')
-
-            if self.animate:
-                # f.canvas.draw()
-                plt.savefig('animation_last_graph')
-                # plt.pause(0.1)
-
-            # plt.show()
-
-
-class CustomWarning(Warning):
-
-    def __init__(self, message: str, category: str) -> None:
-        # super().__init__()
-        self.message = message
-        self.category = category
-
-    def __str__(self):
-        return repr(self.message)
-
-    def category(self):
-        return self.category.__name__
-
-
-class AvoidedLossSurgeWarning(CustomWarning):
-
-    def __init__(self, message: str) -> None:
-        super().__init__(message, "AvoidedLossSurgeWarning")
-
-
-class NotPSDTrainingWarning(CustomWarning):
-
-    def __init__(self, message: str) -> None:
-        super().__init__(message, "NotPSDTrainingWarning")
-
-
-class NaNTrainingWarning(CustomWarning):
-
-    def __init__(self, message: str) -> None:
-        super().__init__(message, "NaNTrainingWarning")
-
-
-class NaNPredictionWarning(CustomWarning):
-
-    def __init__(self, message: str) -> None:
-        super().__init__(message, "NaNPredictionWarning")
-
-
-class NotPSDPredictionWarning(CustomWarning):
-
-    def __init__(self, message: str) -> None:
-        super().__init__(message, "NotPSDPredictionWarning")
-
-
-class ResetModelWarning(CustomWarning):
-
-    def __init__(self, message: str) -> None:
-        super().__init__(message, "ResetModelWarning")
-
-
-class MultipleMinimaWarning(CustomWarning):
-
-    def __init__(self, message: str) -> None:
-        super().__init__(message, "MultipleMinimaWarning")
-
-
-class AlreadyEvaluatedConflict(CustomWarning):
-
-    def __init__(self, message: str) -> None:
-        super().__init__(message, "AlreadyEvaluatedConflict")
diff --git a/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
deleted file mode 100644
index cf733cdde..000000000
--- a/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
+++ /dev/null
@@ -1,75 +0,0 @@
-"""BOTorch package from https://github.com/pytorch/botorch."""
-from __future__ import print_function
-
-from collections import OrderedDict
-
-try:
-    pass
-except Exception:
-    BayesianOptimization = None
-    bayes_opt_present = False
-
-from kernel_tuner.strategies import minimize
-
-supported_methods = ["poi", "ei", "ucb"]
-
-
-def tune(runner, kernel_options, device_options, tuning_options):
-    """Find the best performing kernel configuration in the parameter space.
-
-    :params runner: A runner from kernel_tuner.runners
-    :type runner: kernel_tuner.runner
-
-    :param kernel_options: A dictionary with all options for the kernel.
-    :type kernel_options: kernel_tuner.interface.Options
-
-    :param device_options: A dictionary with all options for the device
-        on which the kernel should be tuned.
-    :type device_options: kernel_tuner.interface.Options
-
-    :param tuning_options: A dictionary with all options regarding the tuning
-        process.
-    :type tuning_options: kernel_tuner.interface.Options
-
-    :returns: A list of dictionaries for executed kernel configurations and their
-        execution times. And a dictionary that contains a information
-        about the hardware/software environment on which the tuning took place.
-    :rtype: list(dict()), dict()
-
-    """
-    if not bayes_opt_present:
-        raise ImportError("Error: optional dependency Bayesian Optimization not installed")
-    init_points = tuning_options.strategy_options.get("popsize", 20)
-    n_iter = tuning_options.strategy_options.get("max_fevals", 100)
-
-    # defaults as used by Bayesian Optimization Python package
-    acq = tuning_options.strategy_options.get("method", "ucb")
-    kappa = tuning_options.strategy_options.get("kappa", 2.576)
-    xi = tuning_options.strategy_options.get("xi", 0.0)
-
-    tuning_options["scaling"] = True
-
-    results = []
-
-    # function to pass to the optimizer
-    def func(**kwargs):
-        args = [kwargs[key] for key in tuning_options.tune_params.keys()]
-        return -1.0 * minimize._cost_func(args, kernel_options, tuning_options, runner, results)
-
-    bounds, _, _ = minimize.get_bounds_x0_eps(tuning_options)
-    pbounds = OrderedDict(zip(tuning_options.tune_params.keys(), bounds))
-
-    verbose = 0
-    if tuning_options.verbose:
-        verbose = 2
-
-    # print(np.isnan(init_points).any())
-
-    optimizer = BayesianOptimization(f=func, pbounds=pbounds, verbose=verbose)
-
-    optimizer.maximize(init_points=init_points, n_iter=n_iter, acq=acq, kappa=kappa, xi=xi)
-
-    if tuning_options.verbose:
-        print(optimizer.max)
-
-    return results, runner.dev.get_environment()
diff --git a/kernel_tuner/strategies/bayes_opt_ax.py b/kernel_tuner/strategies/bayes_opt_ax.py
deleted file mode 100644
index 2bb3ce8fc..000000000
--- a/kernel_tuner/strategies/bayes_opt_ax.py
+++ /dev/null
@@ -1,29 +0,0 @@
-"""Bayesian Optimization implementation using the Ax platform."""
-
-from ax import optimize
-
-from kernel_tuner import util
-from kernel_tuner.searchspace import Searchspace
-from kernel_tuner.strategies.common import (
-    CostFunc,
-)
-
-
-def tune(searchspace: Searchspace, runner, tuning_options):
-    cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True)
-
-    ax_searchspace = searchspace.to_ax_searchspace()
-
-    try:
-        best_parameters, best_values, experiment, model = optimize(
-            parameters=ax_searchspace.parameters,
-            parameter_constraints=ax_searchspace.parameter_constraints,
-            # Booth function
-            evaluation_function=cost_func,
-            minimize=True,
-        )
-    except util.StopCriterionReached as e:
-        if tuning_options.verbose:
-            print(e)
-
-    return cost_func.results
diff --git a/kernel_tuner/strategies/bayes_opt_old.py b/kernel_tuner/strategies/bayes_opt_old.py
index c3381731a..a55790e66 100644
--- a/kernel_tuner/strategies/bayes_opt_old.py
+++ b/kernel_tuner/strategies/bayes_opt_old.py
@@ -187,7 +187,7 @@ def get_hyperparam(name: str, default, supported_values=list()):
         self.invalid_value = 1e20
         self.opt_direction = opt_direction
         if opt_direction == 'min':
-            self.worst_value = np.PINF
+            self.worst_value = np.inf
             self.argopt = np.argmin
         elif opt_direction == 'max':
             self.worst_value = np.NINF
diff --git a/test/strategies/test_bayesian_optimization.py b/test/strategies/test_bayesian_optimization.py
index dd206a37b..8d929054a 100644
--- a/test/strategies/test_bayesian_optimization.py
+++ b/test/strategies/test_bayesian_optimization.py
@@ -74,7 +74,7 @@ def test_bo_initialization():
     assert BO.searchspace == pruned_parameter_space
     assert BO.unvisited_cache == pruned_parameter_space
     assert len(BO.observations) == len(pruned_parameter_space)
-    assert BO.current_optimum == np.PINF
+    assert BO.current_optimum == np.inf
 
 def test_bo_initial_sample_lhs():
     sample = BO.draw_latin_hypercube_samples(num_samples=1)
diff --git a/test/test_searchspace.py b/test/test_searchspace.py
index eaf546387..7b43fc722 100644
--- a/test/test_searchspace.py
+++ b/test/test_searchspace.py
@@ -181,22 +181,6 @@ def test_param_index_lookup():
     assert simple_searchspace.get_param_indices(last) == (3, 1, 1)
 
 
-def test_get_tensorspace():
-    """Test the generation of a tensor space."""
-    tensorspace = simple_searchspace.get_tensorspace()
-    assert tensorspace.shape == simple_searchspace.get_list_numpy().shape
-
-
-def test_conversion_tensor_param_config():
-    """Test the conversion from a parameter configuration to a tensor and tensor to parameter configuration."""
-    for config in simple_searchspace_single.list:
-        tensor = simple_searchspace_single.param_config_to_tensor(config)
-        config_2 = simple_searchspace_single.tensor_to_param_config(tensor)
-        assert config == config_2
-        assert tensor.equal(simple_searchspace_single.param_config_to_tensor(config_2))
-        assert len(tensor) == len(config) - 1
-
-
 def test_random_sample():
     """Test whether the random sample indices exists and are unique, and if it throws an error for too many samples."""
     random_sample_indices = searchspace.get_random_sample_indices(100)

From d2bb76a43d5d6cec310ace6c5ceadeb4e9a4b920 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 18:23:10 +0100
Subject: [PATCH 141/253] Avoid import of whole util module

---
 kernel_tuner/integration.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/integration.py b/kernel_tuner/integration.py
index d3219ba87..938c8c7c9 100644
--- a/kernel_tuner/integration.py
+++ b/kernel_tuner/integration.py
@@ -4,7 +4,7 @@
 
 from jsonschema import validate
 
-from kernel_tuner import util
+from kernel_tuner.util import get_instance_string, looks_like_a_filename, read_file
 
 #specifies for a number of pre-defined objectives whether
 #the objective should be minimized or maximized (boolean value denotes higher is better)
@@ -205,8 +205,8 @@ def top_result(item):
         meta["version_number"] = "1.0"
         meta["kernel_name"] = kernel_name
         if kernel_string and not callable(kernel_string) and not isinstance(kernel_string, list):
-            if util.looks_like_a_filename(kernel_string):
-                meta["kernel_string"] = util.read_file(kernel_string)
+            if looks_like_a_filename(kernel_string):
+                meta["kernel_string"] = read_file(kernel_string)
             else:
                 meta["kernel_string"] = kernel_string
         meta["objective"] = objective
@@ -337,7 +337,7 @@ def _select_best_common_config(results, objective, objective_higher_is_better):
     for config in results:
         params = config["tunable_parameters"]
 
-        config_str = util.get_instance_string(params)
+        config_str = get_instance_string(params)
         #count occurances
         results_table[config_str] = results_table.get(config_str,0) + 1
         #add to performance

From 58f147fe7bfb96a9fc201a7bce03e298b4f2930e Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 18:24:01 +0100
Subject: [PATCH 142/253] Avoid import of whole util module

---
 .gitignore                |  2 +-
 doc/requirements.txt      |  4 +-
 doc/requirements_test.txt | 93 ++++++++++++++++-----------------------
 pyproject.toml            |  2 +-
 4 files changed, 41 insertions(+), 60 deletions(-)

diff --git a/.gitignore b/.gitignore
index ce4873209..1f576769a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,7 @@
 poetry.lock
 noxenv.txt
 noxsettings.toml
-hyperparamtuning/*
+hyperparamtuning*/*
 *.prof
 
 ### Python ###
diff --git a/doc/requirements.txt b/doc/requirements.txt
index fd92b26ff..378dccc76 100644
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@@ -36,7 +36,7 @@ nbclient==0.10.2 ; python_version >= "3.10" and python_version <= "3.11" or pyth
 nbconvert==7.16.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 nbformat==5.10.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 nbsphinx==0.9.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-numpy==2.2.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 pandocfilters==1.5.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
@@ -45,7 +45,7 @@ pexpect==4.9.0 ; python_version >= "3.10" and python_version <= "3.11" and (sys_
 platformdirs==4.3.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 pluggy==1.5.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 prompt-toolkit==3.0.50 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-ptyprocess==0.7.0 ; python_version >= "3.10" and python_version <= "3.11" and os_name != "nt" or python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "3.15" and os_name != "nt" or python_version >= "3.12" and python_version < "3.15" and (sys_platform != "win32" and sys_platform != "emscripten")
+ptyprocess==0.7.0 ; python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.10" and python_version <= "3.11" and os_name != "nt" or python_version >= "3.12" and python_version < "3.15" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "3.15" and os_name != "nt"
 pure-eval==0.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 pycparser==2.22 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 pygments==2.19.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
diff --git a/doc/requirements_test.txt b/doc/requirements_test.txt
index b5a5c1443..11ed8518b 100644
--- a/doc/requirements_test.txt
+++ b/doc/requirements_test.txt
@@ -189,62 +189,43 @@ nox-poetry==1.2.0 ; python_version >= "3.10" and python_version <= "3.11" or pyt
 nox==2024.10.9 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \
     --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95
-numpy==2.2.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
-    --hash=sha256:05c076d531e9998e7e694c36e8b349969c56eadd2cdcd07242958489d79a7286 \
-    --hash=sha256:0d54974f9cf14acf49c60f0f7f4084b6579d24d439453d5fc5805d46a165b542 \
-    --hash=sha256:11c43995255eb4127115956495f43e9343736edb7fcdb0d973defd9de14cd84f \
-    --hash=sha256:188dcbca89834cc2e14eb2f106c96d6d46f200fe0200310fc29089657379c58d \
-    --hash=sha256:1974afec0b479e50438fc3648974268f972e2d908ddb6d7fb634598cdb8260a0 \
-    --hash=sha256:1cf4e5c6a278d620dee9ddeb487dc6a860f9b199eadeecc567f777daace1e9e7 \
-    --hash=sha256:207a2b8441cc8b6a2a78c9ddc64d00d20c303d79fba08c577752f080c4007ee3 \
-    --hash=sha256:218f061d2faa73621fa23d6359442b0fc658d5b9a70801373625d958259eaca3 \
-    --hash=sha256:2aad3c17ed2ff455b8eaafe06bcdae0062a1db77cb99f4b9cbb5f4ecb13c5146 \
-    --hash=sha256:2fa8fa7697ad1646b5c93de1719965844e004fcad23c91228aca1cf0800044a1 \
-    --hash=sha256:31504f970f563d99f71a3512d0c01a645b692b12a63630d6aafa0939e52361e6 \
-    --hash=sha256:3387dd7232804b341165cedcb90694565a6015433ee076c6754775e85d86f1fc \
-    --hash=sha256:4ba5054787e89c59c593a4169830ab362ac2bee8a969249dc56e5d7d20ff8df9 \
-    --hash=sha256:4f92084defa704deadd4e0a5ab1dc52d8ac9e8a8ef617f3fbb853e79b0ea3592 \
-    --hash=sha256:65ef3468b53269eb5fdb3a5c09508c032b793da03251d5f8722b1194f1790c00 \
-    --hash=sha256:6f527d8fdb0286fd2fd97a2a96c6be17ba4232da346931d967a0630050dfd298 \
-    --hash=sha256:7051ee569db5fbac144335e0f3b9c2337e0c8d5c9fee015f259a5bd70772b7e8 \
-    --hash=sha256:7716e4a9b7af82c06a2543c53ca476fa0b57e4d760481273e09da04b74ee6ee2 \
-    --hash=sha256:79bd5f0a02aa16808fcbc79a9a376a147cc1045f7dfe44c6e7d53fa8b8a79392 \
-    --hash=sha256:7a4e84a6283b36632e2a5b56e121961f6542ab886bc9e12f8f9818b3c266bfbb \
-    --hash=sha256:8120575cb4882318c791f839a4fd66161a6fa46f3f0a5e613071aae35b5dd8f8 \
-    --hash=sha256:81413336ef121a6ba746892fad881a83351ee3e1e4011f52e97fba79233611fd \
-    --hash=sha256:8146f3550d627252269ac42ae660281d673eb6f8b32f113538e0cc2a9aed42b9 \
-    --hash=sha256:879cf3a9a2b53a4672a168c21375166171bc3932b7e21f622201811c43cdd3b0 \
-    --hash=sha256:892c10d6a73e0f14935c31229e03325a7b3093fafd6ce0af704be7f894d95687 \
-    --hash=sha256:92bda934a791c01d6d9d8e038363c50918ef7c40601552a58ac84c9613a665bc \
-    --hash=sha256:9ba03692a45d3eef66559efe1d1096c4b9b75c0986b5dff5530c378fb8331d4f \
-    --hash=sha256:9eeea959168ea555e556b8188da5fa7831e21d91ce031e95ce23747b7609f8a4 \
-    --hash=sha256:a0258ad1f44f138b791327961caedffbf9612bfa504ab9597157806faa95194a \
-    --hash=sha256:a761ba0fa886a7bb33c6c8f6f20213735cb19642c580a931c625ee377ee8bd39 \
-    --hash=sha256:a7b9084668aa0f64e64bd00d27ba5146ef1c3a8835f3bd912e7a9e01326804c4 \
-    --hash=sha256:a84eda42bd12edc36eb5b53bbcc9b406820d3353f1994b6cfe453a33ff101775 \
-    --hash=sha256:ab2939cd5bec30a7430cbdb2287b63151b77cf9624de0532d629c9a1c59b1d5c \
-    --hash=sha256:ac0280f1ba4a4bfff363a99a6aceed4f8e123f8a9b234c89140f5e894e452ecd \
-    --hash=sha256:adf8c1d66f432ce577d0197dceaac2ac00c0759f573f28516246351c58a85020 \
-    --hash=sha256:b4adfbbc64014976d2f91084915ca4e626fbf2057fb81af209c1a6d776d23e3d \
-    --hash=sha256:bb649f8b207ab07caebba230d851b579a3c8711a851d29efe15008e31bb4de24 \
-    --hash=sha256:bce43e386c16898b91e162e5baaad90c4b06f9dcbe36282490032cec98dc8ae7 \
-    --hash=sha256:bd3ad3b0a40e713fc68f99ecfd07124195333f1e689387c180813f0e94309d6f \
-    --hash=sha256:c3f7ac96b16955634e223b579a3e5798df59007ca43e8d451a0e6a50f6bfdfba \
-    --hash=sha256:cf28633d64294969c019c6df4ff37f5698e8326db68cc2b66576a51fad634880 \
-    --hash=sha256:d0f35b19894a9e08639fd60a1ec1978cb7f5f7f1eace62f38dd36be8aecdef4d \
-    --hash=sha256:db1f1c22173ac1c58db249ae48aa7ead29f534b9a948bc56828337aa84a32ed6 \
-    --hash=sha256:dbe512c511956b893d2dacd007d955a3f03d555ae05cfa3ff1c1ff6df8851854 \
-    --hash=sha256:df2f57871a96bbc1b69733cd4c51dc33bea66146b8c63cacbfed73eec0883017 \
-    --hash=sha256:e2f085ce2e813a50dfd0e01fbfc0c12bbe5d2063d99f8b29da30e544fb6483b8 \
-    --hash=sha256:e642d86b8f956098b564a45e6f6ce68a22c2c97a04f5acd3f221f57b8cb850ae \
-    --hash=sha256:e9e0a277bb2eb5d8a7407e14688b85fd8ad628ee4e0c7930415687b6564207a4 \
-    --hash=sha256:ea2bb7e2ae9e37d96835b3576a4fa4b3a97592fbea8ef7c3587078b0068b8f09 \
-    --hash=sha256:ee4d528022f4c5ff67332469e10efe06a267e32f4067dc76bb7e2cddf3cd25ff \
-    --hash=sha256:f05d4198c1bacc9124018109c5fba2f3201dbe7ab6e92ff100494f236209c960 \
-    --hash=sha256:f34dc300df798742b3d06515aa2a0aee20941c13579d7a2f2e10af01ae4901ee \
-    --hash=sha256:f4162988a360a29af158aeb4a2f4f09ffed6a969c9776f8f3bdee9b06a8ab7e5 \
-    --hash=sha256:f486038e44caa08dbd97275a9a35a283a8f1d2f0ee60ac260a1790e76660833c \
-    --hash=sha256:f7de08cbe5551911886d1ab60de58448c6df0f67d9feb7d1fb21e9875ef95e91
+numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b \
+    --hash=sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818 \
+    --hash=sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20 \
+    --hash=sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0 \
+    --hash=sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010 \
+    --hash=sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a \
+    --hash=sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea \
+    --hash=sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c \
+    --hash=sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71 \
+    --hash=sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110 \
+    --hash=sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be \
+    --hash=sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a \
+    --hash=sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a \
+    --hash=sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5 \
+    --hash=sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed \
+    --hash=sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd \
+    --hash=sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c \
+    --hash=sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e \
+    --hash=sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0 \
+    --hash=sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c \
+    --hash=sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a \
+    --hash=sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b \
+    --hash=sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0 \
+    --hash=sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6 \
+    --hash=sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2 \
+    --hash=sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a \
+    --hash=sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30 \
+    --hash=sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218 \
+    --hash=sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5 \
+    --hash=sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07 \
+    --hash=sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2 \
+    --hash=sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4 \
+    --hash=sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764 \
+    --hash=sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef \
+    --hash=sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3 \
+    --hash=sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f
 packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \
     --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f
diff --git a/pyproject.toml b/pyproject.toml
index 02e70089f..d6453286d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,7 +46,7 @@ classifiers = [
 # ATTENTION: if anything is changed here, run `poetry update`
 requires-python = ">=3.10,<3.15"  # NOTE when changing the Python versions, also change the test versions in the Noxfile and GitHub Actions
 dependencies = [
-    "numpy>=1.26.0",    # Python 3.12 requires numpy at least 1.26
+    "numpy (>=1.26.0,<2.0.0)",    # Python 3.12 requires numpy at least 1.26, CuPy does not support 2.0
     "scipy>=1.14.1",
     "packaging",        # required by file_utils
     "jsonschema",

From a48394a33e6880bbf5f22144997ee2fe99f8967a Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 18:54:05 +0100
Subject: [PATCH 143/253] Avoid import of whole util module

---
 kernel_tuner/strategies/basinhopping.py        |  4 ++--
 kernel_tuner/strategies/bayes_opt.py           |  2 +-
 kernel_tuner/strategies/diff_evo.py            |  4 ++--
 kernel_tuner/strategies/dual_annealing.py      |  4 ++--
 kernel_tuner/strategies/firefly_algorithm.py   |  6 +++---
 kernel_tuner/strategies/genetic_algorithm.py   |  6 +++---
 kernel_tuner/strategies/greedy_ils.py          |  4 ++--
 kernel_tuner/strategies/greedy_mls.py          |  4 ++--
 kernel_tuner/strategies/minimize.py            |  4 ++--
 kernel_tuner/strategies/pso.py                 |  4 ++--
 kernel_tuner/strategies/random_sample.py       |  4 ++--
 kernel_tuner/strategies/simulated_annealing.py |  4 ++--
 test/strategies/test_strategies.py             |  4 ++--
 test/test_compiler_functions.py                |  4 ++--
 test/test_integration.py                       | 12 ++++++------
 test/test_kernelbuilder.py                     |  4 ++--
 16 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/kernel_tuner/strategies/basinhopping.py b/kernel_tuner/strategies/basinhopping.py
index 20e800f6e..eed906676 100644
--- a/kernel_tuner/strategies/basinhopping.py
+++ b/kernel_tuner/strategies/basinhopping.py
@@ -1,7 +1,7 @@
 """The strategy that uses the basinhopping global optimization method."""
 import scipy.optimize
 
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc, setup_method_arguments, setup_method_options
@@ -31,7 +31,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     try:
         opt_result = scipy.optimize.basinhopping(cost_func, x0, T=T, stepsize=eps,
                                              minimizer_kwargs=minimizer_kwargs, disp=tuning_options.verbose)
-    except util.StopCriterionReached as e:
+    except StopCriterionReached as e:
         if tuning_options.verbose:
             print(e)
 
diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
index 775e4193a..451a0d5eb 100644
--- a/kernel_tuner/strategies/bayes_opt.py
+++ b/kernel_tuner/strategies/bayes_opt.py
@@ -149,7 +149,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         if max_fevals - bo.fevals <= 0:
             raise ValueError("No function evaluations left for optimization after sampling")
         bo.optimize(max_fevals)
-    except util.StopCriterionReached as e:
+    except StopCriterionReached as e:
         if tuning_options.verbose:
             print(e)
 
diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index cd089ae1e..d77772992 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -1,7 +1,7 @@
 """The differential evolution strategy that optimizes the search through the parameter space."""
 from scipy.optimize import differential_evolution
 
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc
@@ -32,7 +32,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     try:
         opt_result = differential_evolution(cost_func, bounds, maxiter=maxiter, popsize=popsize, init=population_enc,
                                         polish=False, strategy=method, disp=tuning_options.verbose)
-    except util.StopCriterionReached as e:
+    except StopCriterionReached as e:
         if tuning_options.verbose:
             print(e)
 
diff --git a/kernel_tuner/strategies/dual_annealing.py b/kernel_tuner/strategies/dual_annealing.py
index 7d9868c5e..598151ea5 100644
--- a/kernel_tuner/strategies/dual_annealing.py
+++ b/kernel_tuner/strategies/dual_annealing.py
@@ -1,7 +1,7 @@
 """The strategy that uses the dual annealing optimization method."""
 import scipy.optimize
 
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc, setup_method_arguments, setup_method_options
@@ -31,7 +31,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     opt_result = None
     try:
         opt_result = scipy.optimize.dual_annealing(cost_func, bounds, minimizer_kwargs=minimizer_kwargs, x0=x0, maxfun=max_fevals)
-    except util.StopCriterionReached as e:
+    except StopCriterionReached as e:
         if tuning_options.verbose:
             print(e)
 
diff --git a/kernel_tuner/strategies/firefly_algorithm.py b/kernel_tuner/strategies/firefly_algorithm.py
index dc43aae6f..f4309d638 100644
--- a/kernel_tuner/strategies/firefly_algorithm.py
+++ b/kernel_tuner/strategies/firefly_algorithm.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc, scale_from_params
@@ -42,7 +42,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     for j in range(num_particles):
         try:
             swarm[j].compute_intensity(cost_func)
-        except util.StopCriterionReached as e:
+        except StopCriterionReached as e:
             if tuning_options.verbose:
                 print(e)
             return cost_func.results
@@ -65,7 +65,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
                     swarm[i].move_towards(swarm[j], beta, alpha)
                     try:
                         swarm[i].compute_intensity(cost_func)
-                    except util.StopCriterionReached as e:
+                    except StopCriterionReached as e:
                         if tuning_options.verbose:
                             print(e)
                         return cost_func.results
diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 6a8565118..ec7c26f4c 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -4,7 +4,7 @@
 
 import numpy as np
 
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached, get_best_config
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc
@@ -42,7 +42,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         for dna in population:
             try:
                 time = cost_func(dna, check_restrictions=False)
-            except util.StopCriterionReached as e:
+            except StopCriterionReached as e:
                 if tuning_options.verbose:
                     print(e)
                 return cost_func.results
@@ -54,7 +54,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
         # 'best_score' is used only for printing
         if tuning_options.verbose and cost_func.results:
-            best_score = util.get_best_config(
+            best_score = get_best_config(
                 cost_func.results, tuning_options.objective, tuning_options.objective_higher_is_better
             )[tuning_options.objective]
 
diff --git a/kernel_tuner/strategies/greedy_ils.py b/kernel_tuner/strategies/greedy_ils.py
index a4c521746..0608c092c 100644
--- a/kernel_tuner/strategies/greedy_ils.py
+++ b/kernel_tuner/strategies/greedy_ils.py
@@ -1,5 +1,5 @@
 """A simple greedy iterative local search algorithm for parameter search."""
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc
@@ -40,7 +40,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         try:
             candidate = base_hillclimb(candidate, neighbor, max_fevals, searchspace, tuning_options, cost_func, restart=restart, randomize=True)
             new_score = cost_func(candidate, check_restrictions=False)
-        except util.StopCriterionReached as e:
+        except StopCriterionReached as e:
             if tuning_options.verbose:
                 print(e)
             return cost_func.results
diff --git a/kernel_tuner/strategies/greedy_mls.py b/kernel_tuner/strategies/greedy_mls.py
index 1b34da501..cdca53e12 100644
--- a/kernel_tuner/strategies/greedy_mls.py
+++ b/kernel_tuner/strategies/greedy_mls.py
@@ -1,5 +1,5 @@
 """A greedy multi-start local search algorithm for parameter search."""
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.hillclimbers import base_hillclimb
@@ -30,7 +30,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
         try:
             base_hillclimb(candidate, neighbor, max_fevals, searchspace, tuning_options, cost_func, restart=restart, randomize=randomize, order=order)
-        except util.StopCriterionReached as e:
+        except StopCriterionReached as e:
             if tuning_options.verbose:
                 print(e)
             return cost_func.results
diff --git a/kernel_tuner/strategies/minimize.py b/kernel_tuner/strategies/minimize.py
index 80c1c6f82..71929a040 100644
--- a/kernel_tuner/strategies/minimize.py
+++ b/kernel_tuner/strategies/minimize.py
@@ -2,7 +2,7 @@
 
 import scipy.optimize
 
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies.common import (
     CostFunc,
@@ -30,7 +30,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     opt_result = None
     try:
         opt_result = scipy.optimize.minimize(cost_func, x0, method=method, options=options, **kwargs)
-    except util.StopCriterionReached as e:
+    except StopCriterionReached as e:
         if tuning_options.verbose:
             print(e)
 
diff --git a/kernel_tuner/strategies/pso.py b/kernel_tuner/strategies/pso.py
index cc6b82d49..82c500197 100644
--- a/kernel_tuner/strategies/pso.py
+++ b/kernel_tuner/strategies/pso.py
@@ -5,7 +5,7 @@
 
 import numpy as np
 
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc, scale_from_params
@@ -52,7 +52,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         for j in range(num_particles):
             try:
                 swarm[j].evaluate(cost_func)
-            except util.StopCriterionReached as e:
+            except StopCriterionReached as e:
                 if tuning_options.verbose:
                     print(e)
                 return cost_func.results
diff --git a/kernel_tuner/strategies/random_sample.py b/kernel_tuner/strategies/random_sample.py
index 06ab4b9f6..57eaac6cc 100644
--- a/kernel_tuner/strategies/random_sample.py
+++ b/kernel_tuner/strategies/random_sample.py
@@ -1,7 +1,7 @@
 """Iterate over a random sample of the parameter space."""
 import numpy as np
 
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc
@@ -26,7 +26,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     for sample in samples:
         try:
             cost_func(sample, check_restrictions=False)
-        except util.StopCriterionReached as e:
+        except StopCriterionReached as e:
             if tuning_options.verbose:
                 print(e)
             return cost_func.results
diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py
index d73c0ad5e..b9738d741 100644
--- a/kernel_tuner/strategies/simulated_annealing.py
+++ b/kernel_tuner/strategies/simulated_annealing.py
@@ -4,7 +4,7 @@
 
 import numpy as np
 
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc
@@ -52,7 +52,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
             new_pos = neighbor(pos, searchspace)
             try:
                 new_cost = cost_func(new_pos, check_restrictions=False)
-            except util.StopCriterionReached as e:
+            except StopCriterionReached as e:
                 if tuning_options.verbose:
                     print(e)
                 return cost_func.results
diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
index 9c0e9faca..8b2b92a45 100644
--- a/test/strategies/test_strategies.py
+++ b/test/strategies/test_strategies.py
@@ -4,7 +4,7 @@
 import pytest
 
 import kernel_tuner
-from kernel_tuner import util
+from kernel_tuner.util import InvalidConfig
 from kernel_tuner.interface import strategy_map
 
 from ..context import skip_if_no_bayesopt_botorch, skip_if_no_bayesopt_gpytorch
@@ -75,7 +75,7 @@ def test_strategies(vector_add, strategy):
         unique_results = {}
         for result in results:
             x_int = ",".join([str(v) for k, v in result.items() if k in tune_params])
-            if not isinstance(result["time"], util.InvalidConfig):
+            if not isinstance(result["time"], InvalidConfig):
                 unique_results[x_int] = result["time"]
         assert len(unique_results) <= filter_options["max_fevals"]
 
diff --git a/test/test_compiler_functions.py b/test/test_compiler_functions.py
index 913fee85d..cf35aac83 100644
--- a/test/test_compiler_functions.py
+++ b/test/test_compiler_functions.py
@@ -13,7 +13,7 @@
 import kernel_tuner
 from kernel_tuner.backends.compiler import CompilerFunctions, Argument, is_cupy_array, get_array_module
 from kernel_tuner.core import KernelSource, KernelInstance
-from kernel_tuner import util
+from kernel_tuner.util import delete_temp_file
 
 from .context import skip_if_no_gfortran, skip_if_no_gcc, skip_if_no_openmp, skip_if_no_cupy
 from .test_runners import env as cuda_env  # noqa: F401
@@ -391,7 +391,7 @@ def test_complies_fortran_function_with_module():
         assert np.isclose(result, 42.0)
 
     finally:
-        util.delete_temp_file("my_fancy_module.mod")
+        delete_temp_file("my_fancy_module.mod")
 
 
 @pytest.fixture
diff --git a/test/test_integration.py b/test/test_integration.py
index aafb437f1..637a07575 100644
--- a/test/test_integration.py
+++ b/test/test_integration.py
@@ -5,7 +5,7 @@
 import pytest
 
 from kernel_tuner import integration
-from kernel_tuner import util
+from kernel_tuner.util import delete_temp_file
 from datetime import datetime, timezone
 
 
@@ -71,7 +71,7 @@ def test_store_results(fake_results):
         assert my_gpu_100_data[0]["time"] < 100
 
     finally:
-        util.delete_temp_file(filename)
+        delete_temp_file(filename)
 
 
 def test_setup_device_targets(fake_results):
@@ -136,8 +136,8 @@ def test_setup_device_targets(fake_results):
         assert expected in output_str
 
     finally:
-        util.delete_temp_file(results_filename)
-        util.delete_temp_file(header_filename)
+        delete_temp_file(results_filename)
+        delete_temp_file(header_filename)
 
 
 def test_setup_device_targets_max(fake_results):
@@ -174,5 +174,5 @@ def test_setup_device_targets_max(fake_results):
         assert expected in output_str
 
     finally:
-        util.delete_temp_file(results_filename)
-        util.delete_temp_file(header_filename)
+        delete_temp_file(results_filename)
+        delete_temp_file(header_filename)
diff --git a/test/test_kernelbuilder.py b/test/test_kernelbuilder.py
index c706e3953..9cd2d0185 100644
--- a/test/test_kernelbuilder.py
+++ b/test/test_kernelbuilder.py
@@ -3,8 +3,8 @@
 
 import pytest
 from kernel_tuner import kernelbuilder
-from kernel_tuner import util
 from kernel_tuner import integration
+from kernel_tuner.util import delete_temp_file
 
 
 backends = ["cuda", "cupy"]
@@ -59,4 +59,4 @@ def test_PythonKernel_tuned(test_kernel, backend):
         assert np.allclose(reference[0], a+b)
 
     finally:
-        util.delete_temp_file(test_results_file)
+        delete_temp_file(test_results_file)

From 5dd3e4c76e3de3db57218ea280075e7de50d5047 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Tue, 25 Mar 2025 12:02:56 +0100
Subject: [PATCH 144/253] Updated dependencies, required python version and
 bumped version

---
 kernel_tuner/backends/hypertuner.py | 5 +++--
 kernel_tuner/hyper.py               | 2 +-
 pyproject.toml                      | 6 +++---
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index 6348cc56d..66634e5c0 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -101,8 +101,9 @@ def compile(self, kernel_instance):
         # any additional settings
         override = { 
             "experimental_groups_defaults": { 
-                "repeats": 10,
-                "samples": self.iterations 
+                "repeats": 25,
+                "samples": self.iterations,
+                "minimum_fraction_of_budget_valid": 0.01, 
             }
         }
 
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 27672cf97..ed61558e5 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -152,7 +152,7 @@ def put_if_not_present(target_dict, key, value):
     elif strategy_to_tune.lower() == "bayes_opt":
         hyperparams = {
             # 'covariancekernel': ["constantrbf", "rbf", "matern32", "matern52"],
-            # 'covariancelengthscale': [1.0, 1.5, 2.0],
+            'covariancelengthscale': [1.0, 1.5, 2.0],
             'method': ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast", "multi-ultrafast"],
             'samplingmethod': ["random", "LHS"],
             'popsize': [10, 20, 30]
diff --git a/pyproject.toml b/pyproject.toml
index d6453286d..d00045d67 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
 [project]
 name = "kernel_tuner"
 description = "An easy to use CUDA/OpenCL kernel tuner in Python"
-version = "1.0" # adhere to PEP440 versioning: https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#id55
+version = "1.1.0" # adhere to PEP440 versioning: https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#id55
 readme = "README.md"
 license = "Apache-2.0"
 authors = [
@@ -44,13 +44,13 @@ classifiers = [
 ]
 
 # ATTENTION: if anything is changed here, run `poetry update`
-requires-python = ">=3.10,<3.15"  # NOTE when changing the Python versions, also change the test versions in the Noxfile and GitHub Actions
+requires-python = ">=3.10,<4" # <4 is because of hip-python-fork  # NOTE when changing the Python versions, also change the test versions in the Noxfile and GitHub Actions
 dependencies = [
     "numpy (>=1.26.0,<2.0.0)",    # Python 3.12 requires numpy at least 1.26, CuPy does not support 2.0
     "scipy>=1.14.1",
     "packaging",        # required by file_utils
     "jsonschema",
-    "python-constraint2>=2.1.0",
+    "python-constraint2>=2.2.0",
     "xmltodict",
     "pandas>=2.0.0",
     "scikit-learn>=1.0.2",

From 02833f380e8d579f572c896cc30eeb0213b43096 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Tue, 25 Mar 2025 13:06:51 +0100
Subject: [PATCH 145/253] Updated dependencies, required python version and
 bumped version

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index d00045d67..1a7684138 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
 [project]
 name = "kernel_tuner"
 description = "An easy to use CUDA/OpenCL kernel tuner in Python"
-version = "1.1.0" # adhere to PEP440 versioning: https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#id55
+version = "1.2.0" # adhere to PEP440 versioning: https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#id55
 readme = "README.md"
 license = "Apache-2.0"
 authors = [
@@ -80,7 +80,7 @@ include = [
 # cupy-cuda12x = { version = "*", optional = true }
 # cuda-python = { version = "*", optional = true }
 [project.optional-dependencies]
-cuda = ["pycuda>=2024.1", "nvidia-ml-py>=12.535.108", "pynvml>=11.4.1"] # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile
+cuda = ["pycuda>=2025.1", "nvidia-ml-py>=12.535.108", "pynvml>=11.4.1"] # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile
 opencl = ["pyopencl"]                                                   # Attention: if pyopencl is changed here, also change `session.install("pyopencl")` in the Noxfile
 cuda_opencl = ["pycuda>=2024.1", "pyopencl"]                            # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile
 hip = ["hip-python-fork"]

From b820419d9a8dc1561ff122e41836bf448f2d5543 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Tue, 25 Mar 2025 13:11:09 +0100
Subject: [PATCH 146/253] Updated documentation dependencies

---
 doc/requirements.txt      | 172 +++++++++++------------
 doc/requirements_test.txt | 284 +++++++++++++++++++-------------------
 2 files changed, 228 insertions(+), 228 deletions(-)

diff --git a/doc/requirements.txt b/doc/requirements.txt
index 378dccc76..96ba317c1 100644
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@@ -1,87 +1,87 @@
-alabaster==0.7.16 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-asttokens==3.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-attrs==25.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-babel==2.17.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-beautifulsoup4==4.13.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-bleach==6.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-certifi==2025.1.31 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-cffi==1.17.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-charset-normalizer==3.4.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-colorama==0.4.6 ; python_version >= "3.10" and python_version <= "3.11" and sys_platform == "win32" or python_version >= "3.12" and python_version < "3.15" and sys_platform == "win32"
-decorator==5.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-defusedxml==0.7.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-docutils==0.20.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-dom-toml==2.0.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-domdf-python-tools==3.10.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+alabaster==0.7.16 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+asttokens==3.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+attrs==25.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+babel==2.17.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+beautifulsoup4==4.13.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+bleach==6.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+certifi==2025.1.31 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+cffi==1.17.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+charset-normalizer==3.4.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+colorama==0.4.6 ; python_version >= "3.10" and python_version <= "3.11" and sys_platform == "win32" or python_version >= "3.12" and python_version < "4" and sys_platform == "win32"
+decorator==5.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+defusedxml==0.7.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+docutils==0.20.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+dom-toml==2.0.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+domdf-python-tools==3.10.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
 exceptiongroup==1.2.2 ; python_version >= "3.10" and python_version < "3.11"
-executing==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-fastjsonschema==2.21.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-idna==3.10 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-imagesize==1.4.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-iniconfig==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-ipython==8.34.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-jedi==0.19.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-jinja2==3.1.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-joblib==1.4.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-jsonschema-specifications==2024.10.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-jsonschema==4.23.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-jupyter-client==8.6.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-jupyter-core==5.7.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-jupyterlab-pygments==0.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-markupsafe==2.1.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-matplotlib-inline==0.1.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-mistune==3.1.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-natsort==8.4.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-nbclient==0.10.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-nbconvert==7.16.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-nbformat==5.10.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-nbsphinx==0.9.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-pandocfilters==1.5.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-parso==0.8.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-pexpect==4.9.0 ; python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "3.15" and (sys_platform != "win32" and sys_platform != "emscripten")
-platformdirs==4.3.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-pluggy==1.5.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-prompt-toolkit==3.0.50 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-ptyprocess==0.7.0 ; python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.10" and python_version <= "3.11" and os_name != "nt" or python_version >= "3.12" and python_version < "3.15" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "3.15" and os_name != "nt"
-pure-eval==0.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-pycparser==2.22 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-pygments==2.19.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-pytest==8.3.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-python-constraint2==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-pytz==2025.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-pywin32==310 ; python_version >= "3.10" and python_version <= "3.11" and sys_platform == "win32" and platform_python_implementation != "PyPy" or python_version >= "3.12" and python_version < "3.15" and sys_platform == "win32" and platform_python_implementation != "PyPy"
-pyzmq==26.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-referencing==0.36.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-requests==2.32.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-rpds-py==0.23.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-scikit-learn==1.6.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-scipy==1.15.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-six==1.17.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-snowballstemmer==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-soupsieve==2.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-sphinx-pyproject==0.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-sphinx-rtd-theme==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-sphinx==7.4.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-sphinxcontrib-applehelp==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-sphinxcontrib-devhelp==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-sphinxcontrib-htmlhelp==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-sphinxcontrib-jquery==4.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-sphinxcontrib-jsmath==1.0.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-sphinxcontrib-qthelp==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-sphinxcontrib-serializinghtml==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-stack-data==0.6.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-threadpoolctl==3.6.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-tinycss2==1.4.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-tomli==2.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-tornado==6.4.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-traitlets==5.14.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-typing-extensions==4.12.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-tzdata==2025.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-urllib3==2.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-wcwidth==0.2.13 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-webencodings==0.5.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-xmltodict==0.14.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+executing==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+fastjsonschema==2.21.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+idna==3.10 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+imagesize==1.4.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+iniconfig==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+ipython==8.34.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+jedi==0.19.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+jinja2==3.1.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+joblib==1.4.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+jsonschema-specifications==2024.10.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+jsonschema==4.23.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+jupyter-client==8.6.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+jupyter-core==5.7.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+jupyterlab-pygments==0.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+markupsafe==2.1.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+matplotlib-inline==0.1.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+mistune==3.1.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+natsort==8.4.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+nbclient==0.10.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+nbconvert==7.16.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+nbformat==5.10.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+nbsphinx==0.9.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+pandocfilters==1.5.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+parso==0.8.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+pexpect==4.9.0 ; python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "4" and (sys_platform != "win32" and sys_platform != "emscripten")
+platformdirs==4.3.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+pluggy==1.5.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+prompt-toolkit==3.0.50 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+ptyprocess==0.7.0 ; python_version >= "3.10" and python_version <= "3.11" and os_name != "nt" or python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "4" and os_name != "nt" or python_version >= "3.12" and python_version < "4" and (sys_platform != "win32" and sys_platform != "emscripten")
+pure-eval==0.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+pycparser==2.22 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+pygments==2.19.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+pytest==8.3.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+python-constraint2==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+pytz==2025.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+pywin32==310 ; python_version >= "3.10" and python_version <= "3.11" and sys_platform == "win32" and platform_python_implementation != "PyPy" or python_version >= "3.12" and python_version < "4" and sys_platform == "win32" and platform_python_implementation != "PyPy"
+pyzmq==26.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+referencing==0.36.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+requests==2.32.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+rpds-py==0.23.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+scikit-learn==1.6.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+scipy==1.15.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+six==1.17.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+snowballstemmer==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+soupsieve==2.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+sphinx-pyproject==0.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+sphinx-rtd-theme==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+sphinx==7.4.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+sphinxcontrib-applehelp==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+sphinxcontrib-devhelp==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+sphinxcontrib-htmlhelp==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+sphinxcontrib-jquery==4.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+sphinxcontrib-jsmath==1.0.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+sphinxcontrib-qthelp==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+sphinxcontrib-serializinghtml==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+stack-data==0.6.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+threadpoolctl==3.6.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+tinycss2==1.4.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+tomli==2.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+tornado==6.4.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+traitlets==5.14.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+typing-extensions==4.12.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+tzdata==2025.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+urllib3==2.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+wcwidth==0.2.13 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+webencodings==0.5.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+xmltodict==0.14.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
diff --git a/doc/requirements_test.txt b/doc/requirements_test.txt
index 11ed8518b..8a5ac0b63 100644
--- a/doc/requirements_test.txt
+++ b/doc/requirements_test.txt
@@ -1,122 +1,122 @@
-argcomplete==3.6.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
-    --hash=sha256:2e4e42ec0ba2fff54b0d244d0b1623e86057673e57bafe72dda59c64bd5dee8b \
-    --hash=sha256:4e3e4e10beb20e06444dbac0ac8dda650cb6349caeefe980208d3c548708bedd
-asttokens==3.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+argcomplete==3.6.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
+    --hash=sha256:927531c2fbaa004979f18c2316f6ffadcfc5cc2de15ae2624dfe65deaf60e14f \
+    --hash=sha256:cef54d7f752560570291214f0f1c48c3b8ef09aca63d65de7747612666725dbc
+asttokens==3.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7 \
     --hash=sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2
-attrs==25.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+attrs==25.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3 \
     --hash=sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b
-build==1.2.2.post1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+build==1.2.2.post1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:1d61c0887fa860c01971625baae8bdd338e517b836a2f70dd1f7aa3a6b2fc5b5 \
     --hash=sha256:b36993e92ca9375a219c99e606a122ff365a760a2d4bba0caa09bd5278b608b7
-colorama==0.4.6 ; python_version >= "3.10" and python_version <= "3.11" and sys_platform == "win32" or python_version >= "3.10" and python_version <= "3.11" and os_name == "nt" or python_version >= "3.12" and python_version < "3.15" and sys_platform == "win32" or python_version >= "3.12" and python_version < "3.15" and os_name == "nt" \
+colorama==0.4.6 ; python_version >= "3.10" and python_version <= "3.11" and sys_platform == "win32" or python_version >= "3.10" and python_version <= "3.11" and os_name == "nt" or python_version >= "3.12" and python_version < "4" and sys_platform == "win32" or python_version >= "3.12" and python_version < "4" and os_name == "nt" \
     --hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \
     --hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6
-colorlog==6.9.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+colorlog==6.9.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \
     --hash=sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2
-coverage==7.7.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
-    --hash=sha256:056d3017ed67e7ddf266e6f57378ece543755a4c9231e997789ab3bd11392c94 \
-    --hash=sha256:0ce8cf59e09d31a4915ff4c3b94c6514af4c84b22c4cc8ad7c3c546a86150a92 \
-    --hash=sha256:104bf640f408f4e115b85110047c7f27377e1a8b7ba86f7db4fa47aa49dc9a8e \
-    --hash=sha256:1393e5aa9441dafb0162c36c8506c648b89aea9565b31f6bfa351e66c11bcd82 \
-    --hash=sha256:1586ad158523f4133499a4f322b230e2cfef9cc724820dbd58595a5a236186f4 \
-    --hash=sha256:180e3fc68ee4dc5af8b33b6ca4e3bb8aa1abe25eedcb958ba5cff7123071af68 \
-    --hash=sha256:1b336d06af14f8da5b1f391e8dec03634daf54dfcb4d1c4fb6d04c09d83cef90 \
-    --hash=sha256:1c8fbce80b2b8bf135d105aa8f5b36eae0c57d702a1cc3ebdea2a6f03f6cdde5 \
-    --hash=sha256:2d673e3add00048215c2cc507f1228a7523fd8bf34f279ac98334c9b07bd2656 \
-    --hash=sha256:316f29cc3392fa3912493ee4c83afa4a0e2db04ff69600711f8c03997c39baaa \
-    --hash=sha256:33c1394d8407e2771547583b66a85d07ed441ff8fae5a4adb4237ad39ece60db \
-    --hash=sha256:37cbc7b0d93dfd133e33c7ec01123fbb90401dce174c3b6661d8d36fb1e30608 \
-    --hash=sha256:39abcacd1ed54e2c33c54bdc488b310e8ef6705833f7148b6eb9a547199d375d \
-    --hash=sha256:3ab7090f04b12dc6469882ce81244572779d3a4b67eea1c96fb9ecc8c607ef39 \
-    --hash=sha256:3b0e6e54591ae0d7427def8a4d40fca99df6b899d10354bab73cd5609807261c \
-    --hash=sha256:416e2a8845eaff288f97eaf76ab40367deafb9073ffc47bf2a583f26b05e5265 \
-    --hash=sha256:4545485fef7a8a2d8f30e6f79ce719eb154aab7e44217eb444c1d38239af2072 \
-    --hash=sha256:4c124025430249118d018dcedc8b7426f39373527c845093132196f2a483b6dd \
-    --hash=sha256:4fbb7a0c3c21908520149d7751cf5b74eb9b38b54d62997b1e9b3ac19a8ee2fe \
-    --hash=sha256:52fc89602cde411a4196c8c6894afb384f2125f34c031774f82a4f2608c59d7d \
-    --hash=sha256:55143aa13c49491f5606f05b49ed88663446dce3a4d3c5d77baa4e36a16d3573 \
-    --hash=sha256:57f3bd0d29bf2bd9325c0ff9cc532a175110c4bf8f412c05b2405fd35745266d \
-    --hash=sha256:5b2f144444879363ea8834cd7b6869d79ac796cb8f864b0cfdde50296cd95816 \
-    --hash=sha256:5efdeff5f353ed3352c04e6b318ab05c6ce9249c25ed3c2090c6e9cadda1e3b2 \
-    --hash=sha256:60e6347d1ed882b1159ffea172cb8466ee46c665af4ca397edbf10ff53e9ffaf \
-    --hash=sha256:693d921621a0c8043bfdc61f7d4df5ea6d22165fe8b807cac21eb80dd94e4bbd \
-    --hash=sha256:708f0a1105ef2b11c79ed54ed31f17e6325ac936501fc373f24be3e6a578146a \
-    --hash=sha256:70f0925c4e2bfc965369f417e7cc72538fd1ba91639cf1e4ef4b1a6b50439b3b \
-    --hash=sha256:7789e700f33f2b133adae582c9f437523cd5db8de845774988a58c360fc88253 \
-    --hash=sha256:7b6c96d69928a3a6767fab8dc1ce8a02cf0156836ccb1e820c7f45a423570d98 \
-    --hash=sha256:7d2a65876274acf544703e943c010b60bd79404e3623a1e5d52b64a6e2728de5 \
-    --hash=sha256:7f18d47641282664276977c604b5a261e51fefc2980f5271d547d706b06a837f \
-    --hash=sha256:89078312f06237417adda7c021c33f80f7a6d2db8572a5f6c330d89b080061ce \
-    --hash=sha256:8c938c6ae59be67ac19a7204e079efc94b38222cd7d0269f96e45e18cddeaa59 \
-    --hash=sha256:8e336b56301774ace6be0017ff85c3566c556d938359b61b840796a0202f805c \
-    --hash=sha256:a0a207c87a9f743c8072d059b4711f8d13c456eb42dac778a7d2e5d4f3c253a7 \
-    --hash=sha256:a2454b12a3f12cc4698f3508912e6225ec63682e2ca5a96f80a2b93cef9e63f3 \
-    --hash=sha256:a538a23119d1e2e2ce077e902d02ea3d8e0641786ef6e0faf11ce82324743944 \
-    --hash=sha256:aa4dff57fc21a575672176d5ab0ef15a927199e775c5e8a3d75162ab2b0c7705 \
-    --hash=sha256:ad0edaa97cb983d9f2ff48cadddc3e1fb09f24aa558abeb4dc9a0dbacd12cbb4 \
-    --hash=sha256:ae8006772c6b0fa53c33747913473e064985dac4d65f77fd2fdc6474e7cd54e4 \
-    --hash=sha256:b0fac2088ec4aaeb5468b814bd3ff5e5978364bfbce5e567c44c9e2854469f6c \
-    --hash=sha256:b3e212a894d8ae07fde2ca8b43d666a6d49bbbddb10da0f6a74ca7bd31f20054 \
-    --hash=sha256:b54a1ee4c6f1905a436cbaa04b26626d27925a41cbc3a337e2d3ff7038187f07 \
-    --hash=sha256:b667b91f4f714b17af2a18e220015c941d1cf8b07c17f2160033dbe1e64149f0 \
-    --hash=sha256:b8c36093aca722db73633cf2359026ed7782a239eb1c6db2abcff876012dc4cf \
-    --hash=sha256:bb356e7ae7c2da13f404bf8f75be90f743c6df8d4607022e759f5d7d89fe83f8 \
-    --hash=sha256:bce730d484038e97f27ea2dbe5d392ec5c2261f28c319a3bb266f6b213650135 \
-    --hash=sha256:c075d167a6ec99b798c1fdf6e391a1d5a2d054caffe9593ba0f97e3df2c04f0e \
-    --hash=sha256:c4e09534037933bf6eb31d804e72c52ec23219b32c1730f9152feabbd7499463 \
-    --hash=sha256:c5f8a5364fc37b2f172c26a038bc7ec4885f429de4a05fc10fdcb53fb5834c5c \
-    --hash=sha256:cb203c0afffaf1a8f5b9659a013f8f16a1b2cad3a80a8733ceedc968c0cf4c57 \
-    --hash=sha256:cc41374d2f27d81d6558f8a24e5c114580ffefc197fd43eabd7058182f743322 \
-    --hash=sha256:cd879d4646055a573775a1cec863d00c9ff8c55860f8b17f6d8eee9140c06166 \
-    --hash=sha256:d013c07061751ae81861cae6ec3a4fe04e84781b11fd4b6b4201590234b25c7b \
-    --hash=sha256:d8c7524779003d59948c51b4fcbf1ca4e27c26a7d75984f63488f3625c328b9b \
-    --hash=sha256:d9710521f07f526de30ccdead67e6b236fe996d214e1a7fba8b36e2ba2cd8261 \
-    --hash=sha256:e1ffde1d6bc2a92f9c9207d1ad808550873748ac2d4d923c815b866baa343b3f \
-    --hash=sha256:e7f559c36d5cdc448ee13e7e56ed7b6b5d44a40a511d584d388a0f5d940977ba \
-    --hash=sha256:f2a1e18a85bd066c7c556d85277a7adf4651f259b2579113844835ba1a74aafd \
-    --hash=sha256:f32b165bf6dfea0846a9c9c38b7e1d68f313956d60a15cde5d1709fddcaf3bee \
-    --hash=sha256:f5a2f71d6a91238e7628f23538c26aa464d390cbdedf12ee2a7a0fb92a24482a \
-    --hash=sha256:f81fe93dc1b8e5673f33443c0786c14b77e36f1025973b85e07c70353e46882b
-decorator==5.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+coverage==7.7.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
+    --hash=sha256:02fad4f8faa4153db76f9246bc95c1d99f054f4e0a884175bff9155cf4f856cb \
+    --hash=sha256:092b134129a8bb940c08b2d9ceb4459af5fb3faea77888af63182e17d89e1cf1 \
+    --hash=sha256:0ce92c5a9d7007d838456f4b77ea159cb628187a137e1895331e530973dcf862 \
+    --hash=sha256:0dab4ef76d7b14f432057fdb7a0477e8bffca0ad39ace308be6e74864e632271 \
+    --hash=sha256:1165490be0069e34e4f99d08e9c5209c463de11b471709dfae31e2a98cbd49fd \
+    --hash=sha256:11dd6f52c2a7ce8bf0a5f3b6e4a8eb60e157ffedc3c4b4314a41c1dfbd26ce58 \
+    --hash=sha256:15d54ecef1582b1d3ec6049b20d3c1a07d5e7f85335d8a3b617c9960b4f807e0 \
+    --hash=sha256:171e9977c6a5d2b2be9efc7df1126fd525ce7cad0eb9904fe692da007ba90d81 \
+    --hash=sha256:177d837339883c541f8524683e227adcaea581eca6bb33823a2a1fdae4c988e1 \
+    --hash=sha256:18f544356bceef17cc55fcf859e5664f06946c1b68efcea6acdc50f8f6a6e776 \
+    --hash=sha256:199a1272e642266b90c9f40dec7fd3d307b51bf639fa0d15980dc0b3246c1393 \
+    --hash=sha256:1e6f867379fd033a0eeabb1be0cffa2bd660582b8b0c9478895c509d875a9d9e \
+    --hash=sha256:2444fbe1ba1889e0b29eb4d11931afa88f92dc507b7248f45be372775b3cef4f \
+    --hash=sha256:25fe40967717bad0ce628a0223f08a10d54c9d739e88c9cbb0f77b5959367542 \
+    --hash=sha256:264ff2bcce27a7f455b64ac0dfe097680b65d9a1a293ef902675fa8158d20b24 \
+    --hash=sha256:2a79c4a09765d18311c35975ad2eb1ac613c0401afdd9cb1ca4110aeb5dd3c4c \
+    --hash=sha256:2c492401bdb3a85824669d6a03f57b3dfadef0941b8541f035f83bbfc39d4282 \
+    --hash=sha256:315ff74b585110ac3b7ab631e89e769d294f303c6d21302a816b3554ed4c81af \
+    --hash=sha256:34a3bf6b92e6621fc4dcdaab353e173ccb0ca9e4bfbcf7e49a0134c86c9cd303 \
+    --hash=sha256:37351dc8123c154fa05b7579fdb126b9f8b1cf42fd6f79ddf19121b7bdd4aa04 \
+    --hash=sha256:385618003e3d608001676bb35dc67ae3ad44c75c0395d8de5780af7bb35be6b2 \
+    --hash=sha256:392cc8fd2b1b010ca36840735e2a526fcbd76795a5d44006065e79868cc76ccf \
+    --hash=sha256:3d03287eb03186256999539d98818c425c33546ab4901028c8fa933b62c35c3a \
+    --hash=sha256:44683f2556a56c9a6e673b583763096b8efbd2df022b02995609cf8e64fc8ae0 \
+    --hash=sha256:44af11c00fd3b19b8809487630f8a0039130d32363239dfd15238e6d37e41a48 \
+    --hash=sha256:452735fafe8ff5918236d5fe1feac322b359e57692269c75151f9b4ee4b7e1bc \
+    --hash=sha256:4c181ceba2e6808ede1e964f7bdc77bd8c7eb62f202c63a48cc541e5ffffccb6 \
+    --hash=sha256:4dd532dac197d68c478480edde74fd4476c6823355987fd31d01ad9aa1e5fb59 \
+    --hash=sha256:520af84febb6bb54453e7fbb730afa58c7178fd018c398a8fcd8e269a79bf96d \
+    --hash=sha256:553ba93f8e3c70e1b0031e4dfea36aba4e2b51fe5770db35e99af8dc5c5a9dfe \
+    --hash=sha256:5b7b02e50d54be6114cc4f6a3222fec83164f7c42772ba03b520138859b5fde1 \
+    --hash=sha256:63306486fcb5a827449464f6211d2991f01dfa2965976018c9bab9d5e45a35c8 \
+    --hash=sha256:75c82b27c56478d5e1391f2e7b2e7f588d093157fa40d53fd9453a471b1191f2 \
+    --hash=sha256:7ba5ff236c87a7b7aa1441a216caf44baee14cbfbd2256d306f926d16b026578 \
+    --hash=sha256:7e688010581dbac9cab72800e9076e16f7cccd0d89af5785b70daa11174e94de \
+    --hash=sha256:80b5b207a8b08c6a934b214e364cab2fa82663d4af18981a6c0a9e95f8df7602 \
+    --hash=sha256:822fa99dd1ac686061e1219b67868e25d9757989cf2259f735a4802497d6da31 \
+    --hash=sha256:881cae0f9cbd928c9c001487bb3dcbfd0b0af3ef53ae92180878591053be0cb3 \
+    --hash=sha256:88d96127ae01ff571d465d4b0be25c123789cef88ba0879194d673fdea52f54e \
+    --hash=sha256:8b1c65a739447c5ddce5b96c0a388fd82e4bbdff7251396a70182b1d83631019 \
+    --hash=sha256:8fed429c26b99641dc1f3a79179860122b22745dd9af36f29b141e178925070a \
+    --hash=sha256:9bb47cc9f07a59a451361a850cb06d20633e77a9118d05fd0f77b1864439461b \
+    --hash=sha256:a6b6b3bd121ee2ec4bd35039319f3423d0be282b9752a5ae9f18724bc93ebe7c \
+    --hash=sha256:ae13ed5bf5542d7d4a0a42ff5160e07e84adc44eda65ddaa635c484ff8e55917 \
+    --hash=sha256:af94fb80e4f159f4d93fb411800448ad87b6039b0500849a403b73a0d36bb5ae \
+    --hash=sha256:b4c144c129343416a49378e05c9451c34aae5ccf00221e4fa4f487db0816ee2f \
+    --hash=sha256:b52edb940d087e2a96e73c1523284a2e94a4e66fa2ea1e2e64dddc67173bad94 \
+    --hash=sha256:b559adc22486937786731dac69e57296cb9aede7e2687dfc0d2696dbd3b1eb6b \
+    --hash=sha256:b838a91e84e1773c3436f6cc6996e000ed3ca5721799e7789be18830fad009a2 \
+    --hash=sha256:ba8480ebe401c2f094d10a8c4209b800a9b77215b6c796d16b6ecdf665048950 \
+    --hash=sha256:bc96441c9d9ca12a790b5ae17d2fa6654da4b3962ea15e0eabb1b1caed094777 \
+    --hash=sha256:c90e9141e9221dd6fbc16a2727a5703c19443a8d9bf7d634c792fa0287cee1ab \
+    --hash=sha256:d2e73e2ac468536197e6b3ab79bc4a5c9da0f078cd78cfcc7fe27cf5d1195ef0 \
+    --hash=sha256:d3154b369141c3169b8133973ac00f63fcf8d6dbcc297d788d36afbb7811e511 \
+    --hash=sha256:d66ff48ab3bb6f762a153e29c0fc1eb5a62a260217bc64470d7ba602f5886d20 \
+    --hash=sha256:d6874929d624d3a670f676efafbbc747f519a6121b581dd41d012109e70a5ebd \
+    --hash=sha256:e33426a5e1dc7743dd54dfd11d3a6c02c5d127abfaa2edd80a6e352b58347d1a \
+    --hash=sha256:e52eb31ae3afacdacfe50705a15b75ded67935770c460d88c215a9c0c40d0e9c \
+    --hash=sha256:eae79f8e3501133aa0e220bbc29573910d096795882a70e6f6e6637b09522133 \
+    --hash=sha256:eebd927b86761a7068a06d3699fd6c20129becf15bb44282db085921ea0f1585 \
+    --hash=sha256:eff187177d8016ff6addf789dcc421c3db0d014e4946c1cc3fbf697f7852459d \
+    --hash=sha256:f5f99a93cecf799738e211f9746dc83749b5693538fbfac279a61682ba309387 \
+    --hash=sha256:fbba59022e7c20124d2f520842b75904c7b9f16c854233fa46575c69949fb5b9
+decorator==5.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360 \
     --hash=sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a
-distlib==0.3.9 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+distlib==0.3.9 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \
     --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403
 exceptiongroup==1.2.2 ; python_version >= "3.10" and python_version < "3.11" \
     --hash=sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b \
     --hash=sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc
-executing==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+executing==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa \
     --hash=sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755
-filelock==3.18.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+filelock==3.18.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2 \
     --hash=sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de
 importlib-metadata==8.6.1 ; python_version >= "3.10" and python_full_version < "3.10.2" \
     --hash=sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e \
     --hash=sha256:310b41d755445d74569f993ccfc22838295d9fe005425094fad953d7f15c8580
-iniconfig==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+iniconfig==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7 \
     --hash=sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760
-ipython==8.34.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+ipython==8.34.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:0419883fa46e0baa182c5d50ebb8d6b49df1889fdb70750ad6d8cfe678eda6e3 \
     --hash=sha256:c31d658e754673ecc6514583e7dda8069e47136eb62458816b7d1e6625948b5a
-jedi==0.19.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+jedi==0.19.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0 \
     --hash=sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9
-joblib==1.4.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+joblib==1.4.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6 \
     --hash=sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e
-jsonschema-specifications==2024.10.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+jsonschema-specifications==2024.10.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:0f38b83639958ce1152d02a7f062902c41c8fd20d558b0c34344292d417ae272 \
     --hash=sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf
-jsonschema==4.23.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+jsonschema==4.23.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4 \
     --hash=sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566
-markupsafe==2.1.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+markupsafe==2.1.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf \
     --hash=sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff \
     --hash=sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f \
@@ -177,19 +177,19 @@ markupsafe==2.1.5 ; python_version >= "3.10" and python_version <= "3.11" or pyt
     --hash=sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab \
     --hash=sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd \
     --hash=sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68
-matplotlib-inline==0.1.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+matplotlib-inline==0.1.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90 \
     --hash=sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca
-mock==5.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+mock==5.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:4e460e818629b4b173f32d08bf30d3af8123afbb8e04bb5707a1fd4799e503f0 \
     --hash=sha256:7ba87f72ca0e915175596069dbbcc7c75af7b5e9b9bc107ad6349ede0819982f
-nox-poetry==1.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+nox-poetry==1.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:2531a404e3a21eb73fc1a587a548506a8e2c4c1e6e7ef0c1d0d8d6453b7e5d26 \
     --hash=sha256:266eea7a0ab3cad7f4121ecc05b76945036db3b67e6e347557f05010a18e2682
-nox==2024.10.9 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+nox==2024.10.9 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \
     --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95
-numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b \
     --hash=sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818 \
     --hash=sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20 \
@@ -226,10 +226,10 @@ numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_
     --hash=sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef \
     --hash=sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3 \
     --hash=sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f
-packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \
     --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f
-pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a \
     --hash=sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d \
     --hash=sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5 \
@@ -272,72 +272,72 @@ pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_
     --hash=sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015 \
     --hash=sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24 \
     --hash=sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319
-parso==0.8.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+parso==0.8.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18 \
     --hash=sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d
-pep440==0.1.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+pep440==0.1.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:36d6ad73f2b5d07769294cafe183500ac89d848c922a3d3f521b968481880d51 \
     --hash=sha256:58b37246cc2b13fee1ca2a3c092cb3704d21ecf621a5bdbb168e44e697f6d04d
-pexpect==4.9.0 ; python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "3.15" and (sys_platform != "win32" and sys_platform != "emscripten") \
+pexpect==4.9.0 ; python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "4" and (sys_platform != "win32" and sys_platform != "emscripten") \
     --hash=sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523 \
     --hash=sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f
-platformdirs==4.3.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+platformdirs==4.3.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:a03875334331946f13c549dbd8f4bac7a13a50a895a0eb1e8c6a8ace80d40a94 \
     --hash=sha256:eb437d586b6a0986388f0d6f74aa0cde27b48d0e3d66843640bfb6bdcdb6e351
-pluggy==1.5.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+pluggy==1.5.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \
     --hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669
-prompt-toolkit==3.0.50 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+prompt-toolkit==3.0.50 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:544748f3860a2623ca5cd6d2795e7a14f3d0e1c3c9728359013f79877fc89bab \
     --hash=sha256:9b6427eb19e479d98acff65196a307c555eb567989e6d88ebbb1b509d9779198
-ptyprocess==0.7.0 ; python_version >= "3.10" and python_version <= "3.11" and os_name != "nt" or python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "3.15" and os_name != "nt" or python_version >= "3.12" and python_version < "3.15" and (sys_platform != "win32" and sys_platform != "emscripten") \
+ptyprocess==0.7.0 ; python_version >= "3.10" and python_version <= "3.11" and os_name != "nt" or python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "4" and os_name != "nt" or python_version >= "3.12" and python_version < "4" and (sys_platform != "win32" and sys_platform != "emscripten") \
     --hash=sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35 \
     --hash=sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220
-pure-eval==0.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+pure-eval==0.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0 \
     --hash=sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42
-pygments==2.19.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+pygments==2.19.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f \
     --hash=sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c
-pyproject-hooks==1.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+pyproject-hooks==1.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:1e859bd5c40fae9448642dd871adf459e5e2084186e8d2c2a79a824c970da1f8 \
     --hash=sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913
-pytest-cov==5.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+pytest-cov==5.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652 \
     --hash=sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857
-pytest-timeout==2.3.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+pytest-timeout==2.3.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:12397729125c6ecbdaca01035b9e5239d4db97352320af155b3f5de1ba5165d9 \
     --hash=sha256:68188cb703edfc6a18fad98dc25a3c61e9f24d644b0b70f33af545219fc7813e
-pytest==8.3.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+pytest==8.3.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820 \
     --hash=sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845
-python-constraint2==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
-    --hash=sha256:02f46e4a7e8a46048604870287f1c55312eea47c2c15dd58b51057cb7d057bdc \
-    --hash=sha256:0e5ece0b4e85ed680af6b9db33ef3497a6f9499b8957cd830cd139f17ac29aef \
-    --hash=sha256:0f3a09c1947e6a90b9558cd1651e86dbe10f698aad56247596f2b856307707f0 \
-    --hash=sha256:1c650d717c2585fd8b2247f680ca1dcc6ea970cc5644c1d847f97eacb9f7dce2 \
-    --hash=sha256:38e4dbb2522ca2295873a57f6e0fddbb0856a780c87edd79b4074fd78790fed3 \
-    --hash=sha256:441f6a06e6c88c5fbe724b834c820d959ba7542037139153d1466c7be00c7cc0 \
-    --hash=sha256:6b8f82be66242fc5587011360b07c39e6e71e5d1c8f26a107dd2b04ab7854fcc \
-    --hash=sha256:8086a21724048746e68ab721cb4a216db15f86bb700d557af0ac60f2087d4d4e \
-    --hash=sha256:abea9ae443bf33fb396a6fb597b713e110f2abd9ecf1a656cd81f53da6751b79 \
-    --hash=sha256:ace17786565250de48b8d18da555feb31f5fb3521b2bd65e9871459e1d179600 \
-    --hash=sha256:b2385c99a9fe67ae26085a5a048c1d206cf0bd74acf0cd036227afa2a90fa4fd \
-    --hash=sha256:e29bed90abe1240bf24794e73e4d8fa3e50b6aa9226d915b1902cdd03375c28b \
-    --hash=sha256:ee3d33ca5694724a17bb596b93ff8687c70b4c07945e40a9007250e282e7ab28 \
-    --hash=sha256:f28d07eae04d83d454f0e6ba2da0678786a21f2d405998a3eec960b56d809692 \
-    --hash=sha256:fbb6ab033a7a4250bce11ca12fdf8958c6c42853e933cf585dbd265e0967dd93 \
-    --hash=sha256:fc3cffd0f16cb9b34d2e95bd6d27425dd24044073760477a1341e835fc9c45f4
-python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+python-constraint2==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
+    --hash=sha256:0a841b088076d9dc481989359076b439d5201126583d920173ed9ab9cf7c4771 \
+    --hash=sha256:0f0acfbae77ef7fcbff25d1c46b2360e0c486667c1a595b5c7cd4a6540cad5e6 \
+    --hash=sha256:203b740a78266123e36d88215bb232e5e682c5845b674d2d5b1218fb3394ff1f \
+    --hash=sha256:298c322c157ae6f5a9a9b8de3d08eefcdfed7b78e4abb2ddffe1bd345ed7997b \
+    --hash=sha256:348ee17de0de028b68bf8050af142adfae37b500e60ac6758dc499bc19712805 \
+    --hash=sha256:46cb1946fc7fb63262c43d4366f8cfceb551fb7a2bf10f275ac236d968746e02 \
+    --hash=sha256:48c4f8ca1573f08bb6ef900cbe2e642aa6afb77c11a1f7c9d42c054fcfd93b8b \
+    --hash=sha256:7bf723afbfdd13155f38d1344b015fd962818fdf70cdf39005a6a5bf810e5001 \
+    --hash=sha256:85ea5330b12ccb4a474c89e3fdd037c5173db0216985da0e9a5bc20f6e26d0ca \
+    --hash=sha256:8a39fecbb893137814a4f0ce82fd78df68789d658c6991bb6d57d773a6f8878d \
+    --hash=sha256:aae18d318fd5150cda3befcf40b178a8dc661abb79cf663fefb7edd6e3afd6ab \
+    --hash=sha256:b4d6159d05204cddfa4e46eef24a10f1d6aed41a905ca83314f5d1caa31599ab \
+    --hash=sha256:c337839cfb0b3559f2f211e2ae67993c7187abf5dddbc5b587fe26b7c1b5d0b0 \
+    --hash=sha256:c3b887f073f59cf5151df3cd25c2142016676da9034d5af56478c735526882d3 \
+    --hash=sha256:d060b179461f09ee6571222ee63b4ac8dafdb6a41ffa75296a2f6b07a6bc500e \
+    --hash=sha256:f1590a5699e1097f0057513e64bac4ac2d11f5848467c1c27967e1217f8bec3d
+python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \
     --hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427
-pytz==2025.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
-    --hash=sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57 \
-    --hash=sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e
-referencing==0.36.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+pytz==2025.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
+    --hash=sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3 \
+    --hash=sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00
+referencing==0.36.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa \
     --hash=sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0
-rpds-py==0.23.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+rpds-py==0.23.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:09cd7dbcb673eb60518231e02874df66ec1296c01a4fcd733875755c02014b19 \
     --hash=sha256:0f3288930b947cbebe767f84cf618d2cbe0b13be476e749da0e6a009f986248c \
     --hash=sha256:0fced9fd4a07a1ded1bac7e961ddd9753dd5d8b755ba8e05acba54a21f5f1522 \
@@ -441,7 +441,7 @@ rpds-py==0.23.1 ; python_version >= "3.10" and python_version <= "3.11" or pytho
     --hash=sha256:fad784a31869747df4ac968a351e070c06ca377549e4ace94775aaa3ab33ee06 \
     --hash=sha256:fc869af5cba24d45fb0399b0cfdbcefcf6910bf4dee5d74036a57cf5264b3ff4 \
     --hash=sha256:fee513135b5a58f3bb6d89e48326cd5aa308e4bcdf2f7d59f67c861ada482bf8
-ruff==0.4.10 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+ruff==0.4.10 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:0f54c481b39a762d48f64d97351048e842861c6662d63ec599f67d515cb417f6 \
     --hash=sha256:18238c80ee3d9100d3535d8eb15a59c4a0753b45cc55f8bf38f38d6a597b9739 \
     --hash=sha256:330421543bd3222cdfec481e8ff3460e8702ed1e58b494cf9d9e4bf90db52b9d \
@@ -459,7 +459,7 @@ ruff==0.4.10 ; python_version >= "3.10" and python_version <= "3.11" or python_v
     --hash=sha256:d8f71885bce242da344989cae08e263de29752f094233f932d4f5cfb4ef36a81 \
     --hash=sha256:dd1fcee327c20addac7916ca4e2653fbbf2e8388d8a6477ce5b4e986b68ae6c0 \
     --hash=sha256:ffe3cd2f89cb54561c62e5fa20e8f182c0a444934bf430515a4b422f1ab7b7ca
-scikit-learn==1.6.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+scikit-learn==1.6.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:0650e730afb87402baa88afbf31c07b84c98272622aaba002559b614600ca691 \
     --hash=sha256:0c8d036eb937dbb568c6242fa598d551d88fb4399c0344d95c001980ec1c7d36 \
     --hash=sha256:1061b7c028a8663fb9a1a1baf9317b64a257fcb036dae5c8752b2abef31d136f \
@@ -490,7 +490,7 @@ scikit-learn==1.6.1 ; python_version >= "3.10" and python_version <= "3.11" or p
     --hash=sha256:e7be3fa5d2eb9be7d77c3734ff1d599151bb523674be9b834e8da6abe132f44e \
     --hash=sha256:e8ca8cb270fee8f1f76fa9bfd5c3507d60c6438bbee5687f81042e2bb98e5a97 \
     --hash=sha256:fa909b1a36e000a03c382aade0bd2063fd5680ff8b8e501660c0f59f021a6415
-scipy==1.15.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+scipy==1.15.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:01edfac9f0798ad6b46d9c4c9ca0e0ad23dbf0b1eb70e96adb9fa7f525eff0bf \
     --hash=sha256:03205d57a28e18dfd39f0377d5002725bf1f19a46f444108c29bdb246b6c8a11 \
     --hash=sha256:08b57a9336b8e79b305a143c3655cc5bdbe6d5ece3378578888d2afbb51c4e37 \
@@ -537,16 +537,16 @@ scipy==1.15.2 ; python_version >= "3.10" and python_version <= "3.11" or python_
     --hash=sha256:f031846580d9acccd0044efd1a90e6f4df3a6e12b4b6bd694a7bc03a89892b28 \
     --hash=sha256:fb530e4794fc8ea76a4a21ccb67dea33e5e0e60f07fc38a49e821e1eae3b71a0 \
     --hash=sha256:fe8a9eb875d430d81755472c5ba75e84acc980e4a8f6204d402849234d3017db
-six==1.17.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+six==1.17.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \
     --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81
-stack-data==0.6.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+stack-data==0.6.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9 \
     --hash=sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695
-threadpoolctl==3.6.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+threadpoolctl==3.6.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb \
     --hash=sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e
-tomli==2.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+tomli==2.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \
     --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \
     --hash=sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c \
@@ -579,25 +579,25 @@ tomli==2.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_v
     --hash=sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272 \
     --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \
     --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7
-tomlkit==0.13.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+tomlkit==0.13.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde \
     --hash=sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79
-traitlets==5.14.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+traitlets==5.14.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7 \
     --hash=sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f
-typing-extensions==4.12.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+typing-extensions==4.12.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \
     --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8
-tzdata==2025.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
-    --hash=sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694 \
-    --hash=sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639
-virtualenv==20.29.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+tzdata==2025.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
+    --hash=sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8 \
+    --hash=sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9
+virtualenv==20.29.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:3e3d00f5807e83b234dfb6122bf37cfadf4be216c53a49ac059d02414f819170 \
     --hash=sha256:95e39403fcf3940ac45bc717597dba16110b74506131845d9b687d5e73d947ac
-wcwidth==0.2.13 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+wcwidth==0.2.13 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859 \
     --hash=sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5
-xmltodict==0.14.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+xmltodict==0.14.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:201e7c28bb210e374999d1dde6382923ab0ed1a8a5faeece48ab525b7810a553 \
     --hash=sha256:20cc7d723ed729276e808f26fb6b3599f786cbc37e06c65e192ba77c40f20aac
 zipp==3.21.0 ; python_version >= "3.10" and python_full_version < "3.10.2" \

From 11b378fea7310199aa120eed05b0c5ced1fe604c Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 26 Mar 2025 10:45:49 +0100
Subject: [PATCH 147/253] Added python version classifiers

---
 pyproject.toml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 1a7684138..1d17ecadc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,6 +41,10 @@ classifiers = [
     "Topic :: Scientific/Engineering",
     "Topic :: Software Development",
     "Topic :: System :: Distributed Computing",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13"
 ]
 
 # ATTENTION: if anything is changed here, run `poetry update`

From 6550916492ce3a575701cdabfb43ef83e2008bae Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 26 Mar 2025 14:18:02 +0100
Subject: [PATCH 148/253] Improved code quality based on sonarcloud issues

---
 kernel_tuner/python.py      | 110 ++++++++++++++++++------------------
 kernel_tuner/searchspace.py |   3 +-
 2 files changed, 57 insertions(+), 56 deletions(-)

diff --git a/kernel_tuner/python.py b/kernel_tuner/python.py
index 00f2b24c1..0f450c690 100644
--- a/kernel_tuner/python.py
+++ b/kernel_tuner/python.py
@@ -31,7 +31,7 @@
 
 
 class PythonFunctions(object):
-    """Class that groups the code for running and compiling C functions"""
+    """Class that groups the code for running Python"""
 
     def __init__(self, iterations=7, observers=None, parallel_mode=False, hyperparam_mode=False, show_progressbar=False):
         """instantiate PythonFunctions object used for interacting with Python code
@@ -217,59 +217,61 @@ def benchmark_hyperparams(self, func, args, threads, grid):
         # print(f"In {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}")
         return result
 
-        start_time = perf_counter()
-        if self.parallel_mode:
-            num_procs = max(cpu_count() - 1, 1)
-            logging.debug(f"Running benchmark in parallel on {num_procs} processors")
-            manager = Manager()
-            MRE_values = manager.list()
-            runtimes = manager.list()
-            with get_context('spawn').Pool(num_procs) as pool:    # spawn alternative is forkserver, creates a reusable server
-                args = func, args, self.params
-                MRE_values, runtimes = zip(*pool.starmap(run_kernel_and_observers, zip(iterator, repeat(args))))
-                MRE_values, runtimes = list(MRE_values), list(runtimes)
-                print(MRE_values)
-            result["times"] = values
-            result["strategy_time"] = np.mean(runtimes)
-            np_results = np.array(values)
-        else:
-            # sequential implementation
-            np_results = np.array([])
-            for iter in iterator:
-                for obs in self.observers:
-                    obs.before_start()
-                value = self.run_kernel(func, args)
-                for obs in self.observers:
-                    obs.after_finish()
-
-                if value < 0.0:
-                    raise ValueError("Invalid benchmark result")
-
-                result["times"].append(value)
-                np_results = np.append(np_results, value)
-                if value >= invalid_value and iter >= min_valid_iterations and len(np_results[np_results < invalid_value]) < min_valid_iterations:
-                    break
-
-            # fill up the remaining iters with invalid in case of a break
-            result["times"] += [invalid_value] * (self.iterations - len(result["times"]))
-
-            # finish by instrumenting the results with the observers
-            for obs in self.observers:
-                result.update(obs.get_results())
-
-        benchmark_time = perf_counter() - start_time
-        self.benchmark_times.append(benchmark_time)
-        print(f"Time taken: {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}")
-
-        # calculate the mean of the means of the Mean Relative Error over the valid results
-        valid_results = np_results[np_results < invalid_value]
-        mean_mean_MRE = np.mean(valid_results) if len(valid_results) > 0 else np.nan
-
-        # write the 'time' to the results and return
-        if np.isnan(mean_mean_MRE) or len(valid_results) < min_valid_iterations:
-            mean_mean_MRE = invalid_value
-        result["time"] = mean_mean_MRE
-        return result
+        # old implementation
+
+        # start_time = perf_counter()
+        # if self.parallel_mode:
+        #     num_procs = max(cpu_count() - 1, 1)
+        #     logging.debug(f"Running benchmark in parallel on {num_procs} processors")
+        #     manager = Manager()
+        #     MRE_values = manager.list()
+        #     runtimes = manager.list()
+        #     with get_context('spawn').Pool(num_procs) as pool:    # spawn alternative is forkserver, creates a reusable server
+        #         args = func, args, self.params
+        #         MRE_values, runtimes = zip(*pool.starmap(run_kernel_and_observers, zip(iterator, repeat(args))))
+        #         MRE_values, runtimes = list(MRE_values), list(runtimes)
+        #         print(MRE_values)
+        #     result["times"] = values
+        #     result["strategy_time"] = np.mean(runtimes)
+        #     np_results = np.array(values)
+
+        # # sequential implementation
+        # np_results = np.array([])
+        # for iter in iterator:
+        #     for obs in self.observers:
+        #         obs.before_start()
+        #     value = self.run_kernel(func, args)
+        #     for obs in self.observers:
+        #         obs.after_finish()
+
+        #     if value < 0.0:
+        #         raise ValueError("Invalid benchmark result")
+
+        #     result["times"].append(value)
+        #     np_results = np.append(np_results, value)
+        #     if value >= invalid_value and iter >= min_valid_iterations and len(np_results[np_results < invalid_value]) < min_valid_iterations:
+        #         break
+
+        # # fill up the remaining iters with invalid in case of a break
+        # result["times"] += [invalid_value] * (self.iterations - len(result["times"]))
+
+        # # finish by instrumenting the results with the observers
+        # for obs in self.observers:
+        #     result.update(obs.get_results())
+
+        # benchmark_time = perf_counter() - start_time
+        # self.benchmark_times.append(benchmark_time)
+        # print(f"Time taken: {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}")
+
+        # # calculate the mean of the means of the Mean Relative Error over the valid results
+        # valid_results = np_results[np_results < invalid_value]
+        # mean_mean_MRE = np.mean(valid_results) if len(valid_results) > 0 else np.nan
+
+        # # write the 'time' to the results and return
+        # if np.isnan(mean_mean_MRE) or len(valid_results) < min_valid_iterations:
+        #     mean_mean_MRE = invalid_value
+        # result["time"] = mean_mean_MRE
+        # return result
 
     def run_kernel(self, func, args, threads, grid):
         """runs the kernel once, returns whatever the kernel returns
diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 8b285f5ad..e650f9628 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -1017,5 +1017,4 @@ def to_ax_searchspace(self):
         raise NotImplementedError(
             "Conversion to Ax SearchSpace has not been fully implemented as Ax Searchspaces can't capture full complexity."
         )
-
-        return ax_searchspace
+        # return ax_searchspace

From 6770d3ccf9c85cecc46619ba90590fe61983f134 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 26 Mar 2025 14:27:03 +0100
Subject: [PATCH 149/253] Removed PythonFunctions approach to hyperparameter
 tuning that is no longer needed with the autotuning methodology interface

---
 kernel_tuner/python.py | 401 -----------------------------------------
 1 file changed, 401 deletions(-)
 delete mode 100644 kernel_tuner/python.py

diff --git a/kernel_tuner/python.py b/kernel_tuner/python.py
deleted file mode 100644
index 0f450c690..000000000
--- a/kernel_tuner/python.py
+++ /dev/null
@@ -1,401 +0,0 @@
-""" This module contains the functionality for running Python functions """
-
-from collections import namedtuple
-import platform
-import logging
-import warnings
-import importlib.util
-from math import ceil
-from time import perf_counter
-from typing import Tuple
-
-# import cProfile
-
-import progressbar
-import numpy as np
-
-# for parallel subprocess runs
-from multiprocess import Manager, cpu_count, get_context    # using Pathos as Python's multiprocessing is unable to pickle
-from itertools import repeat
-import subprocess
-import sys
-from os import getpid
-
-from kernel_tuner.util import get_temp_filename, delete_temp_file
-
-# This represents an individual kernel argument.
-# It contains a numpy object (ndarray or number) and a ctypes object with a copy
-# of the argument data. For an ndarray, the ctypes object is a wrapper for the ndarray's data.
-Argument = namedtuple("Argument", ["numpy", "ctypes"])
-invalid_value = 1e20
-
-
-class PythonFunctions(object):
-    """Class that groups the code for running Python"""
-
-    def __init__(self, iterations=7, observers=None, parallel_mode=False, hyperparam_mode=False, show_progressbar=False):
-        """instantiate PythonFunctions object used for interacting with Python code
-
-        :param iterations: Number of iterations used while benchmarking a kernel, 7 by default.
-        :type iterations: int
-        """
-        self.iterations = iterations
-        self.max_threads = 1024
-        self.show_progressbar = show_progressbar
-
-        #environment info
-        env = dict()
-        env["iterations"] = self.iterations
-        self.env = env
-        self.name = platform.processor()
-        self.observers = observers or []
-        self.num_unused_cores = 1    # do not use all cores to do other work
-        self.num_cores = max(min(cpu_count() - self.num_unused_cores, self.iterations), 1)    # assumes cpu_count does not change during the life of this class!
-        self.parallel_mode = parallel_mode and self.num_cores > 1
-        self.hyperparam_mode = hyperparam_mode
-
-        self.benchmark = self.benchmark_normal if not self.hyperparam_mode else self.benchmark_hyperparams
-
-        self.benchmark_times = []
-
-        if self.parallel_mode:
-            warnings.warn(
-                "Be sure to check that simulation mode is true for the kernel, because parallel mode requires a completed cache file to avoid race conditions.")
-
-        if len(self.observers) > 0 and self.parallel_mode:
-            raise NotImplementedError("Observers are currently not implemented for parallel execution.")
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, *exc):
-        pass
-
-    def ready_argument_list(self, arguments):
-        """ready argument list to be passed to the Python function
-        """
-        return arguments
-
-    def compile(self, kernel_instance):
-        """ return the function from the kernel instance """
-
-        suffix = kernel_instance.kernel_source.get_user_suffix()
-        source_file = get_temp_filename(suffix=suffix)
-
-        spec = importlib.util.find_spec(kernel_instance.name)
-        foo = importlib.util.module_from_spec(spec)
-        spec.loader.exec_module(foo)
-        func = getattr(foo, kernel_instance.name)
-
-        self.params = kernel_instance.params
-
-        delete_temp_file(source_file)
-        return func
-
-    def benchmark_normal(self, func, args, threads, grid):
-        """runs the kernel repeatedly, returns times
-
-        :param func: A Python function for this specific configuration
-        :type func: ctypes._FuncPtr
-
-        :param args: A list of arguments to the function, order should match the
-            order in the code. The list should be prepared using
-            ready_argument_list().
-        :type args: list(Argument)
-
-        :param threads: Ignored, but left as argument for now to have the same
-            interface as CudaFunctions and OpenCLFunctions.
-        :type threads: any
-
-        :param grid: Ignored, but left as argument for now to have the same
-            interface as CudaFunctions and OpenCLFunctions.
-        :type grid: any
-
-        :returns: All times.
-        :rtype: dict()
-        """
-
-        result = dict()
-        result["times"] = []
-        iterator = range(self.iterations) if not self.show_progressbar or self.parallel_mode else progressbar.progressbar(
-            range(self.iterations), min_value=0, max_value=self.iterations, redirect_stdout=True)
-
-        # new implementation
-        start_time = perf_counter()
-        if self.parallel_mode:
-            logging.debug(f"Running benchmark in parallel on {self.num_cores} processors")
-            manager = Manager()
-            invalid_flag = manager.Value('i', int(False))
-            values = manager.list()
-            runtimes = manager.list()
-            with get_context('spawn').Pool(self.num_cores) as pool:    # spawn alternative is forkserver, creates a reusable server
-                args = func, args, self.params, invalid_flag
-                values, runtimes = zip(*pool.starmap(run_kernel_and_observers, zip(iterator, repeat(args))))
-                values, runtimes = list(values), list(runtimes)
-            result["strategy_time"] = np.mean(runtimes)
-        else:
-            values = list()
-            for _ in range(self.iterations):
-                value = self.run_kernel(func, args, threads, grid)
-                if value < 0.0:
-                    raise Exception("too many resources requested for launch")
-                values.append(value)
-
-        benchmark_time = perf_counter() - start_time
-        self.benchmark_times.append(benchmark_time)
-
-        result["times"] = values
-        result["time"] = np.mean(values)
-        # print(f"Mean: {np.mean(values)}, std: {np.std(values)} in {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}\n")
-        return result
-
-    def benchmark_hyperparams(self, func, args, threads, grid):
-        """runs the kernel repeatedly, returns grandmedian for hyperparameter tuning
-
-        :param func: A Python function for this specific configuration
-        :type func: ctypes._FuncPtr
-
-        :param args: A list of arguments to the function, order should match the
-            order in the code. The list should be prepared using
-            ready_argument_list().
-        :type args: list(Argument)
-
-        :param threads: Ignored, but left as argument for now to have the same
-            interface as CudaFunctions and OpenCLFunctions.
-        :type threads: any
-
-        :param grid: Ignored, but left as argument for now to have the same
-            interface as CudaFunctions and OpenCLFunctions.
-        :type grid: any
-
-        :returns: All execution hyperparameter scores in the same format as times.
-        :rtype: dict()
-        """
-
-        # For reference: the following times were obtained with 35 repeats on random_sample strategy.
-        # As seen, there is a lot of overhead with subproceses; directly executing the function scales much better.
-        # time taken by sequential: 20.7 sec
-        # time taken by parallel in sequential form (subprocess overhead): 46.3 sec
-        # time taken by parallel subprocesses: 7.5 sec on 9, 9.9 sec on 8, 13.6 sec on 4, 27.8 sec on 2, 45.9 sec on 1
-        # time taken by parallel directly: 2.99 sec on 9, 4.0 sec on 8, 5.23 sec on 4, 11.3 sec on 2, 19.3 sec on 1
-
-        result = dict()
-        result["times"] = []
-        min_valid_iterations = ceil(self.iterations * 0.8)
-        iterator = range(self.iterations) if not self.show_progressbar or self.parallel_mode else progressbar.progressbar(
-            range(self.iterations), min_value=0, max_value=self.iterations, redirect_stdout=True)
-
-        # new implementation
-        start_time = perf_counter()
-        if self.parallel_mode:
-            logging.debug(f"Running hyperparameter benchmark in parallel on {self.num_cores} processors")
-            manager = Manager()
-            invalid_flag = manager.Value('i', int(False))
-            MWP_values = manager.list()
-            runtimes = manager.list()
-            warnings_dicts = manager.list()
-            with get_context('spawn').Pool(self.num_cores) as pool:    # spawn alternative is forkserver, creates a reusable server
-                args = func, args, self.params, invalid_flag
-                MWP_values, runtimes, warnings_dicts = zip(*pool.starmap(run_kernel_and_observers, zip(iterator, repeat(args))))
-                MWP_values, runtimes, warnings_dicts = list(MWP_values), list(runtimes), list(warnings_dicts)
-            result["strategy_time"] = np.mean(runtimes)
-            warning_dict = warnings_dicts[0]
-            for key in warning_dict.keys():
-                warning_dict[key] = np.mean(list(warnings_dict[key] for warnings_dict in warnings_dicts))
-            result["warnings"] = warning_dict
-        else:
-            raise NotImplementedError("Sequential mode has not been implemented yet")
-
-        benchmark_time = perf_counter() - start_time
-        self.benchmark_times.append(benchmark_time)
-
-        grandmean, times = get_hyperparam_grandmedian_and_times(MWP_values, invalid_value, min_valid_iterations)
-        result["times"] = times
-        result["time"] = grandmean
-        print(f"Grandmean: {grandmean} in {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}\n")
-        # print(f"Grandmean: {grandmean}, mean MWP per iteration: {np.mean(times)}, std MWP per iteration: {np.std(times)}")
-        # print(f"In {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}")
-        return result
-
-        # old implementation
-
-        # start_time = perf_counter()
-        # if self.parallel_mode:
-        #     num_procs = max(cpu_count() - 1, 1)
-        #     logging.debug(f"Running benchmark in parallel on {num_procs} processors")
-        #     manager = Manager()
-        #     MRE_values = manager.list()
-        #     runtimes = manager.list()
-        #     with get_context('spawn').Pool(num_procs) as pool:    # spawn alternative is forkserver, creates a reusable server
-        #         args = func, args, self.params
-        #         MRE_values, runtimes = zip(*pool.starmap(run_kernel_and_observers, zip(iterator, repeat(args))))
-        #         MRE_values, runtimes = list(MRE_values), list(runtimes)
-        #         print(MRE_values)
-        #     result["times"] = values
-        #     result["strategy_time"] = np.mean(runtimes)
-        #     np_results = np.array(values)
-
-        # # sequential implementation
-        # np_results = np.array([])
-        # for iter in iterator:
-        #     for obs in self.observers:
-        #         obs.before_start()
-        #     value = self.run_kernel(func, args)
-        #     for obs in self.observers:
-        #         obs.after_finish()
-
-        #     if value < 0.0:
-        #         raise ValueError("Invalid benchmark result")
-
-        #     result["times"].append(value)
-        #     np_results = np.append(np_results, value)
-        #     if value >= invalid_value and iter >= min_valid_iterations and len(np_results[np_results < invalid_value]) < min_valid_iterations:
-        #         break
-
-        # # fill up the remaining iters with invalid in case of a break
-        # result["times"] += [invalid_value] * (self.iterations - len(result["times"]))
-
-        # # finish by instrumenting the results with the observers
-        # for obs in self.observers:
-        #     result.update(obs.get_results())
-
-        # benchmark_time = perf_counter() - start_time
-        # self.benchmark_times.append(benchmark_time)
-        # print(f"Time taken: {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}")
-
-        # # calculate the mean of the means of the Mean Relative Error over the valid results
-        # valid_results = np_results[np_results < invalid_value]
-        # mean_mean_MRE = np.mean(valid_results) if len(valid_results) > 0 else np.nan
-
-        # # write the 'time' to the results and return
-        # if np.isnan(mean_mean_MRE) or len(valid_results) < min_valid_iterations:
-        #     mean_mean_MRE = invalid_value
-        # result["time"] = mean_mean_MRE
-        # return result
-
-    def run_kernel(self, func, args, threads, grid):
-        """runs the kernel once, returns whatever the kernel returns
-
-        :param func: A Python function for this specific configuration
-        :type func: ctypes._FuncPtr
-
-        :param args: A list of arguments to the function, order should match the
-            order in the code. The list should be prepared using
-            ready_argument_list().
-        :type args: list(Argument)
-
-        :param threads: Ignored, but left as argument for now to have the same
-            interface as CudaFunctions and OpenCLFunctions.
-        :type threads: any
-
-        :param grid: Ignored, but left as argument for now to have the same
-            interface as CudaFunctions and OpenCLFunctions.
-        :type grid: any
-
-        :returns: A robust average of values returned by the C function.
-        :rtype: float
-        """
-        logging.debug("run_kernel")
-        logging.debug("arguments=" + str([str(arg) for arg in args]))
-
-        time = func(*args, **self.params)
-
-        return time
-
-    units = {}
-
-
-def run_hyperparam_kernel_and_observers(iter, args) -> Tuple[list, float, dict]:
-    """ Function to run a hyperparam kernel directly for parallel processing. Must be outside the class to avoid pickling issues due to large scope. """
-    PID = getpid()
-    # print(f"Iter {iter+1}, PID {PID}", flush=True)
-    func, funcargs, params, invalid_flag = args
-    logging.debug(f"run_kernel iter {iter} (PID {PID})")
-    logging.debug("arguments=" + str([str(arg) for arg in funcargs]))
-
-    # run the kernel
-    starttime = perf_counter()
-    # cProfile.runctx('func(invalid_flag, *funcargs, **params)', globals(), locals(), 'profile-%s.out' % str(iter + 1))
-    # values, warning_dict = None, None
-    values, warning_dict = func(invalid_flag, *funcargs, **params)
-    runtime = perf_counter() - starttime
-    return values, runtime, warning_dict
-
-
-def run_hyperparam_kernel_as_subprocess(iter, args):
-    """ Function to run a hyperparam kernel as a subprocess for parallel processing. Must be outside the class to avoid pickling issues due to large scope. Significantly slower than run_kernel, but guaranteed to be a different process. Observers are not implemented."""
-    func, args, params = args
-    PID = getpid()
-    # print(f"Iter {iter}, PID {PID}", flush=True)
-    logging.debug(f"run_kernel as subprocess {iter} (PID {PID})")
-    logging.debug("arguments=" + str([str(arg) for arg in args]))
-
-    def make_kwargstrings(**kwargs) -> list:
-        return list(f"{key}={value}" for key, value in kwargs.items())
-
-    # Subprocess
-    args += make_kwargstrings(**params)
-    proc = subprocess.run([sys.executable or 'python', str(func.__name__ + '.py')] + args, shell=False, capture_output=True)
-    stderr = f"subprocess {iter} with PID {PID} errors: {proc.stderr.decode('utf-8')}" if len(proc.stderr.decode('utf-8')) > 0 else ""
-    stdout = f"subprocess {iter} with PID {PID} output: {proc.stdout.decode('utf-8')}" if len(proc.stdout.decode('utf-8')) > 0 else ""
-
-    if stderr != "":
-        logging.debug(stderr)
-        print(stderr)
-    if stdout != "":
-        logging.debug(stdout)
-        # print(stdout)
-
-    time = float(stdout.split("result_value=")[1])
-    return time
-
-
-def get_hyperparam_grandmedian_and_times(MWP_values, invalid_value, min_valid_iterations=1):
-    """ Get the grandmean (mean of median MWP per kernel) and mean MWP per iteration """
-    MWP_values = np.array(MWP_values)
-    median_MWPs = np.array([])
-    median_MWPs_vars = np.array([])
-    valid_MWP_times = list()
-    # get the mean MWP per kernel
-    for i in range(len(MWP_values[0])):
-        MWP_kernel_values = MWP_values[:, i]
-        valid_MWP_mask = (MWP_kernel_values < invalid_value) & (MWP_kernel_values >= 0)
-        valid_MWP_kernel_values = MWP_kernel_values[valid_MWP_mask]
-        if len(valid_MWP_kernel_values) >= min_valid_iterations:
-            # # filter outliers by keeping only values that are within two times the Median Absolute Deviation
-            # AD = np.abs(valid_MWP_kernel_values - np.median(valid_MWP_kernel_values))
-            # MAD = np.median(AD)
-            # selected_MWP_kernel_values = valid_MWP_kernel_values[AD < MAD * 3]
-            # print(f"Removed {len(valid_MWP_kernel_values) - len(selected_MWP_kernel_values)}")
-            # median_MWPs = np.append(median_MWPs, np.median(selected_MWP_kernel_values))
-            # median_MWPs = np.append(median_MWPs, np.mean(valid_MWP_kernel_values))
-
-            # filter outliers by keeping only values that are within three times the Median Absolute Deviation
-            AD = np.abs(valid_MWP_kernel_values - np.median(valid_MWP_kernel_values))
-            MAD = np.median(AD)
-            MAD_score = AD / MAD if MAD else 0.0
-            selected_MWP_kernel_values = valid_MWP_kernel_values[MAD_score < 3]
-            median_MWPs = np.append(median_MWPs, np.median(selected_MWP_kernel_values))
-            median_MWPs_vars = np.append(median_MWPs_vars, np.std(selected_MWP_kernel_values))
-        else:
-            median_MWPs = np.append(median_MWPs, invalid_value)
-            median_MWPs_vars = np.append(median_MWPs_vars, 1)
-
-    # get the mean MWP per iteration
-    for i in range(len(MWP_values)):
-        MWP_iteration_values = MWP_values[i]
-        valid_MWP_mask = (MWP_iteration_values < invalid_value) & (MWP_iteration_values >= 0)
-        valid_MWP_iteration_values = MWP_iteration_values[valid_MWP_mask]
-        if len(valid_MWP_iteration_values) > 0:
-            valid_MWP_times.append(np.mean(valid_MWP_iteration_values))
-        else:
-            valid_MWP_times.append(invalid_value)
-
-    # get the grandmean by taking the inverse-variance weighted average over the median MWP per kernel, invalid if one of the kernels is invalid
-    print(median_MWPs)
-    print(median_MWPs / median_MWPs_vars, np.sum(1 / median_MWPs_vars), np.std(median_MWPs / median_MWPs_vars))
-    inverse_variance_weighted_average = np.sum(median_MWPs / median_MWPs_vars) / np.sum(1 / median_MWPs_vars)
-    grandmean_MWP = inverse_variance_weighted_average
-    if np.isnan(grandmean_MWP) or len(median_MWPs[median_MWPs >= invalid_value]) > 0:
-        grandmean_MWP = invalid_value
-    return grandmean_MWP, valid_MWP_times

From 3dbe379d2a967f62eb70bda4422980107588db1f Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 26 Mar 2025 14:27:27 +0100
Subject: [PATCH 150/253] Removed bayes_opt_old as a strategy

---
 kernel_tuner/strategies/bayes_opt_old.py | 833 -----------------------
 test/strategies/test_strategies.py       |   4 +-
 2 files changed, 2 insertions(+), 835 deletions(-)
 delete mode 100644 kernel_tuner/strategies/bayes_opt_old.py

diff --git a/kernel_tuner/strategies/bayes_opt_old.py b/kernel_tuner/strategies/bayes_opt_old.py
deleted file mode 100644
index a55790e66..000000000
--- a/kernel_tuner/strategies/bayes_opt_old.py
+++ /dev/null
@@ -1,833 +0,0 @@
-"""Bayesian Optimization implementation from the thesis by Willemsen."""
-import itertools
-import time
-import warnings
-from copy import deepcopy
-from random import randint, shuffle
-
-import numpy as np
-
-# BO imports
-try:
-    from typing import Tuple
-
-    from scipy.stats import norm
-    from sklearn.exceptions import ConvergenceWarning
-    from sklearn.gaussian_process import GaussianProcessRegressor
-    from sklearn.gaussian_process.kernels import RBF, ConstantKernel, Matern
-    from skopt.sampler import Lhs
-    bayes_opt_present = True
-except ImportError:
-    bayes_opt_present = False
-
-from kernel_tuner import util
-from kernel_tuner.strategies import minimize
-
-supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast"]
-
-
-def generate_normalized_param_dicts(tune_params: dict, eps: float) -> Tuple[dict, dict]:
-    """Generates normalization and denormalization dictionaries."""
-    original_to_normalized = dict()
-    normalized_to_original = dict()
-    for param_name in tune_params.keys():
-        original_to_normalized_dict = dict()
-        normalized_to_original_dict = dict()
-        for value_index, value in enumerate(tune_params[param_name]):
-            normalized_value = eps * value_index + 0.5 * eps
-            normalized_to_original_dict[normalized_value] = value
-            original_to_normalized_dict[value] = normalized_value
-        original_to_normalized[param_name] = original_to_normalized_dict
-        normalized_to_original[param_name] = normalized_to_original_dict
-    return original_to_normalized, normalized_to_original
-
-
-def normalize_parameter_space(param_space: list, tune_params: dict, normalized: dict) -> list:
-    """Normalize the parameter space given a normalization dictionary."""
-    keys = list(tune_params.keys())
-    param_space_normalized = list(tuple(normalized[keys[i]][v] for i, v in enumerate(params)) for params in param_space)
-    return param_space_normalized
-
-
-def prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict):
-    """Pruning of the parameter space to remove dimensions that have a constant parameter."""
-    pruned_tune_params_mask = list()
-    removed_tune_params = list()
-    param_names = list(tune_params.keys())
-    for index, key in enumerate(tune_params.keys()):
-        pruned_tune_params_mask.append(len(tune_params[key]) > 1)
-        if len(tune_params[key]) > 1:
-            removed_tune_params.append(None)
-        else:
-            value = tune_params[key][0]
-            normalized = normalize_dict[param_names[index]][value]
-            removed_tune_params.append(normalized)
-    if 'verbose' in tuning_options and tuning_options.verbose is True and len(tune_params.keys()) != sum(pruned_tune_params_mask):
-        print(f"Number of parameters (dimensions): {len(tune_params.keys())}, after pruning: {sum(pruned_tune_params_mask)}")
-    parameter_space = list(tuple(itertools.compress(param_config, pruned_tune_params_mask)) for param_config in parameter_space)
-    return parameter_space, removed_tune_params
-
-
-def tune(runner, kernel_options, device_options, tuning_options):
-    """Find the best performing kernel configuration in the parameter space.
-
-    :params runner: A runner from kernel_tuner.runners
-    :type runner: kernel_tuner.runner
-
-    :param kernel_options: A dictionary with all options for the kernel.
-    :type kernel_options: kernel_tuner.interface.Options
-
-    :param device_options: A dictionary with all options for the device
-        on which the kernel should be tuned.
-    :type device_options: kernel_tuner.interface.Options
-
-    :param tuning_options: A dictionary with all options regarding the tuning
-        process. Allows setting hyperparameters via the strategy_options key.
-    :type tuning_options: kernel_tuner.interface.Options
-
-    :returns: A list of dictionaries for executed kernel configurations and their
-        execution times. And a dictionary that contains a information
-        about the hardware/software environment on which the tuning took place.
-    :rtype: list(dict()), dict()
-
-    """
-    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
-    prune_parameterspace = tuning_options.strategy_options.get("pruneparameterspace", True)
-    if not bayes_opt_present:
-        raise ImportError("Error: optional dependencies for Bayesian Optimization not installed")
-
-    # epsilon for scaling should be the evenly spaced distance between the largest set of parameter options in an interval [0,1]
-    tune_params = tuning_options.tune_params
-    tuning_options["scaling"] = True
-    _, _, eps = minimize.get_bounds_x0_eps(tuning_options)
-
-    # compute cartesian product of all tunable parameters
-    parameter_space = itertools.product(*tune_params.values())
-
-    # check for search space restrictions
-    if tuning_options.restrictions is not None:
-        tuning_options.verbose = False
-    parameter_space = filter(lambda p: util.config_valid(p, tuning_options, runner.dev.max_threads), parameter_space)
-    parameter_space = list(parameter_space)
-    if len(parameter_space) < 1:
-        raise ValueError("Empty parameterspace after restrictionscheck. Restrictionscheck is possibly too strict.")
-    if len(parameter_space) == 1:
-        raise ValueError(f"Only one configuration after restrictionscheck. Restrictionscheck is possibly too strict. Configuration: {parameter_space[0]}")
-
-    # normalize search space to [0,1]
-    normalize_dict, denormalize_dict = generate_normalized_param_dicts(tune_params, eps)
-    parameter_space = normalize_parameter_space(parameter_space, tune_params, normalize_dict)
-
-    # prune the parameter space to remove dimensions that have a constant parameter
-    if prune_parameterspace:
-        parameter_space, removed_tune_params = prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict)
-    else:
-        parameter_space = list(parameter_space)
-        removed_tune_params = [None] * len(tune_params.keys())
-
-    # initialize and optimize
-    bo = BayesianOptimization(parameter_space, removed_tune_params, kernel_options, tuning_options, normalize_dict, denormalize_dict, runner)
-    results = bo.optimize(max_fevals)
-
-    return results, runner.dev.get_environment()
-
-
-class BayesianOptimization():
-
-    def __init__(self, searchspace: list, removed_tune_params: list, kernel_options: dict, tuning_options: dict, normalize_dict: dict, denormalize_dict: dict,
-                 runner, opt_direction='min'):
-        time_start = time.perf_counter_ns()
-
-        # supported hyperparameter values
-        self.supported_cov_kernels = ["constantrbf", "rbf", "matern32", "matern52"]
-        self.supported_methods = supported_methods
-        self.supported_sampling_methods = ["random", "lhs"]
-        self.supported_sampling_criterion = ["correlation", "ratio", "maximin", None]
-
-        def get_hyperparam(name: str, default, supported_values=list()):
-            value = tuning_options.strategy_options.get(name, default)
-            if len(supported_values) > 0 and value not in supported_values:
-                raise ValueError(f"'{name}' is set to {value}, but must be one of {supported_values}")
-            return value
-
-        # get hyperparameters
-        cov_kernel_name = get_hyperparam("covariancekernel", "matern32", self.supported_cov_kernels)
-        cov_kernel_lengthscale = get_hyperparam("covariancelengthscale", 1.5)
-        acquisition_function = get_hyperparam("method", "multi-advanced", self.supported_methods)
-        acq = acquisition_function
-        acq_params = get_hyperparam("methodparams", {})
-        multi_af_names = get_hyperparam("multi_af_names", ['ei', 'poi', 'lcb'])
-        self.multi_afs_discount_factor = get_hyperparam("multi_af_discount_factor", 0.65 if acq == 'multi' else 0.95)
-        self.multi_afs_required_improvement_factor = get_hyperparam("multi_afs_required_improvement_factor", 0.15 if acq == 'multi-advanced-precise' else 0.1)
-        self.num_initial_samples = get_hyperparam("popsize", 20)
-        self.sampling_method = get_hyperparam("samplingmethod", "lhs", self.supported_sampling_methods)
-        self.sampling_crit = get_hyperparam("samplingcriterion", 'maximin', self.supported_sampling_criterion)
-        self.sampling_iter = get_hyperparam("samplingiterations", 1000)
-
-        # set acquisition function hyperparameter defaults where missing
-        if 'explorationfactor' not in acq_params:
-            acq_params['explorationfactor'] = 'CV'
-        if 'zeta' not in acq_params:
-            acq_params['zeta'] = 1
-        if 'skip_duplicate_after' not in acq_params:
-            acq_params['skip_duplicate_after'] = 5
-
-        # set arguments
-        self.kernel_options = kernel_options
-        self.tuning_options = tuning_options
-        self.tune_params = tuning_options.tune_params
-        self.param_names = list(self.tune_params.keys())
-        self.normalized_dict = normalize_dict
-        self.denormalized_dict = denormalize_dict
-        self.runner = runner
-        self.max_threads = runner.dev.max_threads
-        self.log_timings = False
-
-        # set optimization constants
-        self.invalid_value = 1e20
-        self.opt_direction = opt_direction
-        if opt_direction == 'min':
-            self.worst_value = np.inf
-            self.argopt = np.argmin
-        elif opt_direction == 'max':
-            self.worst_value = np.NINF
-            self.argopt = np.argmax
-        else:
-            raise ValueError("Invalid optimization direction '{}'".format(opt_direction))
-
-        # set the acquisition function and surrogate model
-        self.optimize = self.__optimize
-        self.af_name = acquisition_function
-        self.af_params = acq_params
-        self.multi_afs = list(self.get_af_by_name(af_name) for af_name in multi_af_names)
-        self.set_acquisition_function(acquisition_function)
-        self.set_surrogate_model(cov_kernel_name, cov_kernel_lengthscale)
-
-        # set remaining values
-        self.results = []
-        self.__searchspace = searchspace
-        self.removed_tune_params = removed_tune_params
-        self.searchspace_size = len(self.searchspace)
-        self.num_dimensions = len(self.dimensions())
-        self.__current_optimum = self.worst_value
-        self.cv_norm_maximum = None
-        self.fevals = 0
-        self.__visited_num = 0
-        self.__visited_valid_num = 0
-        self.__visited_searchspace_indices = [False] * self.searchspace_size
-        self.__observations = [np.NaN] * self.searchspace_size
-        self.__valid_observation_indices = [False] * self.searchspace_size
-        self.__valid_params = list()
-        self.__valid_observations = list()
-        self.unvisited_cache = self.unvisited()
-        time_setup = time.perf_counter_ns()
-        self.error_message_searchspace_fully_observed = "The search space has been fully observed"
-
-        # take initial sample
-        if self.num_initial_samples > 0:
-            self.initial_sample()
-            time_initial_sample = time.perf_counter_ns()
-
-        # print the timings
-        if self.log_timings:
-            time_taken_setup = round(time_setup - time_start, 3) / 1000
-            time_taken_initial_sample = round(time_initial_sample - time_setup, 3) / 1000
-            time_taken_total = round(time_initial_sample - time_start, 3) / 1000
-            print(f"Initialization | total time: {time_taken_total} | Setup: {time_taken_setup} | Initial sample: {time_taken_initial_sample}", flush=True)
-
-    @property
-    def searchspace(self):
-        return self.__searchspace
-
-    @property
-    def observations(self):
-        return self.__observations
-
-    @property
-    def current_optimum(self):
-        return self.__current_optimum
-
-    @current_optimum.setter
-    def current_optimum(self, value: float):
-        self.__current_optimum = value
-
-    def is_better_than(self, a: float, b: float) -> bool:
-        """Determines which one is better depending on optimization direction."""
-        return a < b if self.opt_direction == 'min' else a > b
-
-    def is_not_visited(self, index: int) -> bool:
-        """Returns whether a searchspace index has not been visited."""
-        return not self.__visited_searchspace_indices[index]
-
-    def is_valid(self, observation: float) -> bool:
-        """Returns whether an observation is valid."""
-        return not (observation is None or observation == self.invalid_value or observation == np.NaN)
-
-    def get_af_by_name(self, name: str):
-        """Get the basic acquisition functions by their name."""
-        basic_af_names = ['ei', 'poi', 'lcb']
-        if name == 'ei':
-            return self.af_expected_improvement
-        elif name == 'poi':
-            return self.af_probability_of_improvement
-        elif name == 'lcb':
-            return self.af_lower_confidence_bound
-        raise ValueError(f"{name} not in {basic_af_names}")
-
-    def set_acquisition_function(self, acquisition_function: str):
-        """Set the acquisition function."""
-        if acquisition_function == 'poi':
-            self.__af = self.af_probability_of_improvement
-        elif acquisition_function == 'ei':
-            self.__af = self.af_expected_improvement
-        elif acquisition_function == 'lcb':
-            self.__af = self.af_lower_confidence_bound
-        elif acquisition_function == 'lcb-srinivas':
-            self.__af = self.af_lower_confidence_bound_srinivas
-        elif acquisition_function == 'random':
-            self.__af = self.af_random
-        elif acquisition_function == 'multi':
-            self.optimize = self.__optimize_multi
-        elif acquisition_function == 'multi-advanced':
-            self.optimize = self.__optimize_multi_advanced
-        elif acquisition_function == 'multi-fast':
-            self.optimize = self.__optimize_multi_fast
-        else:
-            raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
-
-    def set_surrogate_model(self, cov_kernel_name: str, cov_kernel_lengthscale: float):
-        """Set the surrogate model with a covariance function and lengthscale."""
-        if cov_kernel_name == "constantrbf":
-            kernel = ConstantKernel(1.0, constant_value_bounds="fixed") * RBF(cov_kernel_lengthscale, length_scale_bounds="fixed")
-        elif cov_kernel_name == "rbf":
-            kernel = RBF(length_scale=cov_kernel_lengthscale, length_scale_bounds="fixed")
-        elif cov_kernel_name == "matern32":
-            kernel = Matern(length_scale=cov_kernel_lengthscale, nu=1.5, length_scale_bounds="fixed")
-        elif cov_kernel_name == "matern52":
-            kernel = Matern(length_scale=cov_kernel_lengthscale, nu=2.5, length_scale_bounds="fixed")
-        else:
-            raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_cov_kernels, cov_kernel_name))
-        self.__model = GaussianProcessRegressor(kernel=kernel, alpha=1e-10, normalize_y=True)    # maybe change alpha to a higher value such as 1e-5?
-
-    def valid_params_observations(self) -> Tuple[list, list]:
-        """Returns a list of valid observations and their parameter configurations."""
-        # if you do this every iteration, better keep it as cache and update in update_after_evaluation
-        params = list()
-        observations = list()
-        for index, valid in enumerate(self.__valid_observation_indices):
-            if valid is True:
-                params.append(self.searchspace[index])
-                observations.append(self.observations[index])
-        return params, observations
-
-    def unvisited(self) -> list:
-        """Returns a list of unvisited parameter configurations - attention: cached version exists!"""
-        params = list(self.searchspace[index] for index, visited in enumerate(self.__visited_searchspace_indices) if visited is False)
-        return params
-
-    def find_param_config_index(self, param_config: tuple) -> int:
-        """Find a parameter config index in the search space if it exists."""
-        return self.searchspace.index(param_config)
-
-    def find_param_config_unvisited_index(self, param_config: tuple) -> int:
-        """Find a parameter config index in the unvisited cache if it exists."""
-        return self.unvisited_cache.index(param_config)
-
-    def normalize_param_config(self, param_config: tuple) -> tuple:
-        """Normalizes a parameter configuration."""
-        normalized = tuple(self.normalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
-        return normalized
-
-    def denormalize_param_config(self, param_config: tuple) -> tuple:
-        """Denormalizes a parameter configuration."""
-        denormalized = tuple(self.denormalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
-        return denormalized
-
-    def unprune_param_config(self, param_config: tuple) -> tuple:
-        """In case of pruned dimensions, adds the removed dimensions back in the param config."""
-        unpruned = list()
-        pruned_count = 0
-        for removed in self.removed_tune_params:
-            if removed is not None:
-                unpruned.append(removed)
-            else:
-                unpruned.append(param_config[pruned_count])
-                pruned_count += 1
-        return tuple(unpruned)
-
-    def update_after_evaluation(self, observation: float, index: int, param_config: tuple):
-        """Adjust the visited and valid index records accordingly."""
-        validity = self.is_valid(observation)
-        self.__visited_num += 1
-        self.__observations[index] = observation
-        self.__visited_searchspace_indices[index] = True
-        del self.unvisited_cache[self.find_param_config_unvisited_index(param_config)]
-        self.__valid_observation_indices[index] = validity
-        if validity is True:
-            self.__visited_valid_num += 1
-            self.__valid_params.append(param_config)
-            self.__valid_observations.append(observation)
-            if self.is_better_than(observation, self.current_optimum):
-                self.current_optimum = observation
-
-    def predict(self, x) -> Tuple[float, float]:
-        """Returns a mean and standard deviation predicted by the surrogate model for the parameter configuration."""
-        return self.__model.predict([x], return_std=True)
-
-    def predict_list(self, lst: list) -> Tuple[list, list, list]:
-        """Returns a list of means and standard deviations predicted by the surrogate model for the parameter configurations, and separate lists of means and standard deviations."""
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            mu, std = self.__model.predict(lst, return_std=True)
-            return list(zip(mu, std)), mu, std
-
-    def fit_observations_to_model(self):
-        """Update the model based on the current list of observations."""
-        self.__model.fit(self.__valid_params, self.__valid_observations)
-
-    def evaluate_objective_function(self, param_config: tuple) -> float:
-        """Evaluates the objective function."""
-        param_config = self.unprune_param_config(param_config)
-        denormalized_param_config = self.denormalize_param_config(param_config)
-        if not util.config_valid(denormalized_param_config, self.tuning_options, self.max_threads):
-            return self.invalid_value
-        val = minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.results)
-        self.fevals += 1
-        return val
-
-    def dimensions(self) -> list:
-        """List of parameter values per parameter."""
-        return self.tune_params.values()
-
-    def draw_random_sample(self) -> Tuple[list, int]:
-        """Draw a random sample from the unvisited parameter configurations."""
-        if len(self.unvisited_cache) < 1:
-            raise ValueError("Searchspace exhausted during random sample draw as no valid configurations were found")
-        index = randint(0, len(self.unvisited_cache) - 1)    # NOSONAR
-        param_config = self.unvisited_cache[index]
-        actual_index = self.find_param_config_index(param_config)
-        return param_config, actual_index
-
-    def draw_latin_hypercube_samples(self, num_samples: int) -> list:
-        """Draws an LHS-distributed sample from the search space."""
-        if self.searchspace_size < num_samples:
-            raise ValueError("Can't sample more than the size of the search space")
-        if self.sampling_crit is None:
-            lhs = Lhs(lhs_type="centered", criterion=None)
-        else:
-            lhs = Lhs(lhs_type="classic", criterion=self.sampling_crit, iterations=self.sampling_iter)
-        param_configs = lhs.generate(self.dimensions(), num_samples)
-        indices = list()
-        normalized_param_configs = list()
-        for i in range(len(param_configs) - 1):
-            try:
-                param_config = self.normalize_param_config(param_configs[i])
-                index = self.find_param_config_index(param_config)
-                indices.append(index)
-                normalized_param_configs.append(param_config)
-            except ValueError:
-                """ Due to search space restrictions, the search space may not be an exact cartesian product of the tunable parameter values.
-                It is thus possible for LHS to generate a parameter combination that is not in the actual searchspace, which must be skipped. """
-                continue
-        return list(zip(normalized_param_configs, indices))
-
-    def initial_sample(self):
-        """Draws an initial sample using random sampling."""
-        if self.num_initial_samples <= 0:
-            raise ValueError("At least one initial sample is required")
-        if self.sampling_method == 'lhs':
-            samples = self.draw_latin_hypercube_samples(self.num_initial_samples)
-        elif self.sampling_method == 'random':
-            samples = list()
-        else:
-            raise ValueError("Sampling method must be one of {}, is {}".format(self.supported_sampling_methods, self.sampling_method))
-        # collect the samples
-        collected_samples = 0
-        for params, index in samples:
-            observation = self.evaluate_objective_function(params)
-            self.update_after_evaluation(observation, index, params)
-            if self.is_valid(observation):
-                collected_samples += 1
-        # collect the remainder of the samples
-        while collected_samples < self.num_initial_samples:
-            params, index = self.draw_random_sample()
-            observation = self.evaluate_objective_function(params)
-            self.update_after_evaluation(observation, index, params)
-            # check for validity to avoid having no actual initial samples
-            if self.is_valid(observation):
-                collected_samples += 1
-        self.fit_observations_to_model()
-        _, _, std = self.predict_list(self.unvisited_cache)
-        self.initial_sample_mean = np.mean(self.__valid_observations)
-        # Alternatively:
-        # self.initial_sample_std = np.std(self.__valid_observations)
-        # self.initial_sample_mean = np.mean(predictions)
-        self.initial_std = np.mean(std)
-        self.cv_norm_maximum = self.initial_std
-
-    def contextual_variance(self, std: list):
-        """Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018)."""
-        if not self.af_params['explorationfactor'] == 'CV':
-            return None
-        if self.opt_direction == 'min':
-            if self.current_optimum == self.worst_value:
-                return 0.01
-            if self.current_optimum <= 0:
-                # doesn't work well for minimization beyond 0, should that even be a thing?
-                return abs(np.mean(std) / self.current_optimum)
-            improvement_over_initial_sample = self.initial_sample_mean / self.current_optimum
-            cv = np.mean(std) / improvement_over_initial_sample
-            # normalize if available
-            if self.cv_norm_maximum:
-                cv = cv / self.cv_norm_maximum
-            return cv
-        return np.mean(std) / self.current_optimum
-
-    def __optimize(self, max_fevals):
-        """Find the next best candidate configuration(s), evaluate those and update the model accordingly."""
-        while self.fevals < max_fevals:
-            if self.__visited_num >= self.searchspace_size:
-                raise ValueError(self.error_message_searchspace_fully_observed)
-            predictions, _, std = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(std)
-            list_of_acquisition_values = self.__af(predictions, hyperparam)
-            # afterwards select the best AF value
-            best_af = self.argopt(list_of_acquisition_values)
-            candidate_params = self.unvisited_cache[best_af]
-            candidate_index = self.find_param_config_index(candidate_params)
-            observation = self.evaluate_objective_function(candidate_params)
-            self.update_after_evaluation(observation, candidate_index, candidate_params)
-            self.fit_observations_to_model()
-        return self.results
-
-    def __optimize_multi(self, max_fevals):
-        """Optimize with a portfolio of multiple acquisition functions. Predictions are always only taken once. Skips AFs if they suggest X/max_evals duplicates in a row, prefers AF with best discounted average."""
-        if self.opt_direction != 'min':
-            raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
-        # calculate how many times an AF can suggest a duplicate candidate before the AF is skipped
-        # skip_duplicates_fraction = self.af_params['skip_duplicates_fraction']
-        # skip_if_duplicate_n_times = int(min(max(round(skip_duplicates_fraction * max_fevals), 3), max_fevals))
-        skip_if_duplicate_n_times = self.af_params['skip_duplicate_after']
-        discount_factor = self.multi_afs_discount_factor
-        # setup the registration of duplicates and runtimes
-        duplicate_count_template = [0 for _ in range(skip_if_duplicate_n_times)]
-        duplicate_candidate_af_count = list(deepcopy(duplicate_count_template) for _ in range(3))
-        skip_af_index = list()
-        af_runtimes = [0, 0, 0]
-        af_observations = [list(), list(), list()]
-        initial_sample_mean = np.mean(self.__valid_observations)
-        while self.fevals < max_fevals:
-            time_start = time.perf_counter_ns()
-            # the first acquisition function is never skipped, so that should be the best for the endgame (EI)
-            aqfs = self.multi_afs
-            predictions, _, std = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(std)
-            if self.__visited_num >= self.searchspace_size:
-                raise ValueError(self.error_message_searchspace_fully_observed)
-            time_predictions = time.perf_counter_ns()
-            actual_candidate_params = list()
-            actual_candidate_indices = list()
-            actual_candidate_af_indices = list()
-            duplicate_candidate_af_indices = list()
-            duplicate_candidate_original_af_indices = list()
-            for af_index, af in enumerate(aqfs):
-                if af_index in skip_af_index:
-                    continue
-                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
-                    break
-                timer_start = time.perf_counter()
-                list_of_acquisition_values = af(predictions, hyperparam)
-                best_af = self.argopt(list_of_acquisition_values)
-                time_taken = time.perf_counter() - timer_start
-                af_runtimes[af_index] += time_taken
-                is_duplicate = best_af in actual_candidate_indices
-                if not is_duplicate:
-                    candidate_params = self.unvisited_cache[best_af]
-                    actual_candidate_params.append(candidate_params)
-                    actual_candidate_indices.append(best_af)
-                    actual_candidate_af_indices.append(af_index)
-                # register whether the AF suggested a duplicate candidate
-                duplicate_candidate_af_count[af_index].pop(0)
-                duplicate_candidate_af_count[af_index].append(1 if is_duplicate else 0)
-                if is_duplicate:
-                    # find the index of the AF that first registered the duplicate
-                    original_duplicate_af_index = actual_candidate_af_indices[actual_candidate_indices.index(best_af)]
-                    # register that AF as duplicate as well
-                    duplicate_candidate_af_count[original_duplicate_af_index][-1] = 1
-                    duplicate_candidate_af_indices.append(af_index)
-                    duplicate_candidate_original_af_indices.append(original_duplicate_af_index)
-            time_afs = time.perf_counter_ns()
-            # evaluate the non-duplicate candidates
-            for index, af_index in enumerate(actual_candidate_af_indices):
-                candidate_params = actual_candidate_params[index]
-                candidate_index = self.find_param_config_index(candidate_params)
-                observation = self.evaluate_objective_function(candidate_params)
-                self.update_after_evaluation(observation, candidate_index, candidate_params)
-                if observation != self.invalid_value:
-                    # we use the registered observations for maximization of the discounted reward
-                    reg_observation = observation if self.opt_direction == 'min' else -1 * observation
-                    af_observations[actual_candidate_af_indices[index]].append(reg_observation)
-                else:
-                    reg_invalid_observation = initial_sample_mean if self.opt_direction == 'min' else -1 * initial_sample_mean
-                    af_observations[actual_candidate_af_indices[index]].append(reg_invalid_observation)
-            for index, af_index in enumerate(duplicate_candidate_af_indices):
-                original_observation = af_observations[duplicate_candidate_original_af_indices[index]][-1]
-                af_observations[af_index].append(original_observation)
-            self.fit_observations_to_model()
-            time_eval = time.perf_counter_ns()
-            # assert that all observation lists of non-skipped acquisition functions are of the same length
-            non_skipped_af_indices = list(af_index for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index)
-            assert all(len(af_observations[non_skipped_af_indices[0]]) == len(af_observations[af_index]) for af_index in non_skipped_af_indices)
-            # find the AFs elligble for being skipped
-            candidates_for_skip = list()
-            for af_index, count in enumerate(duplicate_candidate_af_count):
-                if sum(count) >= skip_if_duplicate_n_times and af_index not in skip_af_index:
-                    candidates_for_skip.append(af_index)
-            # do not skip the AF with the lowest runtime
-            if len(candidates_for_skip) > 1:
-                candidates_for_skip_discounted = list(
-                    sum(list(obs * discount_factor**(len(observations) - 1 - i) for i, obs in enumerate(observations)))
-                    for af_index, observations in enumerate(af_observations) if af_index in candidates_for_skip)
-                af_not_to_skip = candidates_for_skip[np.argmin(candidates_for_skip_discounted)]
-                for af_index in candidates_for_skip:
-                    if af_index == af_not_to_skip:
-                        # do not skip the AF with the lowest runtime and give it a clean slate
-                        duplicate_candidate_af_count[af_index] = deepcopy(duplicate_count_template)
-                        continue
-                    skip_af_index.append(af_index)
-                    if len(skip_af_index) >= len(aqfs):
-                        raise ValueError("There are no acquisition functions left! This should not happen...")
-            time_af_selection = time.perf_counter_ns()
-
-            # printing timings
-            if self.log_timings:
-                time_taken_predictions = round(time_predictions - time_start, 3) / 1000
-                time_taken_afs = round(time_afs - time_predictions, 3) / 1000
-                time_taken_eval = round(time_eval - time_afs, 3) / 1000
-                time_taken_af_selection = round(time_af_selection - time_eval, 3) / 1000
-                time_taken_total = round(time_af_selection - time_start, 3) / 1000
-                print(
-                    f"({self.fevals}/{max_fevals}) Total time: {time_taken_total} | Predictions: {time_taken_predictions} | AFs: {time_taken_afs} | Eval: {time_taken_eval} | AF selection: {time_taken_af_selection}",
-                    flush=True)
-        return self.results
-
-    def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
-        """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once, unless increase_precision is true. Skips AFs if they are consistently worse than the mean of discounted observations, promotes AFs if they are consistently better than this mean."""
-        if self.opt_direction != 'min':
-            raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
-        aqfs = self.multi_afs
-        discount_factor = self.multi_afs_discount_factor
-        required_improvement_factor = self.multi_afs_required_improvement_factor
-        required_improvement_worse = 1 + required_improvement_factor
-        required_improvement_better = 1 - required_improvement_factor
-        min_required_count = self.af_params['skip_duplicate_after']
-        skip_af_index = list()
-        single_af = len(aqfs) <= len(skip_af_index) + 1
-        af_observations = [list(), list(), list()]
-        af_performs_worse_count = [0, 0, 0]
-        af_performs_better_count = [0, 0, 0]
-        while self.fevals < max_fevals:
-            if single_af:
-                return self.__optimize(max_fevals)
-            if self.__visited_num >= self.searchspace_size:
-                raise ValueError(self.error_message_searchspace_fully_observed)
-            observations_median = np.median(self.__valid_observations)
-            if increase_precision is False:
-                predictions, _, std = self.predict_list(self.unvisited_cache)
-                hyperparam = self.contextual_variance(std)
-            for af_index, af in enumerate(aqfs):
-                if af_index in skip_af_index:
-                    continue
-                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
-                    break
-                if increase_precision is True:
-                    predictions, _, std = self.predict_list(self.unvisited_cache)
-                    hyperparam = self.contextual_variance(std)
-                list_of_acquisition_values = af(predictions, hyperparam)
-                best_af = self.argopt(list_of_acquisition_values)
-                del predictions[best_af]    # to avoid going out of bounds
-                candidate_params = self.unvisited_cache[best_af]
-                candidate_index = self.find_param_config_index(candidate_params)
-                observation = self.evaluate_objective_function(candidate_params)
-                self.update_after_evaluation(observation, candidate_index, candidate_params)
-                if increase_precision is True:
-                    self.fit_observations_to_model()
-                # we use the registered observations for maximization of the discounted reward
-                if observation != self.invalid_value:
-                    reg_observation = observation if self.opt_direction == 'min' else -1 * observation
-                    af_observations[af_index].append(reg_observation)
-                else:
-                    # if the observation is invalid, use the median of all valid observations to avoid skewing the discounted observations
-                    reg_invalid_observation = observations_median if self.opt_direction == 'min' else -1 * observations_median
-                    af_observations[af_index].append(reg_invalid_observation)
-            if increase_precision is False:
-                self.fit_observations_to_model()
-
-            # calculate the mean of discounted observations over the remaining acquisition functions
-            discounted_obs = list(
-                sum(list(obs * discount_factor**(len(observations) - 1 - i) for i, obs in enumerate(observations))) for observations in af_observations)
-            disc_obs_mean = np.mean(list(discounted_obs[af_index] for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index))
-
-            # register which AFs perform more than 10% better than average and which more than 10% worse than average
-            for af_index, discounted_observation in enumerate(discounted_obs):
-                if discounted_observation > disc_obs_mean * required_improvement_worse:
-                    af_performs_worse_count[af_index] += 1
-                elif discounted_observation < disc_obs_mean * required_improvement_better:
-                    af_performs_better_count[af_index] += 1
-
-            # find the worst AF, discounted observations is leading for a draw
-            worst_count = max(list(count for af_index, count in enumerate(af_performs_worse_count) if af_index not in skip_af_index))
-            af_index_worst = -1
-            if worst_count >= min_required_count:
-                for af_index, count in enumerate(af_performs_worse_count):
-                    if af_index not in skip_af_index and count == worst_count and (af_index_worst == -1
-                                                                                   or discounted_obs[af_index] > discounted_obs[af_index_worst]):
-                        af_index_worst = af_index
-
-            # skip the worst AF
-            if af_index_worst > -1:
-                skip_af_index.append(af_index_worst)
-                # reset the counts to even the playing field for the remaining AFs
-                af_performs_worse_count = [0, 0, 0]
-                af_performs_better_count = [0, 0, 0]
-                # if there is only one AF left, register as single AF
-                if len(aqfs) <= len(skip_af_index) + 1:
-                    single_af = True
-                    af_indices_left = list(af_index for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index)
-                    assert len(af_indices_left) == 1
-                    self.__af = aqfs[af_indices_left[0]]
-            else:
-                # find the best AF, discounted observations is leading for a draw
-                best_count = max(list(count for af_index, count in enumerate(af_performs_better_count) if af_index not in skip_af_index))
-                af_index_best = -1
-                if best_count >= min_required_count:
-                    for af_index, count in enumerate(af_performs_better_count):
-                        if af_index not in skip_af_index and count == best_count and (af_index_best == -1
-                                                                                      or discounted_obs[af_index] < discounted_obs[af_index_best]):
-                            af_index_best = af_index
-                # make the best AF single
-                if af_index_best > -1:
-                    single_af = True
-                    self.__af = aqfs[af_index_best]
-
-        return self.results
-
-    def __optimize_multi_fast(self, max_fevals):
-        """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once."""
-        while self.fevals < max_fevals:
-            aqfs = self.multi_afs
-            # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
-            predictions, _, std = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(std)
-            if self.__visited_num >= self.searchspace_size:
-                raise ValueError(self.error_message_searchspace_fully_observed)
-            for af in aqfs:
-                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
-                    break
-                list_of_acquisition_values = af(predictions, hyperparam)
-                best_af = self.argopt(list_of_acquisition_values)
-                del predictions[best_af]    # to avoid going out of bounds
-                candidate_params = self.unvisited_cache[best_af]
-                candidate_index = self.find_param_config_index(candidate_params)
-                observation = self.evaluate_objective_function(candidate_params)
-                self.update_after_evaluation(observation, candidate_index, candidate_params)
-            self.fit_observations_to_model()
-        return self.results
-
-    def af_random(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function returning a randomly shuffled list for comparison."""
-        list_random = range(len(self.unvisited_cache))
-        shuffle(list_random)
-        return list_random
-
-    def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function Probability of Improvement (PI)."""
-        # prefetch required data
-        if predictions is None:
-            predictions, _, _ = self.predict_list(self.unvisited_cache)
-        if hyperparam is None:
-            hyperparam = self.af_params['explorationfactor']
-        fplus = self.current_optimum - hyperparam
-
-        # precompute difference of improvement
-        list_diff_improvement = list(-((fplus - x_mu) / (x_std + 1E-9)) for (x_mu, x_std) in predictions)
-
-        # compute probability of improvement with CDF in bulk
-        list_prob_improvement = norm.cdf(list_diff_improvement)
-
-        return list_prob_improvement
-
-    def af_expected_improvement(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function Expected Improvement (EI)."""
-        # prefetch required data
-        if predictions is None:
-            predictions, _, _ = self.predict_list(self.unvisited_cache)
-        if hyperparam is None:
-            hyperparam = self.af_params['explorationfactor']
-        fplus = self.current_optimum - hyperparam
-
-        # precompute difference of improvement, CDF and PDF in bulk
-        list_diff_improvement = list((fplus - x_mu) / (x_std + 1E-9) for (x_mu, x_std) in predictions)
-        list_cdf = norm.cdf(list_diff_improvement)
-        list_pdf = norm.pdf(list_diff_improvement)
-
-        # specify AF calculation
-        def exp_improvement(index) -> float:
-            x_mu, x_std = predictions[index]
-            ei = (fplus - x_mu) * list_cdf[index] + x_std * list_pdf[index]
-            return -ei
-
-        # calculate AF
-        list_exp_improvement = list(map(exp_improvement, range(len(predictions))))
-        return list_exp_improvement
-
-    def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function Lower Confidence Bound (LCB)."""
-        # prefetch required data
-        if predictions is None:
-            predictions, _, _ = self.predict_list(self.unvisited_cache)
-        if hyperparam is None:
-            hyperparam = self.af_params['explorationfactor']
-        beta = hyperparam
-
-        # compute LCB in bulk
-        list_lower_confidence_bound = list(x_mu - beta * x_std for (x_mu, x_std) in predictions)
-        return list_lower_confidence_bound
-
-    def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010."""
-        # prefetch required data
-        if predictions is None:
-            predictions, _, _ = self.predict_list(self.unvisited_cache)
-        if hyperparam is None:
-            hyperparam = self.af_params['explorationfactor']
-
-        # precompute beta parameter
-        zeta = self.af_params['zeta']
-        t = self.fevals
-        d = self.num_dimensions
-        delta = hyperparam
-        beta = np.sqrt(zeta * (2 * np.log((t**(d / 2. + 2)) * (np.pi**2) / (3. * delta))))
-
-        # compute UCB in bulk
-        list_lower_confidence_bound = list(x_mu - beta * x_std for (x_mu, x_std) in predictions)
-        return list_lower_confidence_bound
-
-    def visualize_after_opt(self):
-        """Visualize the model after the optimization."""
-        print(self.__model.kernel_.get_params())
-        print(self.__model.log_marginal_likelihood())
-        import matplotlib.pyplot as plt
-        _, mu, std = self.predict_list(self.searchspace)
-        brute_force_observations = list()
-        for param_config in self.searchspace:
-            obs = minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.results)
-            if obs == self.invalid_value:
-                obs = None
-            brute_force_observations.append(obs)
-        x_axis = range(len(mu))
-        plt.fill_between(x_axis, mu - std, mu + std, alpha=0.2, antialiased=True)
-        plt.plot(x_axis, mu, label="predictions", linestyle=' ', marker='.')
-        plt.plot(x_axis, brute_force_observations, label="actual", linestyle=' ', marker='.')
-        plt.legend()
-        plt.show()
diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
index 8b2b92a45..67653190f 100644
--- a/test/strategies/test_strategies.py
+++ b/test/strategies/test_strategies.py
@@ -41,8 +41,8 @@ def vector_add():
 # skip some strategies if their dependencies are not installed
 strategies = []
 for s in strategy_map.keys():
-    if 'gpytorch' in s.lower() or 'botorch_alt' in s.lower() or 'bayes_opt_old' in s.lower():
-        continue
+    if 'gpytorch' in s.lower() or 'botorch_alt' in s.lower():
+        continue    # TODO issue warning for uninstalled dependencies?
     if 'gpytorch' in s.lower():
         strategies.append(pytest.param(s, marks=skip_if_no_bayesopt_gpytorch))
     elif 'botorch' in s.lower():

From dcd102ba24fcc7ba8f6c8eb2df7401287c4d0d3f Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 26 Mar 2025 14:49:28 +0100
Subject: [PATCH 151/253] Report last HIP error on error

---
 kernel_tuner/backends/hip.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel_tuner/backends/hip.py b/kernel_tuner/backends/hip.py
index 1a0b7ce73..831123b42 100644
--- a/kernel_tuner/backends/hip.py
+++ b/kernel_tuner/backends/hip.py
@@ -40,7 +40,7 @@ def hip_check(call_result):
     if len(result) == 1:
         result = result[0]
     if isinstance(err, hip.hipError_t) and err != hip.hipError_t.hipSuccess:
-        raise RuntimeError(str(err))
+        raise RuntimeError(str(err), hip.hipGetLastError())
     return result
 
 

From 290a8605e41418b76cda2a634c04651ce9b04855 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 26 Mar 2025 14:52:25 +0100
Subject: [PATCH 152/253] Added docstring to ScoreObserver class

---
 kernel_tuner/backends/hypertuner.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index 66634e5c0..33a0e639c 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -17,6 +17,8 @@
 
 
 class ScoreObserver(BenchmarkObserver):
+    """BenchmarkObserver subclass for registering the hyperparameter tuning score."""
+
     def __init__(self, dev):
         self.dev = dev
         self.scores = []

From 496af9410938566f89abbceb4e457e56ebd53584 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 26 Mar 2025 14:52:44 +0100
Subject: [PATCH 153/253] Reduced cognitive complexity

---
 kernel_tuner/strategies/bayes_opt.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
index 451a0d5eb..66e360009 100644
--- a/kernel_tuner/strategies/bayes_opt.py
+++ b/kernel_tuner/strategies/bayes_opt.py
@@ -860,10 +860,8 @@ def __optimize_multi_ultrafast(self, max_fevals, predict_eval_ratio=5):
         while self.fevals < max_fevals:
             aqfs = self.multi_afs
             # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
-            if (
-                last_prediction_time * predict_eval_ratio <= last_eval_time
-                or last_prediction_counter >= predict_eval_ratio
-            ):
+            fit_observations = last_prediction_time * predict_eval_ratio <= last_eval_time or last_prediction_counter >= predict_eval_ratio
+            if fit_observations:
                 last_prediction_counter = 0
                 pred_start = time.perf_counter()
                 if last_eval_time > 0.0:

From c1c3a718f182b06a46647388d605f1fb988ba658 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 26 Mar 2025 15:13:30 +0100
Subject: [PATCH 154/253] Improved development environment creation
 specification

---
 doc/source/dev-environment.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/dev-environment.rst b/doc/source/dev-environment.rst
index 570a8c970..0adb3c83e 100644
--- a/doc/source/dev-environment.rst
+++ b/doc/source/dev-environment.rst
@@ -78,7 +78,7 @@ Steps without :bash:`sudo` access (e.g. on a cluster):
     * Verify that your development environment has no missing installs or updates with :bash:`poetry install --sync --dry-run --with test`. 
 #. Check if the environment is setup correctly by running :bash:`pytest`. All tests should pass, except if you're not on a GPU node, or one or more extras has been left out in the previous step, then these tests will skip gracefully.
 #. Set Nox to use the correct backend and location:
-    * Run :bash:`conda -- create-settings-file` to automatically create a settings file. 
+    * Run :bash:`nox -- create-settings-file` to automatically create a settings file. 
     * In this settings file :bash:`noxsettings.toml`, change the :bash:`venvbackend`:
         * If you used Mamba in step 2, to :bash:`mamba`.
         * If you used Miniconda or Anaconda in step 2, to :bash:`conda`.

From 54010b4c48feae125eca0a938d68955164aef39e Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Wed, 30 Apr 2025 10:08:34 +0200
Subject: [PATCH 155/253] introduced repair technique in genetic algorithm

---
 kernel_tuner/strategies/genetic_algorithm.py | 148 ++++++++++++-------
 kernel_tuner/strategies/greedy_ils.py        |   9 +-
 test/strategies/test_genetic_algorithm.py    |  10 +-
 test/test_runners.py                         |  16 ++
 4 files changed, 125 insertions(+), 58 deletions(-)

diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index c29c150b5..404c36ed9 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -11,6 +11,7 @@
 _options = dict(
     popsize=("population size", 20),
     maxiter=("maximum number of generations", 100),
+    constraint_aware=("constraint-aware optimization (True/False)", False),
     method=("crossover method to use, choose any from single_point, two_point, uniform, disruptive_uniform", "uniform"),
     mutation_chance=("chance to mutate is 1 in mutation_chance", 10),
 )
@@ -19,13 +20,15 @@
 def tune(searchspace: Searchspace, runner, tuning_options):
 
     options = tuning_options.strategy_options
-    pop_size, generations, method, mutation_chance = common.get_options(options, _options)
+    pop_size, generations, constraint_aware, method, mutation_chance = common.get_options(options, _options)
     crossover = supported_methods[method]
 
+    GA = GeneticAlgorithm(pop_size, searchspace, constraint_aware, method, mutation_chance)
+
     best_score = 1e20
     cost_func = CostFunc(searchspace, tuning_options, runner)
 
-    population = list(list(p) for p in searchspace.get_random_sample(pop_size))
+    population = GA.generate_population()
 
     for generation in range(generations):
 
@@ -51,18 +54,19 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         if tuning_options.verbose:
             print("Generation %d, best_score %f" % (generation, best_score))
 
+        # build new population for next generation
         population = []
 
         # crossover and mutate
         while len(population) < pop_size:
-            dna1, dna2 = weighted_choice(weighted_population, 2)
+            dna1, dna2 = GA.weighted_choice(weighted_population, 2)
 
-            children = crossover(dna1, dna2)
+            children = GA.crossover(dna1, dna2)
 
             for child in children:
-                child = mutate(child, mutation_chance, searchspace)
+                child = GA.mutate(child)
 
-                if child not in population and searchspace.is_param_config_valid(tuple(child)):
+                if child not in population:
                     population.append(child)
 
                 if len(population) >= pop_size:
@@ -75,57 +79,94 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
 tune.__doc__ = common.get_strategy_docstring("Genetic Algorithm", _options)
 
-
-def weighted_choice(population, n):
-    """Randomly select n unique individuals from a weighted population, fitness determines probability of being selected."""
-
-    def random_index_betavariate(pop_size):
-        # has a higher probability of returning index of item at the head of the list
-        alpha = 1
-        beta = 2.5
-        return int(random.betavariate(alpha, beta) * pop_size)
-
-    def random_index_weighted(pop_size):
-        """Use weights to increase probability of selection."""
-        weights = [w for _, w in population]
-        # invert because lower is better
-        inverted_weights = [1.0 / w for w in weights]
-        prefix_sum = np.cumsum(inverted_weights)
-        total_weight = sum(inverted_weights)
-        randf = random.random() * total_weight
-        # return first index of prefix_sum larger than random number
-        return next(i for i, v in enumerate(prefix_sum) if v > randf)
-
-    random_index = random_index_betavariate
-
-    indices = [random_index(len(population)) for _ in range(n)]
-    chosen = []
-    for ind in indices:
-        while ind in chosen:
-            ind = random_index(len(population))
-        chosen.append(ind)
-
-    return [population[ind][0] for ind in chosen]
-
-
-def mutate(dna, mutation_chance, searchspace: Searchspace, cache=True):
-    """Mutate DNA with 1/mutation_chance chance."""
-    # this is actually a neighbors problem with Hamming distance, choose randomly from returned searchspace list
-    if int(random.random() * mutation_chance) == 0:
-        if cache:
-            neighbors = searchspace.get_neighbors(tuple(dna), neighbor_method="Hamming")
-        else:
-            neighbors = searchspace.get_neighbors_no_cache(tuple(dna), neighbor_method="Hamming")
-        if len(neighbors) > 0:
-            return list(random.choice(neighbors))
-    return dna
+class GeneticAlgorithm:
+
+    def __init__(self, pop_size, searchspace, constraint_aware=False, method="uniform", mutation_chance=10):
+        self.pop_size = pop_size
+        self.searchspace = searchspace
+        self.constraint_aware = constraint_aware
+        self.crossover_method = supported_methods[method]
+        self.mutation_chance = mutation_chance
+
+    def generate_population(self):
+        """ Constraint-aware population creation method """
+        return list(list(p) for p in self.searchspace.get_random_sample(self.pop_size))
+
+    def crossover(self, dna1, dna2):
+        """ Apply selected crossover method, repair dna if constraint-aware """
+        dna1, dna2 = self.crossover_method(dna1, dna2)
+        if self.constraint_aware:
+            return self.repair(dna1), self.repair(dna2)
+        return dna1, dna2
+
+    def weighted_choice(self, population, n):
+        """Randomly select n unique individuals from a weighted population, fitness determines probability of being selected."""
+
+        def random_index_betavariate(pop_size):
+            # has a higher probability of returning index of item at the head of the list
+            alpha = 1
+            beta = 2.5
+            return int(random.betavariate(alpha, beta) * pop_size)
+
+        def random_index_weighted(pop_size):
+            """Use weights to increase probability of selection."""
+            weights = [w for _, w in population]
+            # invert because lower is better
+            inverted_weights = [1.0 / w for w in weights]
+            prefix_sum = np.cumsum(inverted_weights)
+            total_weight = sum(inverted_weights)
+            randf = random.random() * total_weight
+            # return first index of prefix_sum larger than random number
+            return next(i for i, v in enumerate(prefix_sum) if v > randf)
+
+        random_index = random_index_betavariate
+
+        indices = [random_index(len(population)) for _ in range(n)]
+        chosen = []
+        for ind in indices:
+            while ind in chosen:
+                ind = random_index(len(population))
+            chosen.append(ind)
+
+        return [population[ind][0] for ind in chosen]
+
+
+    def mutate(self, dna, cache=False):
+        """Mutate DNA with 1/mutation_chance chance."""
+        # this is actually a neighbors problem with Hamming distance, choose randomly from returned searchspace list
+        if int(random.random() * self.mutation_chance) == 0:
+            if cache:
+                neighbors = self.searchspace.get_neighbors(tuple(dna), neighbor_method="Hamming")
+            else:
+                neighbors = self.searchspace.get_neighbors_no_cache(tuple(dna), neighbor_method="Hamming")
+            if len(neighbors) > 0:
+                return list(random.choice(neighbors))
+        return dna
+
+
+    def repair(self, dna):
+        """ It is possible that crossover methods yield a configuration that is not valid. """
+        if not self.searchspace.is_param_config_valid(tuple(dna)):
+            # dna is not valid, try to repair it
+            # search for valid configurations neighboring this config
+            # start from strictly-adjacent to increasingly allowing more neighbors
+            for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]:
+                neighbors = self.searchspace.get_neighbors_no_cache(tuple(dna), neighbor_method=neighbor_method)
+
+                # if we have found valid neighboring configurations, select one at random
+                if len(neighbors) > 0:
+                    new_dna = list(random.choice(neighbors))
+                    print(f"GA crossover resulted in invalid config {dna=}, repaired dna to {new_dna=}")
+                    return new_dna
+
+        return dna
 
 
 def single_point_crossover(dna1, dna2):
     """Crossover dna1 and dna2 at a random index."""
     # check if you can do the crossovers using the neighbor index: check which valid parameter configuration is closest to the crossover, probably best to use "adjacent" as it is least strict?
     pos = int(random.random() * (len(dna1)))
-    return (dna1[:pos] + dna2[pos:], dna2[:pos] + dna1[pos:])
+    return dna1[:pos] + dna2[pos:], dna2[:pos] + dna1[pos:]
 
 
 def two_point_crossover(dna1, dna2):
@@ -137,7 +178,7 @@ def two_point_crossover(dna1, dna2):
     pos1, pos2 = sorted(random.sample(list(range(start, end)), 2))
     child1 = dna1[:pos1] + dna2[pos1:pos2] + dna1[pos2:]
     child2 = dna2[:pos1] + dna1[pos1:pos2] + dna2[pos2:]
-    return (child1, child2)
+    return child1, child2
 
 
 def uniform_crossover(dna1, dna2):
@@ -168,7 +209,7 @@ def disruptive_uniform_crossover(dna1, dna2):
                     child1[ind] = dna2[ind]
                     child2[ind] = dna1[ind]
                     swaps += 1
-    return (child1, child2)
+    return child1, child2
 
 
 supported_methods = {
@@ -177,3 +218,4 @@ def disruptive_uniform_crossover(dna1, dna2):
     "uniform": uniform_crossover,
     "disruptive_uniform": disruptive_uniform_crossover,
 }
+
diff --git a/kernel_tuner/strategies/greedy_ils.py b/kernel_tuner/strategies/greedy_ils.py
index a4c521746..c620ab925 100644
--- a/kernel_tuner/strategies/greedy_ils.py
+++ b/kernel_tuner/strategies/greedy_ils.py
@@ -1,9 +1,9 @@
 """A simple greedy iterative local search algorithm for parameter search."""
+import random
 from kernel_tuner import util
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc
-from kernel_tuner.strategies.genetic_algorithm import mutate
 from kernel_tuner.strategies.hillclimbers import base_hillclimb
 
 _options = dict(neighbor=("Method for selecting neighboring nodes, choose from Hamming or adjacent", "Hamming"),
@@ -58,9 +58,14 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
 tune.__doc__ = common.get_strategy_docstring("Greedy Iterative Local Search (ILS)", _options)
 
+def mutate(indiv, searchspace: Searchspace):
+    neighbors = searchspace.get_neighbors_no_cache(tuple(indiv), neighbor_method="Hamming")
+    return list(random.choice(neighbors))
+
+
 def random_walk(indiv, permutation_size, no_improve, last_improve, searchspace: Searchspace):
     if last_improve >= no_improve:
         return searchspace.get_random_sample(1)[0]
     for _ in range(permutation_size):
-        indiv = mutate(indiv, 0, searchspace, cache=False)
+        indiv = mutate(indiv, searchspace)
     return indiv
diff --git a/test/strategies/test_genetic_algorithm.py b/test/strategies/test_genetic_algorithm.py
index cb07f8d7f..940150796 100644
--- a/test/strategies/test_genetic_algorithm.py
+++ b/test/strategies/test_genetic_algorithm.py
@@ -14,10 +14,12 @@ def test_weighted_choice():
     pop = searchspace.get_random_sample(pop_size)
     weighted_pop = [[p, i] for i, p in enumerate(pop)]
 
-    result = ga.weighted_choice(weighted_pop, 1)
+    GA = ga.GeneticAlgorithm(pop_size, searchspace)
+
+    result = GA.weighted_choice(weighted_pop, 1)
     assert result[0] in pop
 
-    result = ga.weighted_choice(weighted_pop, 2)
+    result = GA.weighted_choice(weighted_pop, 2)
     print(result)
     assert result[0] in pop
     assert result[1] in pop
@@ -43,7 +45,9 @@ def test_random_population():
 def test_mutate():
     pop = searchspace.get_random_sample(1)
 
-    mutant = ga.mutate(pop[0], 10, searchspace)
+    GA = ga.GeneticAlgorithm(1, searchspace)
+
+    mutant = GA.mutate(pop[0])
     assert len(pop[0]) == len(mutant)
     assert mutant[0] in tune_params["x"]
     assert mutant[1] in tune_params["y"]
diff --git a/test/test_runners.py b/test/test_runners.py
index 527c1d252..dd4a7f52b 100644
--- a/test/test_runners.py
+++ b/test/test_runners.py
@@ -140,6 +140,22 @@ def test_diff_evo(env):
     assert len(result) > 0
 
 
+def test_constraint_aware_GA(env):
+    options = dict(method="uniform",
+                   constraint_aware=True,
+                   popsize=5,
+                   maxiter=2,
+                   mutation_chance=10,
+                   max_fevals=10)
+    result, _ = tune_kernel(*env,
+                            strategy="genetic_algorithm",
+                            strategy_options=options,
+                            verbose=True,
+                            cache=cache_filename,
+                            simulation_mode=True)
+    assert len(result) > 0
+
+
 @skip_if_no_pycuda
 def test_time_keeping(env):
     kernel_name, kernel_string, size, args, tune_params = env

From 71e3de8b623982c6d80e411e36f94c5df9c95199 Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Wed, 30 Apr 2025 22:43:55 +0200
Subject: [PATCH 156/253] added non-constraint-aware initialization and
 mutation for comparison

---
 kernel_tuner/strategies/genetic_algorithm.py | 40 ++++++++++++++++----
 1 file changed, 32 insertions(+), 8 deletions(-)

diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 404c36ed9..19f399dc6 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -11,7 +11,7 @@
 _options = dict(
     popsize=("population size", 20),
     maxiter=("maximum number of generations", 100),
-    constraint_aware=("constraint-aware optimization (True/False)", False),
+    constraint_aware=("constraint-aware optimization (True/False)", True),
     method=("crossover method to use, choose any from single_point, two_point, uniform, disruptive_uniform", "uniform"),
     mutation_chance=("chance to mutate is 1 in mutation_chance", 10),
 )
@@ -36,7 +36,8 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         weighted_population = []
         for dna in population:
             try:
-                time = cost_func(dna, check_restrictions=False)
+                # if we are not constraint-aware we should check restrictions upon evaluation
+                time = cost_func(dna, check_restrictions=not constraint_aware)
             except util.StopCriterionReached as e:
                 if tuning_options.verbose:
                     print(e)
@@ -84,13 +85,24 @@ class GeneticAlgorithm:
     def __init__(self, pop_size, searchspace, constraint_aware=False, method="uniform", mutation_chance=10):
         self.pop_size = pop_size
         self.searchspace = searchspace
+        self.tune_params = searchspace.tune_params.copy()
         self.constraint_aware = constraint_aware
         self.crossover_method = supported_methods[method]
         self.mutation_chance = mutation_chance
 
     def generate_population(self):
         """ Constraint-aware population creation method """
-        return list(list(p) for p in self.searchspace.get_random_sample(self.pop_size))
+        if self.constraint_aware:
+            pop = list(list(p) for p in self.searchspace.get_random_sample(self.pop_size))
+        else:
+            pop = []
+            dna_size = len(self.tune_params)
+            for _ in range(self.pop_size):
+                dna = []
+                for key in self.tune_params:
+                    dna.append(random.choice(self.tune_params[key]))
+                pop.append(dna)
+        return pop
 
     def crossover(self, dna1, dna2):
         """ Apply selected crossover method, repair dna if constraint-aware """
@@ -135,12 +147,24 @@ def mutate(self, dna, cache=False):
         """Mutate DNA with 1/mutation_chance chance."""
         # this is actually a neighbors problem with Hamming distance, choose randomly from returned searchspace list
         if int(random.random() * self.mutation_chance) == 0:
-            if cache:
-                neighbors = self.searchspace.get_neighbors(tuple(dna), neighbor_method="Hamming")
+            if self.constraint_aware:
+                if cache:
+                    neighbors = self.searchspace.get_neighbors(tuple(dna), neighbor_method="Hamming")
+                else:
+                    neighbors = self.searchspace.get_neighbors_no_cache(tuple(dna), neighbor_method="Hamming")
+                if len(neighbors) > 0:
+                    return list(random.choice(neighbors))
             else:
-                neighbors = self.searchspace.get_neighbors_no_cache(tuple(dna), neighbor_method="Hamming")
-            if len(neighbors) > 0:
-                return list(random.choice(neighbors))
+                # select a tunable parameter at random
+                mutate_index = random.randint(0, len(self.tune_params)-1)
+                mutate_key = list(self.tune_params.keys())[mutate_index]
+                # get all possible values for this parameter and remove current value
+                new_val_options = self.tune_params[mutate_key].copy()
+                new_val_options.remove(dna[mutate_index])
+                # pick new value at random
+                if len(new_val_options) > 0:
+                    new_val = random.choice(new_val_options)
+                    dna[mutate_index] = new_val
         return dna
 
 

From 67a5070a9c752e946dab03e14727713ce1de5620 Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Thu, 1 May 2025 08:50:57 +0200
Subject: [PATCH 157/253] fix test_mutate

---
 test/strategies/test_genetic_algorithm.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/strategies/test_genetic_algorithm.py b/test/strategies/test_genetic_algorithm.py
index 940150796..d16ad11ce 100644
--- a/test/strategies/test_genetic_algorithm.py
+++ b/test/strategies/test_genetic_algorithm.py
@@ -43,10 +43,11 @@ def test_random_population():
 
 
 def test_mutate():
-    pop = searchspace.get_random_sample(1)
 
     GA = ga.GeneticAlgorithm(1, searchspace)
 
+    pop = GA.generate_population()
+
     mutant = GA.mutate(pop[0])
     assert len(pop[0]) == len(mutant)
     assert mutant[0] in tune_params["x"]

From 939ea19a8e7201f5ac34111cfa82a7e247767edc Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Mon, 12 May 2025 15:58:35 +0200
Subject: [PATCH 158/253] constraint-aware variants for pso, firefly, and sa

---
 kernel_tuner/strategies/common.py             | 37 +++++++++-
 kernel_tuner/strategies/firefly_algorithm.py  | 12 ++--
 kernel_tuner/strategies/pso.py                | 12 ++--
 .../strategies/simulated_annealing.py         | 69 +++++++++++++++----
 4 files changed, 104 insertions(+), 26 deletions(-)

diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index d01eae937..76ad8a568 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -3,6 +3,7 @@
 from time import perf_counter
 
 import numpy as np
+from scipy.spatial import distance
 
 from kernel_tuner import util
 from kernel_tuner.searchspace import Searchspace
@@ -88,8 +89,17 @@ def __call__(self, x, check_restrictions=True):
 
         # else check if this is a legal (non-restricted) configuration
         if check_restrictions and self.searchspace.restrictions:
+            legal = self.searchspace.is_param_config_valid(tuple(params))
             params_dict = dict(zip(self.searchspace.tune_params.keys(), params))
-            legal = util.check_restrictions(self.searchspace.restrictions, params_dict, self.tuning_options.verbose)
+
+            if "constraint_aware" in self.tuning_options.strategy_options and self.tuning_options.strategy_options["constraint_aware"]:
+                # attempt to repair
+                new_params = unscale_and_snap_to_nearest_valid(x, params, self.searchspace, self.tuning_options.eps)
+                if new_params:
+                    params = new_params
+                    legal = True
+                    x_int = ",".join([str(i) for i in params])
+
             if not legal:
                 result = params_dict
                 result[self.tuning_options.objective] = util.InvalidConfig()
@@ -243,3 +253,28 @@ def scale_from_params(params, tune_params, eps):
     for i, v in enumerate(tune_params.values()):
         x[i] = 0.5 * eps + v.index(params[i])*eps
     return x
+
+
+
+def unscale_and_snap_to_nearest_valid(x, params, searchspace, eps):
+    """Helper func to snap to the nearest valid configuration"""
+
+    # params is nearest unscaled point, but is not valid
+    neighbors = get_neighbors(params, searchspace)
+
+    if neighbors:
+        # sort on distance to x
+        neighbors.sort(key=lambda y: distance.euclidean(x,scale_from_params(y, searchspace.tune_params, eps)))
+
+        # return closest valid neighbor
+        return neighbors[0]
+
+    return []
+
+
+def get_neighbors(params, searchspace):
+    for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]:
+        neighbors = searchspace.get_neighbors_no_cache(tuple(params), neighbor_method=neighbor_method)
+        if len(neighbors) > 0:
+            return neighbors
+    return []
diff --git a/kernel_tuner/strategies/firefly_algorithm.py b/kernel_tuner/strategies/firefly_algorithm.py
index dc43aae6f..9971df047 100644
--- a/kernel_tuner/strategies/firefly_algorithm.py
+++ b/kernel_tuner/strategies/firefly_algorithm.py
@@ -13,7 +13,8 @@
                        maxiter=("Maximum number of iterations", 100),
                        B0=("Maximum attractiveness", 1.0),
                        gamma=("Light absorption coefficient", 1.0),
-                       alpha=("Randomization parameter", 0.2))
+                       alpha=("Randomization parameter", 0.2),
+                       constraint_aware=("constraint-aware optimization (True/False)", True))
 
 def tune(searchspace: Searchspace, runner, tuning_options):
 
@@ -23,7 +24,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     # using this instead of get_bounds because scaling is used
     bounds, _, eps = cost_func.get_bounds_x0_eps()
 
-    num_particles, maxiter, B0, gamma, alpha = common.get_options(tuning_options.strategy_options, _options)
+    num_particles, maxiter, B0, gamma, alpha, constraint_aware = common.get_options(tuning_options.strategy_options, _options)
 
     best_score_global = sys.float_info.max
     best_position_global = []
@@ -34,9 +35,10 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         swarm.append(Firefly(bounds))
 
     # ensure particles start from legal points
-    population = list(list(p) for p in searchspace.get_random_sample(num_particles))
-    for i, particle in enumerate(swarm):
-        particle.position = scale_from_params(population[i], searchspace.tune_params, eps)
+    if constraint_aware:
+        population = list(list(p) for p in searchspace.get_random_sample(num_particles))
+        for i, particle in enumerate(swarm):
+            particle.position = scale_from_params(population[i], searchspace.tune_params, eps)
 
     # compute initial intensities
     for j in range(num_particles):
diff --git a/kernel_tuner/strategies/pso.py b/kernel_tuner/strategies/pso.py
index 5b0df1429..efcd63815 100644
--- a/kernel_tuner/strategies/pso.py
+++ b/kernel_tuner/strategies/pso.py
@@ -13,7 +13,8 @@
                        maxiter=("Maximum number of iterations", 100),
                        w=("Inertia weight constant", 0.5),
                        c1=("Cognitive constant", 2.0),
-                       c2=("Social constant", 1.0))
+                       c2=("Social constant", 1.0),
+                       constraint_aware=("constraint-aware optimization (True/False)", False))
 
 def tune(searchspace: Searchspace, runner, tuning_options):
 
@@ -24,7 +25,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     bounds, _, eps = cost_func.get_bounds_x0_eps()
 
 
-    num_particles, maxiter, w, c1, c2 = common.get_options(tuning_options.strategy_options, _options)
+    num_particles, maxiter, w, c1, c2, constraint_aware = common.get_options(tuning_options.strategy_options, _options)
 
     best_score_global = sys.float_info.max
     best_position_global = []
@@ -35,9 +36,10 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         swarm.append(Particle(bounds))
 
     # ensure particles start from legal points
-    population = list(list(p) for p in searchspace.get_random_sample(num_particles))
-    for i, particle in enumerate(swarm):
-        particle.position = scale_from_params(population[i], searchspace.tune_params, eps)
+    if constraint_aware:
+        population = list(list(p) for p in searchspace.get_random_sample(num_particles))
+        for i, particle in enumerate(swarm):
+            particle.position = scale_from_params(population[i], searchspace.tune_params, eps)
 
     # start optimization
     for i in range(maxiter):
diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py
index dce929b7b..b380e5efb 100644
--- a/kernel_tuner/strategies/simulated_annealing.py
+++ b/kernel_tuner/strategies/simulated_annealing.py
@@ -10,16 +10,17 @@
 from kernel_tuner.strategies.common import CostFunc
 
 _options = dict(T=("Starting temperature", 1.0),
-                       T_min=("End temperature", 0.001),
-                       alpha=("Alpha parameter", 0.995),
-                       maxiter=("Number of iterations within each annealing step", 1))
+                T_min=("End temperature", 0.001),
+                alpha=("Alpha parameter", 0.995),
+                maxiter=("Number of iterations within each annealing step", 1),
+                constraint_aware=("constraint-aware optimization (True/False)", True))
 
 def tune(searchspace: Searchspace, runner, tuning_options):
     # SA works with real parameter values and does not need scaling
     cost_func = CostFunc(searchspace, tuning_options, runner)
 
     # optimization parameters
-    T, T_min, alpha, niter = common.get_options(tuning_options.strategy_options, _options)
+    T, T_min, alpha, niter, constraint_aware = common.get_options(tuning_options.strategy_options, _options)
     T_start = T
 
     # compute how many iterations would be needed to complete the annealing schedule
@@ -30,7 +31,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     max_feval = tuning_options.strategy_options.get("max_fevals", max_iter)
 
     # get random starting point and evaluate cost
-    pos = list(searchspace.get_random_sample(1)[0])
+    pos = generate_starting_point(searchspace, constraint_aware)
     old_cost = cost_func(pos, check_restrictions=False)
 
     # main optimization loop
@@ -46,9 +47,9 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
         for _ in range(niter):
 
-            new_pos = neighbor(pos, searchspace)
+            new_pos = neighbor(pos, searchspace, constraint_aware)
             try:
-                new_cost = cost_func(new_pos, check_restrictions=False)
+                new_cost = cost_func(new_pos, check_restrictions=not constraint_aware)
             except util.StopCriterionReached as e:
                 if tuning_options.verbose:
                     print(e)
@@ -73,7 +74,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
             stuck = 0
         c_old = c
         if stuck > 100:
-            pos = list(searchspace.get_random_sample(1)[0])
+            pos = generate_starting_point(searchspace, constraint_aware)
             stuck = 0
 
         # safeguard
@@ -103,11 +104,49 @@ def acceptance_prob(old_cost, new_cost, T, tuning_options):
     return np.exp(((old_cost-new_cost)/old_cost)/T)
 
 
-def neighbor(pos, searchspace: Searchspace):
+def neighbor(pos, searchspace: Searchspace, constraint_aware=True):
     """Return a random neighbor of pos."""
-    # Note: this is not the same as the previous implementation, because it is possible that non-edge parameters remain the same, but suggested configurations will all be within restrictions
-    neighbors = searchspace.get_neighbors(tuple(pos), neighbor_method='Hamming') if random.random() < 0.2 else searchspace.get_neighbors(tuple(pos), neighbor_method='strictly-adjacent')
-    if len(neighbors) > 0:
-        return list(random.choice(neighbors))
-    # if there are no neighbors, return a random configuration
-    return list(searchspace.get_random_sample(1)[0])
+
+    if constraint_aware:
+        # Note: this is not the same as the previous implementation, because it is possible that non-edge parameters remain the same, but suggested configurations will all be within restrictions
+        neighbors = searchspace.get_neighbors(tuple(pos), neighbor_method='Hamming') if random.random() < 0.2 else searchspace.get_neighbors(tuple(pos), neighbor_method='strictly-adjacent')
+        if len(neighbors) > 0:
+            return list(random.choice(neighbors))
+        # if there are no neighbors, return a random configuration
+        return list(searchspace.get_random_sample(1)[0])
+
+    else:
+        tune_params = searchspace.tune_params
+        size = len(pos)
+        pos_out = []
+        # random mutation
+        # expected value is set that values all dimensions attempt to get mutated
+        for i in range(size):
+            key = list(tune_params.keys())[i]
+            values = tune_params[key]
+
+            if random.random() < 0.2:  #replace with random value
+                new_value = random_val(i, tune_params)
+            else: #adjacent value
+                ind = values.index(pos[i])
+                if random.random() > 0.5:
+                    ind += 1
+                else:
+                    ind -= 1
+                ind = min(max(ind, 0), len(values)-1)
+                new_value = values[ind]
+
+            pos_out.append(new_value)
+        return pos_out
+
+def random_val(index, tune_params):
+    """return a random value for a parameter"""
+    key = list(tune_params.keys())[index]
+    return random.choice(tune_params[key])
+
+def generate_starting_point(searchspace: Searchspace, constraint_aware=True):
+    if constraint_aware:
+        return list(searchspace.get_random_sample(1)[0])
+    else:
+        tune_params = searchspace.tune_params
+        return [random_val(i, tune_params) for i in range(len(tune_params))]

From b358265ba8c13af18fd684e105cb5a636f55defa Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Mon, 12 May 2025 16:01:28 +0200
Subject: [PATCH 159/253] remove unused variable

---
 kernel_tuner/strategies/genetic_algorithm.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 19f399dc6..3932baaa1 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -21,7 +21,6 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
     options = tuning_options.strategy_options
     pop_size, generations, constraint_aware, method, mutation_chance = common.get_options(options, _options)
-    crossover = supported_methods[method]
 
     GA = GeneticAlgorithm(pop_size, searchspace, constraint_aware, method, mutation_chance)
 

From 2d24ae97126f82bfad395447b9e11bb984d31f0b Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Tue, 13 May 2025 00:42:17 +0200
Subject: [PATCH 160/253] Added objective performance keys

---
 kernel_tuner/backends/hypertuner.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index 33a0e639c..a02e79aca 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -69,22 +69,26 @@ def compile(self, kernel_instance):
             {
                 "name": "dedispersion_milo",
                 "folder": folder,
-                "input_file": "dedispersion_milo.json"
+                "input_file": "dedispersion_milo.json",
+                "objective_performance_keys": ["time"]
             },
             {
                 "name": "hotspot_milo",
                 "folder": folder,
-                "input_file": "hotspot_milo.json"
+                "input_file": "hotspot_milo.json",
+                "objective_performance_keys": ["GFLOP/s"]
             },
             {
                 "name": "convolution_milo",
                 "folder": folder,
-                "input_file": "convolution_milo.json"
+                "input_file": "convolution_milo.json",
+                "objective_performance_keys": ["time"]
             },
             {
                 "name": "gemm_milo",
                 "folder": folder,
-                "input_file": "gemm_milo.json"
+                "input_file": "gemm_milo.json",
+                "objective_performance_keys": ["time"]
             }
         ]
 

From 77676c8f4937851e78b30d3c77441718a9805cd4 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Tue, 13 May 2025 10:56:48 +0200
Subject: [PATCH 161/253] Support for time-based cutoff with T1 format

---
 kernel_tuner/interface.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index fc678fdc9..db8ab59ae 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -891,10 +891,14 @@ def tune_kernel_T1(
             strategy_options[attribute["Name"]] = attribute["Value"]
     if "Budget" in inputs:
         budget = inputs["Budget"][0]
-        assert budget["Type"] == "ConfigurationCount"
         if strategy_options is None:
             strategy_options = {}
-        strategy_options["max_fevals"] = budget["BudgetValue"]
+        if budget["Type"] == "ConfigurationCount":
+            strategy_options["max_fevals"] = budget["BudgetValue"]
+        elif budget["Type"] == "TuningDuration":
+            strategy_options["time_limit"] = budget["BudgetValue"]  # both are in seconds
+        else:
+            raise NotImplementedError(f"Budget type in {budget} is not supported")
 
     # set the cache path
     if cache_filepath is None and "SimulationInput" in kernelspec:
@@ -971,7 +975,6 @@ def tune_kernel_T1(
             raise NotImplementedError(f"Conversion for this type of argument has not yet been implemented: {arg}")
 
     # tune with the converted inputs
-    # TODO get_t4_results calls once available in T1
     results, env = tune_kernel(
         kernel_name,
         kernel_source,

From 919626647f83cd90c3a266a5f1a21cf7d2763305 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Tue, 13 May 2025 17:09:33 +0200
Subject: [PATCH 162/253] Improvements to constraint-aware strategies

---
 kernel_tuner/strategies/genetic_algorithm.py | 28 ++++++++++----------
 kernel_tuner/strategies/pso.py               |  2 +-
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 026202d0f..8c6fbde41 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -12,32 +12,31 @@
 _options = dict(
     popsize=("population size", 30),
     maxiter=("maximum number of generations", 30),
-    constraint_aware=("constraint-aware optimization (True/False)", True),
     method=("crossover method to use, choose any from single_point, two_point, uniform, disruptive_uniform", "uniform"),
     mutation_chance=("chance to mutate is 1 in mutation_chance", 20),
+    constraint_aware=("constraint-aware optimization (True/False)", True),
 )
 
 
 def tune(searchspace: Searchspace, runner, tuning_options):
 
     options = tuning_options.strategy_options
-    pop_size, generations, constraint_aware, method, mutation_chance = common.get_options(options, _options)
+    pop_size, generations, method, mutation_chance, constraint_aware = common.get_options(options, _options)
 
-    GA = GeneticAlgorithm(pop_size, searchspace, constraint_aware, method, mutation_chance)
+    # if necessary adjust the popsize to a sensible value based on search space size
+    pop_size = min(round((searchspace.size / generations) * 3), pop_size)
 
-    # if left to the default, adjust the popsize to a sensible value for small search spaces
-    if pop_size == _options["popsize"][1]:
-        pop_size = min(round(searchspace.size / 2), pop_size)
-    else:
-        # otherwise, just make sure it doesn't exceed the search space size
-        pop_size = min(searchspace.size, pop_size)
+    GA = GeneticAlgorithm(pop_size, searchspace, method, mutation_chance, constraint_aware)
 
     best_score = 1e20
     cost_func = CostFunc(searchspace, tuning_options, runner)
+    num_evaluated = 0
 
     population = GA.generate_population()
 
     for generation in range(generations):
+        if any([not searchspace.is_param_config_valid(tuple(dna)) for dna in population]):
+            raise ValueError(f"Generation {generation}/{generations}, population validity: {[searchspace.is_param_config_valid(tuple(dna)) for dna in population]}")
 
         # determine fitness of population members
         weighted_population = []
@@ -45,7 +44,8 @@ def tune(searchspace: Searchspace, runner, tuning_options):
             try:
                 # if we are not constraint-aware we should check restrictions upon evaluation
                 time = cost_func(dna, check_restrictions=not constraint_aware)
-            except util.StopCriterionReached as e:
+                num_evaluated += 1
+            except StopCriterionReached as e:
                 if tuning_options.verbose:
                     print(e)
                 return cost_func.results
@@ -68,7 +68,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         population = []
 
         # crossover and mutate
-        while len(population) < pop_size:
+        while len(population) < pop_size and searchspace.size > num_evaluated + len(population):
             dna1, dna2 = GA.weighted_choice(weighted_population, 2)
 
             children = GA.crossover(dna1, dna2)
@@ -76,7 +76,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
             for child in children:
                 child = GA.mutate(child)
 
-                if child not in population:
+                if child not in population and searchspace.is_param_config_valid(tuple(child)):
                     population.append(child)
 
                 if len(population) >= pop_size:
@@ -91,13 +91,13 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
 class GeneticAlgorithm:
 
-    def __init__(self, pop_size, searchspace, constraint_aware=False, method="uniform", mutation_chance=10):
+    def __init__(self, pop_size, searchspace, method="uniform", mutation_chance=10, constraint_aware=True):
         self.pop_size = pop_size
         self.searchspace = searchspace
         self.tune_params = searchspace.tune_params.copy()
-        self.constraint_aware = constraint_aware
         self.crossover_method = supported_methods[method]
         self.mutation_chance = mutation_chance
+        self.constraint_aware = constraint_aware
 
     def generate_population(self):
         """ Constraint-aware population creation method """
diff --git a/kernel_tuner/strategies/pso.py b/kernel_tuner/strategies/pso.py
index 0fd9c874d..a7b75ed48 100644
--- a/kernel_tuner/strategies/pso.py
+++ b/kernel_tuner/strategies/pso.py
@@ -16,7 +16,7 @@
     w=("Inertia weight constant", 0.5),
     c1=("Cognitive constant", 3.0),
     c2=("Social constant", 1.5),
-    constraint_aware=("constraint-aware optimization (True/False)", False))
+    constraint_aware=("constraint-aware optimization (True/False)", True))
 
 def tune(searchspace: Searchspace, runner, tuning_options):
 

From 83df9482826bc3373a450da66c09f0f5be5d841b Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Tue, 13 May 2025 17:14:11 +0200
Subject: [PATCH 163/253] Implemented passing settings to hyperparameter tuner,
 improved hyperparam test, improved defaults

---
 kernel_tuner/backends/hypertuner.py | 87 +++++++++++++++++------------
 kernel_tuner/core.py                |  7 ++-
 test/test_hyper.py                  | 21 ++++++-
 3 files changed, 77 insertions(+), 38 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index a02e79aca..d4355d5ba 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -35,37 +35,18 @@ class HypertunerFunctions(Backend):
     """Class for executing hyperparameter tuning."""
     units = {}
 
-    def __init__(self, iterations):
+    def __init__(self, iterations, compiler_options=None):
         self.iterations = iterations
+        self.compiler_options = compiler_options
         self.observers = [ScoreObserver(self)]
         self.name = platform.processor()
         self.max_threads = 1024
         self.last_score = None
 
-        # set the environment options
-        env = dict()
-        env["iterations"] = self.iterations
-        self.env = env
-
-        # check for the methodology package
-        if methodology_available is not True:
-            raise ImportError("Unable to import the autotuning methodology, run `pip install autotuning_methodology`.")
-
-    def ready_argument_list(self, arguments):
-        arglist = super().ready_argument_list(arguments)
-        if arglist is None:
-            arglist = []
-        return arglist
-    
-    def compile(self, kernel_instance):
-        super().compile(kernel_instance)
-        path = Path(__file__).parent.parent.parent / "hyperparamtuning"
-        path.mkdir(exist_ok=True)
-
-        # TODO get applications & GPUs args from benchmark
-        gpus = ["A100", "A4000", "MI250X"]
+        # set the defaults
+        self.gpus = ["A100", "A4000", "MI250X"]
         folder = "../autotuning_methodology/benchmark_hub/kernels"
-        applications = [
+        self.applications = [
             {
                 "name": "dedispersion_milo",
                 "folder": folder,
@@ -91,6 +72,51 @@ def compile(self, kernel_instance):
                 "objective_performance_keys": ["time"]
             }
         ]
+        # any additional settings
+        self.override = { 
+            "experimental_groups_defaults": { 
+                "repeats": 25,
+                "samples": self.iterations,
+                "minimum_fraction_of_budget_valid": 0.01, 
+            },
+            "statistics_settings": {
+                "cutoff_percentile": 0.95,
+                "cutoff_percentile_start": 0.01,
+                "cutoff_type": "time",
+                "objective_time_keys": [
+                    "all"
+                ]
+            }
+        }
+
+        # override the defaults with compiler options if provided
+        if self.compiler_options is not None:
+            if "gpus" in self.compiler_options:
+                self.gpus = self.compiler_options["gpus"]
+            if "applications" in self.compiler_options:
+                self.applications = self.compiler_options["applications"]
+            if "override" in self.compiler_options:
+                self.override = self.compiler_options["override"]
+
+        # set the environment options
+        env = dict()
+        env["iterations"] = self.iterations
+        self.env = env
+
+        # check for the methodology package
+        if methodology_available is not True:
+            raise ImportError("Unable to import the autotuning methodology, run `pip install autotuning_methodology`.")
+
+    def ready_argument_list(self, arguments):
+        arglist = super().ready_argument_list(arguments)
+        if arglist is None:
+            arglist = []
+        return arglist
+    
+    def compile(self, kernel_instance):
+        super().compile(kernel_instance)
+        path = Path(__file__).parent.parent.parent / "hyperparamtuning"
+        path.mkdir(exist_ok=True)
 
         # strategy settings
         strategy: str = kernel_instance.arguments[0]
@@ -104,18 +130,9 @@ def compile(self, kernel_instance):
             'search_method_hyperparameters': hyperparams
         }]
 
-        # any additional settings
-        override = { 
-            "experimental_groups_defaults": { 
-                "repeats": 25,
-                "samples": self.iterations,
-                "minimum_fraction_of_budget_valid": 0.01, 
-            }
-        }
-
         name = kernel_instance.name if len(kernel_instance.name) > 0 else kernel_instance.kernel_source.kernel_name
-        experiments_filepath = generate_experiment_file(name, path, searchspace_strategies, applications, gpus, 
-                                                        override=override, generate_unique_file=True, overwrite_existing_file=True)
+        experiments_filepath = generate_experiment_file(name, path, searchspace_strategies, self.applications, self.gpus, 
+                                                        override=self.override, generate_unique_file=True, overwrite_existing_file=True)
         return str(experiments_filepath)
     
     def start_event(self):
diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py
index a7a642da6..c203723b7 100644
--- a/kernel_tuner/core.py
+++ b/kernel_tuner/core.py
@@ -324,10 +324,13 @@ def __init__(
                 observers=observers,
             )
         elif lang.upper() == "HYPERTUNER":
-            dev = HypertunerFunctions(iterations=iterations)
+            dev = HypertunerFunctions(
+                iterations=iterations,
+                compiler_options=compiler_options
+            )
             self.requires_warmup = False
         else:
-            raise ValueError("Sorry, support for languages other than CUDA, OpenCL, HIP, C, and Fortran is not implemented yet")
+            raise NotImplementedError("Sorry, support for languages other than CUDA, OpenCL, HIP, C, and Fortran is not implemented yet")
         self.dev = dev
 
         # look for NVMLObserver and TegraObserver in observers, if present, enable special tunable parameters through nvml/tegra
diff --git a/test/test_hyper.py b/test/test_hyper.py
index d34294585..f0dcdae5b 100644
--- a/test/test_hyper.py
+++ b/test/test_hyper.py
@@ -15,6 +15,25 @@ def test_hyper(env):
 
     target_strategy = "genetic_algorithm"
 
-    result, env = tune_hyper_params(target_strategy, hyper_params, iterations=1, verbose=True, cache=None)
+    compiler_options = {
+        "gpus": ["A100", "MI250X"],
+        "override": { 
+            "experimental_groups_defaults": { 
+                "repeats": 1,
+                "samples": 1,
+                "minimum_fraction_of_budget_valid": 0.01, 
+            },
+            "statistics_settings": {
+                "cutoff_percentile": 0.90,
+                "cutoff_percentile_start": 0.01,
+                "cutoff_type": "time",
+                "objective_time_keys": [
+                    "all"
+                ]
+            }
+        }
+    }
+
+    result, env = tune_hyper_params(target_strategy, hyper_params, iterations=1, compiler_options=compiler_options, verbose=True, cache=None)
     assert len(result) == 2
     assert 'best_config' in env

From f6811ab712863738182ab986256c3a8df4cb0e7d Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 14 May 2025 13:31:52 +0200
Subject: [PATCH 164/253] Added firefly to hyperparameter tuning, various minor
 improvements

---
 kernel_tuner/backends/hypertuner.py          | 3 ++-
 kernel_tuner/hyper.py                        | 8 ++++++++
 kernel_tuner/strategies/genetic_algorithm.py | 2 +-
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index d4355d5ba..e33a9087d 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -77,7 +77,8 @@ def __init__(self, iterations, compiler_options=None):
             "experimental_groups_defaults": { 
                 "repeats": 25,
                 "samples": self.iterations,
-                "minimum_fraction_of_budget_valid": 0.01, 
+                "minimum_fraction_of_budget_valid": 0.1,
+                "minimum_number_of_valid_search_iterations": 10,
             },
             "statistics_settings": {
                 "cutoff_percentile": 0.95,
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index ed61558e5..9942a2414 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -107,6 +107,14 @@ def put_if_not_present(target_dict, key, value):
             'c1': [1.0, 2.0, 3.0],
             'c2': [0.5, 1.0, 1.5]
         }
+    elif strategy_to_tune.lower() == "firefly_algorithm":
+        hyperparams = {
+            'popsize': [10, 20, 30],
+            'maxiter': [50, 100, 150],
+            'B0': [0.5, 1.0, 1.5],
+            'gamma': [0.1, 0.25, 0.5],
+            'alpha': [0.1, 0.2, 0.3]
+        }
     elif strategy_to_tune.lower() == "greedy_ils":
         hyperparams = {
             'neighbor': ['Hamming', 'adjacent'],
diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 8c6fbde41..1cf0ca32d 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -189,7 +189,7 @@ def repair(self, dna):
                 # if we have found valid neighboring configurations, select one at random
                 if len(neighbors) > 0:
                     new_dna = list(random.choice(neighbors))
-                    print(f"GA crossover resulted in invalid config {dna=}, repaired dna to {new_dna=}")
+                    # print(f"GA crossover resulted in invalid config {dna=}, repaired dna to {new_dna=}")
                     return new_dna
 
         return dna

From e4af9f7d30ba1ff08a65731943d431851038a5ea Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 16 May 2025 00:49:40 +0200
Subject: [PATCH 165/253] Added explicit restrictions definition to
 hyperparameter tuning

---
 kernel_tuner/backends/hypertuner.py | 2 +-
 kernel_tuner/hyper.py               | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index e33a9087d..ce090e944 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -78,7 +78,7 @@ def __init__(self, iterations, compiler_options=None):
                 "repeats": 25,
                 "samples": self.iterations,
                 "minimum_fraction_of_budget_valid": 0.1,
-                "minimum_number_of_valid_search_iterations": 10,
+                "minimum_number_of_valid_search_iterations": 5,
             },
             "statistics_settings": {
                 "cutoff_percentile": 0.95,
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 9942a2414..bb957c01b 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -17,7 +17,7 @@ def randpath():
         path = randpath()
     return path
 
-def tune_hyper_params(target_strategy: str, hyper_params: dict, *args, **kwargs):
+def tune_hyper_params(target_strategy: str, hyper_params: dict, restrictions: list, *args, **kwargs):
     """Tune hyperparameters for a given strategy and kernel.
 
     This function is to be called just like tune_kernel, except that you specify a strategy
@@ -80,7 +80,7 @@ def put_if_not_present(target_dict, key, value):
     name = f"hyperparamtuning_{target_strategy.lower()}"
 
     # execute the hyperparameter tuning
-    result, env = kernel_tuner.tune_kernel(name, None, [], arguments, hyper_params, *args, lang='Hypertuner',
+    result, env = kernel_tuner.tune_kernel(name, None, [], arguments, hyper_params, restrictions=restrictions, *args, lang='Hypertuner',
                                     objective='score', objective_higher_is_better=True, iterations=iterations, **kwargs)
     
     # remove the temporary cachefile and return only unique results in order
@@ -99,6 +99,7 @@ def put_if_not_present(target_dict, key, value):
     strategy_to_tune = args.strategy_to_tune
 
     # select the hyperparameter parameters for the selected optimization algorithm
+    restrictions = []
     if strategy_to_tune.lower() == "pso":
         hyperparams = {
             'popsize': [10, 20, 30],
@@ -169,6 +170,6 @@ def put_if_not_present(target_dict, key, value):
         raise ValueError(f"Invalid argument {strategy_to_tune=}")
 
     # run the hyperparameter tuning
-    result, env = tune_hyper_params(strategy_to_tune.lower(), hyperparams)
+    result, env = tune_hyper_params(strategy_to_tune.lower(), hyperparams, restrictions=restrictions)
     print(result)
     print(env['best_config'])

From 5f3b6fcae0ee690b0923fd79f0f338dc19396dcc Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 16 May 2025 18:20:16 +0200
Subject: [PATCH 166/253] Updated tune_kernel_T1 to be more broadly applicable

---
 kernel_tuner/interface.py | 32 ++++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index db8ab59ae..ae8927f3b 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -870,29 +870,37 @@ def tune_kernel_T1(
     simulation_mode=False,
     output_T4=True,
     iterations=7,
-    strategy_options=None,
-):
-    """Call the tune function with a T1 input file."""
+    device=None,
+    strategy: str=None,
+    strategy_options: dict={},
+) -> tuple:
+    """
+    Call the tune function with a T1 input file.
+    
+        The device, strategy and strategy_options can be overridden by passing a strategy name and options, otherwise the input file specification is used.
+    """
     inputs = get_input_file(input_filepath)
     kernelspec: dict = inputs["KernelSpecification"]
     kernel_name: str = kernelspec["KernelName"]
     kernel_filepath = Path(kernelspec["KernelFile"])
     kernel_source = (
-        kernel_filepath if kernel_filepath.exists() else Path(input_filepath).parent.parent / kernel_filepath
+        kernel_filepath if kernel_filepath.exists() else Path(input_filepath).parent / kernel_filepath
+    )
+    kernel_source = (
+        kernel_source if kernel_source.exists() else Path(input_filepath).parent.parent / kernel_filepath
     )
     assert kernel_source.exists(), f"KernelFile '{kernel_source}' does not exist at {kernel_source.resolve()}"
     language: str = kernelspec["Language"]
     problem_size = kernelspec["ProblemSize"]
-    device = kernelspec["Device"]["Name"]
-    strategy = inputs["Search"]["Name"]
-    if "Attributes" in inputs["Search"]:
-        strategy_options = {}
-        for attribute in inputs["Search"]["Attributes"]:
-            strategy_options[attribute["Name"]] = attribute["Value"]
+    if device is None:
+        device = kernelspec["Device"]["Name"]
+    if strategy is None:
+        strategy = inputs["Search"]["Name"]
+        if "Attributes" in inputs["Search"]:
+            for attribute in inputs["Search"]["Attributes"]:
+                strategy_options[attribute["Name"]] = attribute["Value"]
     if "Budget" in inputs:
         budget = inputs["Budget"][0]
-        if strategy_options is None:
-            strategy_options = {}
         if budget["Type"] == "ConfigurationCount":
             strategy_options["max_fevals"] = budget["BudgetValue"]
         elif budget["Type"] == "TuningDuration":

From 7f3a4a3dc05b3b963faa5691d024b08c028cb6b9 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Sat, 24 May 2025 12:53:53 +0200
Subject: [PATCH 167/253] Updated hyperparameters to newly tuned defaults

---
 kernel_tuner/strategies/genetic_algorithm.py   | 8 ++++----
 kernel_tuner/strategies/pso.py                 | 6 +++---
 kernel_tuner/strategies/simulated_annealing.py | 2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 1cf0ca32d..2e6104773 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -10,10 +10,10 @@
 from kernel_tuner.strategies.common import CostFunc
 
 _options = dict(
-    popsize=("population size", 30),
-    maxiter=("maximum number of generations", 30),
-    method=("crossover method to use, choose any from single_point, two_point, uniform, disruptive_uniform", "uniform"),
-    mutation_chance=("chance to mutate is 1 in mutation_chance", 20),
+    popsize=("population size", 20),
+    maxiter=("maximum number of generations", 150),
+    method=("crossover method to use, choose any from single_point, two_point, uniform, disruptive_uniform", "single_point"),
+    mutation_chance=("chance to mutate is 1 in mutation_chance", 5),
     constraint_aware=("constraint-aware optimization (True/False)", True),
 )
 
diff --git a/kernel_tuner/strategies/pso.py b/kernel_tuner/strategies/pso.py
index a7b75ed48..a02aed1c5 100644
--- a/kernel_tuner/strategies/pso.py
+++ b/kernel_tuner/strategies/pso.py
@@ -11,11 +11,11 @@
 from kernel_tuner.strategies.common import CostFunc, scale_from_params
 
 _options = dict(
-    popsize=("Population size", 20),
-    maxiter=("Maximum number of iterations", 150),
+    popsize=("Population size", 30),
+    maxiter=("Maximum number of iterations", 100),
     w=("Inertia weight constant", 0.5),
     c1=("Cognitive constant", 3.0),
-    c2=("Social constant", 1.5),
+    c2=("Social constant", 0.5),
     constraint_aware=("constraint-aware optimization (True/False)", True))
 
 def tune(searchspace: Searchspace, runner, tuning_options):
diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py
index 741800d24..d01ba7e4f 100644
--- a/kernel_tuner/strategies/simulated_annealing.py
+++ b/kernel_tuner/strategies/simulated_annealing.py
@@ -11,7 +11,7 @@
 
 
 _options = dict(T=("Starting temperature", 0.5),
-                T_min=("End temperature", 0.0001),
+                T_min=("End temperature", 0.001),
                 alpha=("Alpha parameter", 0.9975),
                 maxiter=("Number of iterations within each annealing step", 2),
                 constraint_aware=("constraint-aware optimization (True/False)", True))

From 80a5b62db227363496a3764991525018398ccf86 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 28 May 2025 16:18:28 +0200
Subject: [PATCH 168/253] Set default arguments if not provided

---
 kernel_tuner/hyper.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index bb957c01b..00ee42795 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -68,11 +68,12 @@ def tune_hyper_params(target_strategy: str, hyper_params: dict, restrictions: li
     def put_if_not_present(target_dict, key, value):
         target_dict[key] = value if key not in target_dict else target_dict[key]
 
+    # set default arguments if not provided
     put_if_not_present(kwargs, "verbose", True)
     put_if_not_present(kwargs, "quiet", False)
-    kwargs['simulation_mode'] = False
-    kwargs['strategy'] = 'brute_force'
-    kwargs['verify'] = None
+    put_if_not_present(kwargs, "simulation_mode", False)
+    put_if_not_present(kwargs, "strategy", brute_force)
+    put_if_not_present(kwargs, 'verify', None)
     arguments = [target_strategy]
 
     # IMPORTANT when running this script in parallel, always make sure the below name is unique among your runs!

From e9797e246f78c68db890be5a7077acceea2c88e9 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 28 May 2025 17:06:10 +0200
Subject: [PATCH 169/253] Made Hypertuner backend compatible with changes to
 Backend ABC

---
 kernel_tuner/backends/hypertuner.py | 4 ++++
 kernel_tuner/hyper.py               | 2 +-
 test/test_hyper.py                  | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index ce090e944..50971f5aa 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -168,3 +168,7 @@ def memcpy_dtoh(self, dest, src):
     
     def memcpy_htod(self, dest, src):
         return super().memcpy_htod(dest, src)
+
+    def refresh_memory(self, device_memory, host_arguments, should_sync):
+        """This is a no-op for the hypertuner backend, as it does not manage memory directly."""
+        pass
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 00ee42795..97bc01567 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -72,7 +72,7 @@ def put_if_not_present(target_dict, key, value):
     put_if_not_present(kwargs, "verbose", True)
     put_if_not_present(kwargs, "quiet", False)
     put_if_not_present(kwargs, "simulation_mode", False)
-    put_if_not_present(kwargs, "strategy", brute_force)
+    put_if_not_present(kwargs, "strategy", 'brute_force')
     put_if_not_present(kwargs, 'verify', None)
     arguments = [target_strategy]
 
diff --git a/test/test_hyper.py b/test/test_hyper.py
index f0dcdae5b..7863c2e47 100644
--- a/test/test_hyper.py
+++ b/test/test_hyper.py
@@ -34,6 +34,6 @@ def test_hyper(env):
         }
     }
 
-    result, env = tune_hyper_params(target_strategy, hyper_params, iterations=1, compiler_options=compiler_options, verbose=True, cache=None)
+    result, env = tune_hyper_params(target_strategy, hyper_params, restrictions=[], iterations=1, compiler_options=compiler_options, verbose=True, cache=None)
     assert len(result) == 2
     assert 'best_config' in env

From 1a4c439705a4dfcfeed3460763ba852a0b75e044 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 28 May 2025 18:25:41 +0200
Subject: [PATCH 170/253] Adjusted GA popsize to only be adjusted when
 necessary

---
 kernel_tuner/strategies/genetic_algorithm.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 2e6104773..27f07e8db 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -24,7 +24,8 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     pop_size, generations, method, mutation_chance, constraint_aware = common.get_options(options, _options)
 
     # if necessary adjust the popsize to a sensible value based on search space size
-    pop_size = min(round((searchspace.size / generations) * 3), pop_size)
+    if pop_size < 2 or pop_size > np.floor(searchspace.size / 2):
+        pop_size = min(max(round((searchspace.size / generations) * 3), 2), pop_size)
 
     GA = GeneticAlgorithm(pop_size, searchspace, method, mutation_chance, constraint_aware)
 

From b8a9902ec8ed2b47b53f4800de7b726c0f8ade47 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Mon, 2 Jun 2025 15:23:15 +0200
Subject: [PATCH 171/253] Implemented utility method to dynamically import
 class from file

---
 kernel_tuner/file_utils.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/kernel_tuner/file_utils.py b/kernel_tuner/file_utils.py
index 9231f0e2e..16ead7c4d 100644
--- a/kernel_tuner/file_utils.py
+++ b/kernel_tuner/file_utils.py
@@ -3,6 +3,7 @@
 import json
 import subprocess
 from importlib.metadata import PackageNotFoundError, requires, version
+from importlib.util import spec_from_file_location, module_from_spec
 from pathlib import Path
 from sys import platform
 
@@ -302,3 +303,18 @@ def store_metadata_file(metadata_filename: str):
     with open(metadata_filenamepath, "w+") as fh:
         json.dump(metadata_json, fh, indent="  ")
 
+def import_class_from_file(file_path: Path, class_name):
+    """Import a class from a file."""
+    module_name = file_path.stem
+    spec = spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ImportError(f"Could not load spec from {file_path}")
+    
+    # create a module from the spec and execute it
+    module = module_from_spec(spec)
+    spec.loader.exec_module(module)
+    if not hasattr(module, class_name):
+        raise ImportError(f"Module '{module_name}' has no class '{class_name}'")
+    
+    # return the class from the module
+    return getattr(module, class_name)

From 8f3744d7979d133f260b1ce33f3073852cd7dd04 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Mon, 2 Jun 2025 15:24:02 +0200
Subject: [PATCH 172/253] Implemented passing custom search method path and
 options usng T1 format

---
 kernel_tuner/interface.py          | 18 ++++++++++++++++--
 kernel_tuner/strategies/wrapper.py |  2 +-
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 9ee58cb66..3386bbd8b 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -36,7 +36,7 @@
 
 import kernel_tuner.core as core
 import kernel_tuner.util as util
-from kernel_tuner.file_utils import get_input_file, get_t4_metadata, get_t4_results
+from kernel_tuner.file_utils import get_input_file, get_t4_metadata, get_t4_results, import_class_from_file
 from kernel_tuner.integration import get_objective_defaults
 from kernel_tuner.runners.sequential import SequentialRunner
 from kernel_tuner.runners.simulation import SimulationRunner
@@ -47,6 +47,7 @@
 except ImportError:
     torch = util.TorchPlaceHolder()
 
+from kernel_tuner.strategies.wrapper import OptAlgWrapper
 from kernel_tuner.strategies import (
     basinhopping,
     bayes_opt,
@@ -62,7 +63,7 @@
     ordered_greedy_mls,
     pso,
     random_sample,
-    simulated_annealing,
+    simulated_annealing
 )
 
 strategy_map = {
@@ -894,6 +895,19 @@ def tune_kernel_T1(
         else:
             raise NotImplementedError(f"Budget type in {budget} is not supported")
 
+    # check if the strategy is a path
+    if "custom_search_method_path" in strategy_options:
+        # if it is a path, import the strategy from the file
+        opt_path: Path = Path(strategy_options["custom_search_method_path"])
+        class_name: str = strategy
+        assert opt_path.exists(), f"Custom search method path '{opt_path}' does not exist relative to current working directory {Path.cwd()}"
+        optimizer_class = import_class_from_file(opt_path, class_name)
+        budget = strategy_options.get("max_fevals", 1e12)    # if not set, use a very large number to have it run out at the time limit
+        filter_keys = ["custom_search_method_path", "max_fevals", "time_limit", "constraint_aware"]
+        adjusted_strategy_options = {k:v for k, v in strategy_options.items() if k not in filter_keys}
+        optimizer_instance = optimizer_class(budget=budget, **adjusted_strategy_options)
+        strategy = OptAlgWrapper(optimizer_instance)
+
     # set the cache path
     if cache_filepath is None and "SimulationInput" in kernelspec:
         cache_filepath = Path(kernelspec["SimulationInput"])
diff --git a/kernel_tuner/strategies/wrapper.py b/kernel_tuner/strategies/wrapper.py
index 8104f7129..839fd22dc 100644
--- a/kernel_tuner/strategies/wrapper.py
+++ b/kernel_tuner/strategies/wrapper.py
@@ -21,7 +21,7 @@ def tune(self, searchspace: Searchspace, runner, tuning_options):
             cost_func.get_bounds_x0_eps()
 
         try:
-            self.optimizer(cost_func)
+            self.optimizer(cost_func, searchspace)
         except util.StopCriterionReached as e:
             if tuning_options.verbose:
                 print(e)

From 72b615bc134143bed8859b1f93097b8ef58493d5 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 4 Jun 2025 11:43:22 +0200
Subject: [PATCH 173/253] Implemented passing arguments to costfunc from
 optimizer in OptAlgWrapper

---
 kernel_tuner/strategies/wrapper.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/strategies/wrapper.py b/kernel_tuner/strategies/wrapper.py
index 839fd22dc..fa3de86ac 100644
--- a/kernel_tuner/strategies/wrapper.py
+++ b/kernel_tuner/strategies/wrapper.py
@@ -8,15 +8,14 @@
 class OptAlgWrapper:
     """Wrapper class for user-defined optimization algorithms"""
 
-    def __init__(self, optimizer, scaling=True):
+    def __init__(self, optimizer):
         self.optimizer = optimizer
-        self.scaling = scaling
 
 
     def tune(self, searchspace: Searchspace, runner, tuning_options):
-        cost_func = CostFunc(searchspace, tuning_options, runner, scaling=self.scaling)
+        cost_func = CostFunc(searchspace, tuning_options, runner, **self.optimizer.costfunc_kwargs)
 
-        if self.scaling:
+        if self.optimizer.costfunc_kwargs.get('scaling', True):
             # Initialize costfunc for scaling
             cost_func.get_bounds_x0_eps()
 

From 5ce2495751c9b3ed5ad734c7b2b01e602ed692f2 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 4 Jun 2025 13:27:19 +0200
Subject: [PATCH 174/253] Implemented abstract base class for custom
 optimization algorithm strategies

---
 kernel_tuner/strategies/wrapper.py | 35 ++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/kernel_tuner/strategies/wrapper.py b/kernel_tuner/strategies/wrapper.py
index 8104f7129..a0aa56a4b 100644
--- a/kernel_tuner/strategies/wrapper.py
+++ b/kernel_tuner/strategies/wrapper.py
@@ -1,27 +1,48 @@
 """Wrapper intended for user-defined custom optimization methods"""
 
+from abc import ABC, abstractmethod
+
 from kernel_tuner import util
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies.common import CostFunc
 
 
+class OptAlg(ABC):
+    """Base class for user-defined optimization algorithms."""
+
+    def __init__(self):
+        self.costfunc_kwargs = {"scaling": True, "snap": True}
+
+    @abstractmethod
+    def __call__(self, func: CostFunc, searchspace: Searchspace, budget_spent_fraction: float) -> tuple[tuple, float]:
+        """_summary_
+
+        Args:
+            func (CostFunc): Cost function to be optimized.
+            searchspace (Searchspace): Search space containing the parameters to be optimized.
+            budget_spent_fraction (float): Fraction of the budget that has already been spent.
+
+        Returns:
+            tuple[tuple, float]: tuple of the best parameters and the corresponding cost value
+        """
+        pass
+
+
 class OptAlgWrapper:
     """Wrapper class for user-defined optimization algorithms"""
 
-    def __init__(self, optimizer, scaling=True):
-        self.optimizer = optimizer
-        self.scaling = scaling
-
+    def __init__(self, optimizer: OptAlg):
+        self.optimizer: OptAlg = optimizer
 
     def tune(self, searchspace: Searchspace, runner, tuning_options):
-        cost_func = CostFunc(searchspace, tuning_options, runner, scaling=self.scaling)
+        cost_func = CostFunc(searchspace, tuning_options, runner, **self.optimizer.costfunc_kwargs)
 
-        if self.scaling:
+        if self.optimizer.costfunc_kwargs.get('scaling', True):
             # Initialize costfunc for scaling
             cost_func.get_bounds_x0_eps()
 
         try:
-            self.optimizer(cost_func)
+            self.optimizer(cost_func, searchspace)
         except util.StopCriterionReached as e:
             if tuning_options.verbose:
                 print(e)

From 22a6e0e84df2852fd99163079530639e3e6e66f5 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 4 Jun 2025 15:22:50 +0200
Subject: [PATCH 175/253]  returns the fraction of the budget that has been
 spent

---
 kernel_tuner/strategies/common.py |  3 ++-
 kernel_tuner/util.py              | 28 ++++++++++++++++++++++------
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index d01eae937..658094141 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -60,6 +60,7 @@ def __init__(self, searchspace: Searchspace, tuning_options, runner, *, scaling=
         self.scaling = scaling
         self.searchspace = searchspace
         self.results = []
+        self.budget_spent_fraction = 0.0
 
     def __call__(self, x, check_restrictions=True):
         """Cost function used by almost all strategies."""
@@ -70,7 +71,7 @@ def __call__(self, x, check_restrictions=True):
         logging.debug('x: ' + str(x))
 
         # check if max_fevals is reached or time limit is exceeded
-        util.check_stop_criterion(self.tuning_options)
+        self.budget_spent_fraction = util.check_stop_criterion(self.tuning_options)
 
         # snap values in x to nearest actual value for each parameter, unscale x if needed
         if self.snap:
diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index 072cce433..7713adbde 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -189,12 +189,28 @@ def check_argument_list(kernel_name, kernel_string, args):
         warnings.warn(errors[0], UserWarning)
 
 
-def check_stop_criterion(to):
-    """Checks if max_fevals is reached or time limit is exceeded."""
-    if "max_fevals" in to and len(to.unique_results) >= to.max_fevals:
-        raise StopCriterionReached("max_fevals reached")
-    if "time_limit" in to and (((time.perf_counter() - to.start_time) + (to.simulated_time * 1e-3)) > to.time_limit):
-        raise StopCriterionReached("time limit exceeded")
+def check_stop_criterion(to: dict) -> float:
+    """Check if the stop criterion is reached.
+
+    Args:
+        to (dict): tuning options.
+
+    Raises:
+        StopCriterionReached: if the max_fevals is reached or time limit is exceeded.
+
+    Returns:
+        float: fraction of budget spent.
+    """
+    if "max_fevals" in to:
+        if len(to.unique_results) >= to.max_fevals:
+            raise StopCriterionReached(f"max_fevals ({to.max_fevals}) reached")
+        return len(to.unique_results) / to.max_fevals
+    if "time_limit" in to:
+        time_spent = (time.perf_counter() - to.start_time) + (to.simulated_time * 1e-3)
+        if time_spent > to.time_limit:
+            raise StopCriterionReached("time limit exceeded")
+        return time_spent / to.time_limit
+    
 
 
 def check_tune_params_list(tune_params, observers, simulation_mode=False):

From a4a69ae04e0d9aa23ea75e4840499f9f50f93f44 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 4 Jun 2025 15:24:28 +0200
Subject: [PATCH 176/253] Adjusted CostFunc and tests to use
 budget_spent_fraction

---
 kernel_tuner/strategies/wrapper.py |  7 ++---
 test/test_custom_optimizer.py      | 49 +++++++++++-------------------
 2 files changed, 20 insertions(+), 36 deletions(-)

diff --git a/kernel_tuner/strategies/wrapper.py b/kernel_tuner/strategies/wrapper.py
index a0aa56a4b..1a928ab17 100644
--- a/kernel_tuner/strategies/wrapper.py
+++ b/kernel_tuner/strategies/wrapper.py
@@ -14,13 +14,12 @@ def __init__(self):
         self.costfunc_kwargs = {"scaling": True, "snap": True}
 
     @abstractmethod
-    def __call__(self, func: CostFunc, searchspace: Searchspace, budget_spent_fraction: float) -> tuple[tuple, float]:
-        """_summary_
+    def __call__(self, func: CostFunc, searchspace: Searchspace) -> tuple[tuple, float]:
+        """Optimize the black box function `func` within the given `searchspace`.
 
         Args:
-            func (CostFunc): Cost function to be optimized.
+            func (CostFunc): Cost function to be optimized. Has a property `budget_spent_fraction` that indicates how much of the budget has been spent.
             searchspace (Searchspace): Search space containing the parameters to be optimized.
-            budget_spent_fraction (float): Fraction of the budget that has already been spent.
 
         Returns:
             tuple[tuple, float]: tuple of the best parameters and the corresponding cost value
diff --git a/test/test_custom_optimizer.py b/test/test_custom_optimizer.py
index 7c483bad4..cfc136d3c 100644
--- a/test/test_custom_optimizer.py
+++ b/test/test_custom_optimizer.py
@@ -3,7 +3,9 @@
 
 import numpy as np
 
-class HybridDELocalRefinement:
+from kernel_tuner.strategies.wrapper import OptAlg
+
+class HybridDELocalRefinement(OptAlg):
     """
     A two-phase differential evolution with local refinement, intended for BBOB-type
     black box optimization problems in [-5,5]^dim.
@@ -12,21 +14,14 @@ class HybridDELocalRefinement:
     exploration and local exploitation under a strict function evaluation budget.
     """
 
-    def __init__(self, budget, dim):
-        """
-        Initialize the optimizer with:
-        - budget: total number of function evaluations allowed.
-        - dim: dimensionality of the search space.
-        """
-        self.budget = budget
-        self.dim = dim
+    def __init__(self):
+        super().__init__()
         # You can adjust these hyperparameters based on experimentation/tuning:
-        self.population_size = min(50, 10 * dim)  # Caps for extremely large dim
         self.F = 0.8        # Differential weight
         self.CR = 0.9       # Crossover probability
         self.local_search_freq = 10  # Local refinement frequency in generations
 
-    def __call__(self, func):
+    def __call__(self, func, searchspace):
         """
         Optimize the black box function `func` in [-5,5]^dim, using
         at most self.budget function evaluations.
@@ -35,9 +30,8 @@ def __call__(self, func):
             best_params: np.ndarray representing the best parameters found
             best_value: float representing the best objective value found
         """
-        # Check if we have a non-positive budget
-        if self.budget <= 0:
-            raise ValueError("Budget must be a positive integer.")
+        self.dim = searchspace.num_params
+        self.population_size = round(min(min(50, 10 * self.dim), np.ceil(searchspace.size / 3)))  # Caps for extremely large dim
 
         # 1. Initialize population
         lower_bound, upper_bound = -5.0, 5.0
@@ -49,8 +43,6 @@ def __call__(self, func):
         for i in range(self.population_size):
             fitness[i] = func(pop[i])
             evaluations += 1
-            if evaluations >= self.budget:
-                break
 
         # Track best solution
         best_idx = np.argmin(fitness)
@@ -59,7 +51,7 @@ def __call__(self, func):
 
         # 2. Main evolutionary loop
         gen = 0
-        while evaluations < self.budget:
+        while func.budget_spent_fraction < 1.0 and evaluations < searchspace.size:
             gen += 1
             for i in range(self.population_size):
                 # DE mutation: pick three distinct indices
@@ -78,7 +70,7 @@ def __call__(self, func):
                 # Evaluate trial
                 trial_fitness = func(trial)
                 evaluations += 1
-                if evaluations >= self.budget:
+                if func.budget_spent_fraction > 1.0:
                     # If out of budget, wrap up
                     if trial_fitness < fitness[i]:
                         pop[i] = trial
@@ -99,14 +91,11 @@ def __call__(self, func):
                         best_params = trial.copy()
 
             # Periodically refine best solution with a small local neighborhood search
-            if gen % self.local_search_freq == 0 and evaluations < self.budget:
+            if gen % self.local_search_freq == 0 and func.budget_spent_fraction < 1.0:
                 best_params, best_value, evaluations = self._local_refinement(
                     func, best_params, best_value, evaluations, lower_bound, upper_bound
                 )
 
-            if evaluations >= self.budget:
-                break
-
         return best_params, best_value
 
     def _local_refinement(self, func, best_params, best_value, evaluations, lb, ub):
@@ -115,11 +104,10 @@ def _local_refinement(self, func, best_params, best_value, evaluations, lb, ub):
         Uses a quick 'perturb-and-accept' approach in a shrinking neighborhood.
         """
         # Neighborhood size shrinks as the budget is consumed
-        frac_budget_used = evaluations / self.budget
-        step_size = 0.2 * (1.0 - frac_budget_used)
+        step_size = 0.2 * (1.0 - func.budget_spent_fraction)
 
         for _ in range(5):  # 5 refinements each time
-            if evaluations >= self.budget:
+            if func.budget_spent_fraction >= 1.0:
                 break
             candidate = best_params + np.random.uniform(-step_size, step_size, self.dim)
             candidate = np.clip(candidate, lb, ub)
@@ -138,26 +126,23 @@ def _local_refinement(self, func, best_params, best_value, evaluations, lb, ub):
 import os
 from kernel_tuner import tune_kernel
 from kernel_tuner.strategies.wrapper import OptAlgWrapper
-cache_filename = os.path.dirname(
-
-    os.path.realpath(__file__)) + "/test_cache_file.json"
 
 from .test_runners import env
 
+cache_filename = os.path.dirname(os.path.realpath(__file__)) + "/test_cache_file.json"
 
 def test_OptAlgWrapper(env):
     kernel_name, kernel_string, size, args, tune_params = env
 
     # Instantiate LLaMAE optimization algorithm
-    budget = int(15)
-    dim = len(tune_params)
-    optimizer = HybridDELocalRefinement(budget, dim)
+    optimizer = HybridDELocalRefinement()
 
     # Wrap the algorithm class in the OptAlgWrapper
     # for use in Kernel Tuner
     strategy = OptAlgWrapper(optimizer)
+    strategy_options = { 'max_fevals': 15 }
 
     # Call the tuner
     tune_kernel(kernel_name, kernel_string, size, args, tune_params,
-                strategy=strategy, cache=cache_filename,
+                strategy=strategy, strategy_options=strategy_options, cache=cache_filename,
                 simulation_mode=True, verbose=True)

From e8ff6e758842cdb2b0ee9b0dd3d24bb222b82773 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 5 Jun 2025 10:01:13 +0200
Subject: [PATCH 177/253] Improved dynamic import of modules for custom
 strategies

---
 kernel_tuner/file_utils.py | 27 +++++++++++++++++----------
 kernel_tuner/interface.py  |  5 +++--
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/kernel_tuner/file_utils.py b/kernel_tuner/file_utils.py
index 16ead7c4d..7684eeb84 100644
--- a/kernel_tuner/file_utils.py
+++ b/kernel_tuner/file_utils.py
@@ -305,16 +305,23 @@ def store_metadata_file(metadata_filename: str):
 
 def import_class_from_file(file_path: Path, class_name):
     """Import a class from a file."""
-    module_name = file_path.stem
-    spec = spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Could not load spec from {file_path}")
-    
-    # create a module from the spec and execute it
-    module = module_from_spec(spec)
-    spec.loader.exec_module(module)
-    if not hasattr(module, class_name):
-        raise ImportError(f"Module '{module_name}' has no class '{class_name}'")
+
+    def load_module(module_name):
+        spec = spec_from_file_location(module_name, file_path)
+        if spec is None:
+            raise ImportError(f"Could not load spec from {file_path}")
+        
+        # create a module from the spec and execute it
+        module = module_from_spec(spec)
+        spec.loader.exec_module(module)
+        if not hasattr(module, class_name):
+            raise ImportError(f"Module '{module_name}' has no class '{class_name}'")
+        return module
+
+    try:
+        module = load_module(file_path.stem)
+    except ImportError:
+        module = load_module(f"{file_path.parent.stem}.{file_path.stem}")
     
     # return the class from the module
     return getattr(module, class_name)
diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 3386bbd8b..a1a87274b 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -902,10 +902,9 @@ def tune_kernel_T1(
         class_name: str = strategy
         assert opt_path.exists(), f"Custom search method path '{opt_path}' does not exist relative to current working directory {Path.cwd()}"
         optimizer_class = import_class_from_file(opt_path, class_name)
-        budget = strategy_options.get("max_fevals", 1e12)    # if not set, use a very large number to have it run out at the time limit
         filter_keys = ["custom_search_method_path", "max_fevals", "time_limit", "constraint_aware"]
         adjusted_strategy_options = {k:v for k, v in strategy_options.items() if k not in filter_keys}
-        optimizer_instance = optimizer_class(budget=budget, **adjusted_strategy_options)
+        optimizer_instance = optimizer_class(**adjusted_strategy_options)
         strategy = OptAlgWrapper(optimizer_instance)
 
     # set the cache path
@@ -973,6 +972,8 @@ def tune_kernel_T1(
         elif arg["MemoryType"] == "Scalar":
             if arg["Type"] == "float":
                 argument = numpy.float32(arg["FillValue"])
+            elif arg["Type"] == "int32":
+                argument = numpy.int32(arg["FillValue"])
             else:
                 raise NotImplementedError()
         if argument is not None:

From 160d4b7db4b8894a465d153cef8d82ce4001b38c Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 5 Jun 2025 10:02:22 +0200
Subject: [PATCH 178/253] Expanded custom optimizer test to T1 input format

---
 .gitignore                         |  2 +-
 test/test_cache_file_T1_input.json | 82 ++++++++++++++++++++++++++++++
 test/test_custom_optimizer.py      | 39 +++++++++++---
 test/vector_add.cu                 |  6 +++
 4 files changed, 121 insertions(+), 8 deletions(-)
 create mode 100644 test/test_cache_file_T1_input.json
 create mode 100644 test/vector_add.cu

diff --git a/.gitignore b/.gitignore
index 1f576769a..479c7188c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,7 +20,7 @@ push_to_pypi.sh
 *.json
 !kernel_tuner/schema/T1/1.0.0/input-schema.json
 !test/test_T1_input.json
-!test_cache_file.json
+!test_cache_file*.json
 *.csv
 .cache
 *.ipynb_checkpoints
diff --git a/test/test_cache_file_T1_input.json b/test/test_cache_file_T1_input.json
new file mode 100644
index 000000000..814a1145d
--- /dev/null
+++ b/test/test_cache_file_T1_input.json
@@ -0,0 +1,82 @@
+{
+    "General": {
+        "BenchmarkName": "vector_add",
+        "OutputFormat": "JSON"
+    },
+    "ConfigurationSpace": {
+        "TuningParameters": [
+            {
+                "Name": "block_size_x",
+                "Type": "int",
+                "Values": "[128+64*i for i in range(15)]",
+                "Default": 512
+            }
+        ],
+        "Conditions": []
+    },
+    "KernelSpecification": {
+        "Language": "CUDA",
+        "CompilerOptions": [
+            "-std=c++11"
+        ],
+        "BenchmarkName": "vector_add",
+        "KernelName": "vector_add",
+        "KernelFile": "vector_add.cu",
+        "GlobalSizeType": "CUDA",
+        "LocalSize": {
+            "X": "block_size_x",
+            "Y": "1",
+            "Z": "1"
+        },
+        "GlobalSize": {
+            "X": "10000000 // block_size_x",
+            "Y": "1",
+            "Z": "1"
+        },
+        "GridDivX": [
+            "block_size_x"
+        ],
+        "GridDivY": [
+            "block_size_y"
+        ],
+        "ProblemSize": [],
+        "SharedMemory": 0,
+        "Stream": null,
+        "Arguments": [
+            {
+                "Name": "a",
+                "Type": "float",
+                "MemoryType": "Vector",
+                "AccessType": "ReadOnly",
+                "FillType": "Random",
+                "Size": 10000000,
+                "FillValue": 1.0
+            },
+            {
+                "Name": "b",
+                "Type": "float",
+                "MemoryType": "Vector",
+                "AccessType": "ReadOnly",
+                "FillType": "Random",
+                "Size": 10000000,
+                "FillValue": 1.0
+            },
+            {
+                "Name": "c",
+                "Type": "float",
+                "MemoryType": "Vector",
+                "AccessType": "WriteOnly",
+                "FillType": "Constant",
+                "Size": 10000000,
+                "FillValue": 0.0
+            },
+            {
+                "Name": "n",
+                "Type": "int32",
+                "MemoryType": "Scalar",
+                "AccessType": "ReadOnly",
+                "FillValue": 10000000
+            }
+        ]
+    }
+}
\ No newline at end of file
diff --git a/test/test_custom_optimizer.py b/test/test_custom_optimizer.py
index cfc136d3c..4d9b1c125 100644
--- a/test/test_custom_optimizer.py
+++ b/test/test_custom_optimizer.py
@@ -120,16 +120,14 @@ def _local_refinement(self, func, best_params, best_value, evaluations, lb, ub):
         return best_params, best_value, evaluations
 
 
-
-
 ### Testing the Optimization Algorithm Wrapper in Kernel Tuner
-import os
-from kernel_tuner import tune_kernel
+from kernel_tuner import tune_kernel, tune_kernel_T1
 from kernel_tuner.strategies.wrapper import OptAlgWrapper
+from pathlib import Path
 
-from .test_runners import env
+from .test_runners import env   # noqa: F401
 
-cache_filename = os.path.dirname(os.path.realpath(__file__)) + "/test_cache_file.json"
+cache_filename = Path(__file__).parent.resolve() / "test_cache_file.json"
 
 def test_OptAlgWrapper(env):
     kernel_name, kernel_string, size, args, tune_params = env
@@ -143,6 +141,33 @@ def test_OptAlgWrapper(env):
     strategy_options = { 'max_fevals': 15 }
 
     # Call the tuner
-    tune_kernel(kernel_name, kernel_string, size, args, tune_params,
+    res, _ = tune_kernel(kernel_name, kernel_string, size, args, tune_params,
                 strategy=strategy, strategy_options=strategy_options, cache=cache_filename,
                 simulation_mode=True, verbose=True)
+    assert len(res) == strategy_options['max_fevals']
+
+def test_OptAlgWrapper_T1(env):
+    kernel_name, kernel_string, size, args, tune_params = env
+
+    strategy = "HybridDELocalRefinement"
+    strategy_options = {
+        "max_fevals": 15,
+        "custom_search_method_path": Path(__file__).resolve(),
+        "constraint_aware": False,
+    }
+    iterations = 1
+    
+    res, _ = tune_kernel_T1(
+        Path(__file__).parent.resolve() / "test_cache_file_T1_input.json",
+        cache_filename,
+        device="NVIDIA RTX A4000",
+        objective="time",
+        objective_higher_is_better=False,
+        simulation_mode=True,
+        output_T4=False,
+        iterations=iterations,
+        strategy=strategy,
+        strategy_options=strategy_options,
+    )
+
+    assert len(res) == strategy_options['max_fevals']
diff --git a/test/vector_add.cu b/test/vector_add.cu
new file mode 100644
index 000000000..e79c16308
--- /dev/null
+++ b/test/vector_add.cu
@@ -0,0 +1,6 @@
+__global__ void vector_add(float *c, float *a, float *b, int n) {
+    int i = blockIdx.x * block_size_x + threadIdx.x;
+    if (i<n) {
+        c[i] = a[i] + b[i];
+    }
+}
\ No newline at end of file

From f52d573785f2516b3b44f2aee2fe0cfd2e57de76 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 12 Jun 2025 16:08:11 +0200
Subject: [PATCH 179/253] Improve GA constraint-awareness differentiation

---
 kernel_tuner/strategies/genetic_algorithm.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 27f07e8db..6e6a873d7 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -36,7 +36,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     population = GA.generate_population()
 
     for generation in range(generations):
-        if any([not searchspace.is_param_config_valid(tuple(dna)) for dna in population]):
+        if constraint_aware and any([not searchspace.is_param_config_valid(tuple(dna)) for dna in population]):
             raise ValueError(f"Generation {generation}/{generations}, population validity: {[searchspace.is_param_config_valid(tuple(dna)) for dna in population]}")
 
         # determine fitness of population members
@@ -77,7 +77,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
             for child in children:
                 child = GA.mutate(child)
 
-                if child not in population and searchspace.is_param_config_valid(tuple(child)):
+                if child not in population and (not constraint_aware or searchspace.is_param_config_valid(tuple(child))):
                     population.append(child)
 
                 if len(population) >= pop_size:

From fbb1c19d5411ca18c216c6cfcce2286a8112dabb Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 12 Jun 2025 16:09:36 +0200
Subject: [PATCH 180/253] Implemented simulation of configurations outside the
 restrictions for simulation mode

---
 kernel_tuner/runners/simulation.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/runners/simulation.py b/kernel_tuner/runners/simulation.py
index 22c7c667c..588ac99bc 100644
--- a/kernel_tuner/runners/simulation.py
+++ b/kernel_tuner/runners/simulation.py
@@ -2,6 +2,7 @@
 import logging
 from collections import namedtuple
 from time import perf_counter
+from warnings import warn
 
 from kernel_tuner import util
 from kernel_tuner.runners.runner import Runner
@@ -127,8 +128,26 @@ def run(self, parameter_space, tuning_options):
                 results.append(result)
                 continue
 
-            # if the element is not in the cache, raise an error
-            check = util.check_restrictions(tuning_options.restrictions, dict(zip(tuning_options['tune_params'].keys(), element)), True)
+            # if the configuration is not in the cache and not within restrictions, simulate an InvalidConfig with warning
+            params_dict = dict(zip(tuning_options['tune_params'].keys(), element))
+            check = util.check_restrictions(tuning_options.restrictions, params_dict, True)
+            if not check:
+                result = params_dict
+                result['compile_time'] = 0
+                result['verification_time'] = 0
+                result['benchmark_time'] = 0
+                result['strategy_time'] = self.last_strategy_time
+
+                total_time = 1000 * (perf_counter() - self.start_time)
+                self.start_time = perf_counter()
+                result['framework_time'] = total_time - self.last_strategy_time
+
+                result[tuning_options.objective] = util.InvalidConfig()
+                results.append(result)
+                warn(f"Configuration {element} not in cache, does not pass restrictions. Will be treated as an InvalidConfig, but make sure you are evaluating the correct cache file.")
+                continue
+
+            # if the configuration is not in the cache and passes restrictions, return a ValueError
             err_string = f"kernel configuration {element} not in cache, does {'' if check else 'not '}pass extra restriction check ({check})"
             logging.debug(err_string)
             raise ValueError(f"{err_string} - in simulation mode, all configurations must be present in the cache")

From 7d9385516e27c8a3bd72fcfe7a06cd665df613c4 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Mon, 16 Jun 2025 17:45:33 +0200
Subject: [PATCH 181/253] Ensure the time limit is still checked if we also
 have a fevals budget

---
 kernel_tuner/util.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index 5c2aa4cfe..e51040a22 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -200,12 +200,13 @@ def check_stop_criterion(to: dict) -> float:
         StopCriterionReached: if the max_fevals is reached or time limit is exceeded.
 
     Returns:
-        float: fraction of budget spent.
+        float: fraction of budget spent. If both max_fevals and time_limit are set, it returns the fraction of time.
     """
     if "max_fevals" in to:
         if len(to.unique_results) >= to.max_fevals:
             raise StopCriterionReached(f"max_fevals ({to.max_fevals}) reached")
-        return len(to.unique_results) / to.max_fevals
+        if not "time_limit" in to:
+            return len(to.unique_results) / to.max_fevals
     if "time_limit" in to:
         time_spent = (time.perf_counter() - to.start_time) + (to.simulated_time * 1e-3) + to.startup_time
         if time_spent > to.time_limit:

From 3c293a18702a04d4cb6c0292f7a314ec9fe59842 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Mon, 16 Jun 2025 17:46:17 +0200
Subject: [PATCH 182/253] If the optimizer has a constraint_aware attribute,
 set it in the strategy options

---
 kernel_tuner/interface.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index a1a87274b..659fee996 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -906,6 +906,9 @@ def tune_kernel_T1(
         adjusted_strategy_options = {k:v for k, v in strategy_options.items() if k not in filter_keys}
         optimizer_instance = optimizer_class(**adjusted_strategy_options)
         strategy = OptAlgWrapper(optimizer_instance)
+        if "constraint_aware" not in strategy_options and hasattr(optimizer_instance, "constraint_aware"):
+            # if the optimizer has a constraint_aware attribute, set it in the strategy options
+            strategy_options["constraint_aware"] = optimizer_instance.constraint_aware
 
     # set the cache path
     if cache_filepath is None and "SimulationInput" in kernelspec:

From 268bf679323f7d52476db35ebf7b23fb505a9fe8 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 17 Jun 2025 16:06:13 +0200
Subject: [PATCH 183/253] Initial version of using external package strategies

---
 kernel_tuner/strategies/ptatf_strategies.py | 43 +++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 kernel_tuner/strategies/ptatf_strategies.py

diff --git a/kernel_tuner/strategies/ptatf_strategies.py b/kernel_tuner/strategies/ptatf_strategies.py
new file mode 100644
index 000000000..a13c4a5f0
--- /dev/null
+++ b/kernel_tuner/strategies/ptatf_strategies.py
@@ -0,0 +1,43 @@
+"""Strategy that enables the use of pyATF strategies."""
+
+from kernel_tuner.searchspace import Searchspace
+from kernel_tuner.strategies import common
+from kernel_tuner.strategies.common import CostFunc, setup_method_arguments, setup_method_options
+from kernel_tuner.util import StopCriterionReached
+
+supported_methods = ["Nelder-Mead", "Powell", "CG", "BFGS", "L-BFGS-B", "TNC", "COBYLA", "SLSQP"]
+
+_options = dict(method=(f"Local optimization algorithm to use, choose any from {supported_methods}", "L-BFGS-B"),
+                       T=("Temperature parameter for the accept or reject criterion", 1.0))
+
+def tune(searchspace: Searchspace, runner, tuning_options):
+    method, T = common.get_options(tuning_options.strategy_options, _options)
+
+    # scale variables in x to make 'eps' relevant for multiple variables
+    cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True)
+
+    bounds, x0, eps = cost_func.get_bounds_x0_eps()
+
+    kwargs = setup_method_arguments(method, bounds)
+    options = setup_method_options(method, tuning_options)
+    kwargs['options'] = options
+
+
+    minimizer_kwargs = dict(**kwargs)
+    minimizer_kwargs["method"] = method
+
+    opt_result = None
+    try:
+        opt_result = scipy.optimize.basinhopping(cost_func, x0, T=T, stepsize=eps,
+                                             minimizer_kwargs=minimizer_kwargs, disp=tuning_options.verbose)
+    except StopCriterionReached as e:
+        if tuning_options.verbose:
+            print(e)
+
+    if opt_result and tuning_options.verbose:
+        print(opt_result.message)
+
+    return cost_func.results
+
+
+tune.__doc__ = common.get_strategy_docstring("basin hopping", _options)

From 20ea709b6e986c68872758d10fad4f1e6fae674a Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 19 Jun 2025 08:45:05 +0200
Subject: [PATCH 184/253] Implemented the pyatf_strategies, which enables using
 the pyATF strategies in Kernel Tuner

---
 kernel_tuner/interface.py                   |  14 +-
 kernel_tuner/searchspace.py                 |   6 +-
 kernel_tuner/strategies/common.py           |   1 -
 kernel_tuner/strategies/ptatf_strategies.py |  43 ---
 kernel_tuner/strategies/pyatf_strategies.py | 304 ++++++++++++++++++++
 5 files changed, 317 insertions(+), 51 deletions(-)
 delete mode 100644 kernel_tuner/strategies/ptatf_strategies.py
 create mode 100644 kernel_tuner/strategies/pyatf_strategies.py

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index a1a87274b..e715f4a86 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -47,7 +47,6 @@
 except ImportError:
     torch = util.TorchPlaceHolder()
 
-from kernel_tuner.strategies.wrapper import OptAlgWrapper
 from kernel_tuner.strategies import (
     basinhopping,
     bayes_opt,
@@ -62,9 +61,11 @@
     mls,
     ordered_greedy_mls,
     pso,
+    pyatf_strategies,
     random_sample,
-    simulated_annealing
+    simulated_annealing,
 )
+from kernel_tuner.strategies.wrapper import OptAlgWrapper
 
 strategy_map = {
     "brute_force": brute_force,
@@ -81,7 +82,8 @@
     "pso": pso,
     "simulated_annealing": simulated_annealing,
     "firefly_algorithm": firefly_algorithm,
-    "bayes_opt": bayes_opt
+    "bayes_opt": bayes_opt,
+    "pyatf_strategies": pyatf_strategies,
 }
 
 
@@ -629,6 +631,7 @@ def tune_kernel(
     logging.debug("tuning_options: %s", util.get_config_string(tuning_options))
     logging.debug("device_options: %s", util.get_config_string(device_options))
 
+    strategy_string = strategy
     if strategy:
         if strategy in strategy_map:
             strategy = strategy_map[strategy]
@@ -861,10 +864,9 @@ def tune_kernel_T1(
     strategy: str=None,
     strategy_options: dict={},
 ) -> tuple:
-    """
-    Call the tune function with a T1 input file.
+    """Call the tune function with a T1 input file.
     
-        The device, strategy and strategy_options can be overridden by passing a strategy name and options, otherwise the input file specification is used.
+    The device, strategy and strategy_options can be overridden by passing a strategy name and options, otherwise the input file specification is used.
     """
     inputs = get_input_file(input_filepath)
     kernelspec: dict = inputs["KernelSpecification"]
diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index e650f9628..ffbfa569a 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -76,6 +76,7 @@ def __init__(
         framework_l = framework.lower()
         restrictions = restrictions if restrictions is not None else []
         self.tune_params = tune_params
+        self.tune_params_pyatf = None
         self._tensorspace = None
         self.tensor_dtype = torch.float32 if torch_available else None
         self.tensor_device = torch.device("cpu") if torch_available else None
@@ -376,10 +377,13 @@ def get_params():
                     constraint = res
                 params.append(TP(key, vals, constraint, constraint_source))
             return params
+        
+        # set data
+        self.tune_params_pyatf = get_params()
 
         # tune
         _, _, tuning_data = (
-            Tuner().verbosity(0).tuning_parameters(*get_params()).search_technique(Exhaustive()).tune(costfunc)
+            Tuner().verbosity(0).tuning_parameters(*self.tune_params_pyatf).search_technique(Exhaustive()).tune(costfunc)
         )
 
         # transform the result into a list of parameter configurations for validation
diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index 1ab0b730c..34a2a8b32 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -356,7 +356,6 @@ def scale_from_params(params, tune_params, eps):
 
 def unscale_and_snap_to_nearest_valid(x, params, searchspace, eps):
     """Helper func to snap to the nearest valid configuration"""
-
     # params is nearest unscaled point, but is not valid
     neighbors = get_neighbors(params, searchspace)
 
diff --git a/kernel_tuner/strategies/ptatf_strategies.py b/kernel_tuner/strategies/ptatf_strategies.py
deleted file mode 100644
index a13c4a5f0..000000000
--- a/kernel_tuner/strategies/ptatf_strategies.py
+++ /dev/null
@@ -1,43 +0,0 @@
-"""Strategy that enables the use of pyATF strategies."""
-
-from kernel_tuner.searchspace import Searchspace
-from kernel_tuner.strategies import common
-from kernel_tuner.strategies.common import CostFunc, setup_method_arguments, setup_method_options
-from kernel_tuner.util import StopCriterionReached
-
-supported_methods = ["Nelder-Mead", "Powell", "CG", "BFGS", "L-BFGS-B", "TNC", "COBYLA", "SLSQP"]
-
-_options = dict(method=(f"Local optimization algorithm to use, choose any from {supported_methods}", "L-BFGS-B"),
-                       T=("Temperature parameter for the accept or reject criterion", 1.0))
-
-def tune(searchspace: Searchspace, runner, tuning_options):
-    method, T = common.get_options(tuning_options.strategy_options, _options)
-
-    # scale variables in x to make 'eps' relevant for multiple variables
-    cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True)
-
-    bounds, x0, eps = cost_func.get_bounds_x0_eps()
-
-    kwargs = setup_method_arguments(method, bounds)
-    options = setup_method_options(method, tuning_options)
-    kwargs['options'] = options
-
-
-    minimizer_kwargs = dict(**kwargs)
-    minimizer_kwargs["method"] = method
-
-    opt_result = None
-    try:
-        opt_result = scipy.optimize.basinhopping(cost_func, x0, T=T, stepsize=eps,
-                                             minimizer_kwargs=minimizer_kwargs, disp=tuning_options.verbose)
-    except StopCriterionReached as e:
-        if tuning_options.verbose:
-            print(e)
-
-    if opt_result and tuning_options.verbose:
-        print(opt_result.message)
-
-    return cost_func.results
-
-
-tune.__doc__ = common.get_strategy_docstring("basin hopping", _options)
diff --git a/kernel_tuner/strategies/pyatf_strategies.py b/kernel_tuner/strategies/pyatf_strategies.py
new file mode 100644
index 000000000..929b12613
--- /dev/null
+++ b/kernel_tuner/strategies/pyatf_strategies.py
@@ -0,0 +1,304 @@
+"""Strategy that dynamically imports and enables the use of pyATF strategies."""
+
+from importlib import import_module
+
+from kernel_tuner.searchspace import Searchspace
+from kernel_tuner.strategies import common
+from kernel_tuner.strategies.common import CostFunc
+from kernel_tuner.util import StopCriterionReached
+
+supported_searchtechniques = ["auc_bandit", "differential_evolution", "pattern_search", "round_robin", "simulated_annealing"]
+
+_options = dict(searchtechnique=(f"PyATF optimization algorithm to use, choose any from {supported_searchtechniques}", "simulated_annealing"))
+
+def tune(searchspace: Searchspace, runner, tuning_options):
+    from pyatf.search_techniques.search_technique import SearchTechnique
+
+    # setup the Kernel Tuner functionalities
+    cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True, snap=True)
+    # using this instead of get_bounds because scaling is used
+    bounds, _, eps = cost_func.get_bounds_x0_eps()
+
+    # dynamically import the search technique based on the provided options
+    module_name,  = common.get_options(tuning_options.strategy_options, _options)
+    module = import_module(f"pyatf.search_techniques.{module_name}")
+    class_name = [d for d in dir(module) if d.lower() == module_name.replace('_','')][0]
+    searchtechnique_class = getattr(module, class_name)
+
+    # instantiate the search technique
+    search_technique = searchtechnique_class()
+    search_technique.initialize(len(searchspace.param_names))
+    assert isinstance(search_technique, SearchTechnique), f"Search technique {search_technique} is not a valid pyATF search technique."
+
+    # initialize the search space
+    # from pyatf.search_space import SearchSpace as PyATFSearchSpace
+    # assert searchspace.tune_params_pyatf is not None
+    # search_space = PyATFSearchSpace(*searchspace.tune_params_pyatf, enable_1d_access=False) # SearchTechnique1D currently not supported
+
+    # initialize
+    get_next_coordinates_or_indices = search_technique.get_next_coordinates
+    coordinates_or_indices = set()  # Set[Union[Coordinates, Index]]
+    costs = {}   # Dict[Union[Coordinates, Index], Cost]
+
+    try:
+        # optimization loop (KT-compatible re-implementation of `make_step` from TuningRun)
+        while True:
+
+            # get new coordinates
+            if not coordinates_or_indices:
+                if costs:
+                    search_technique.report_costs(costs)
+                    costs.clear()
+                coordinates_or_indices.update(get_next_coordinates_or_indices())
+
+            # get configuration
+            coords_or_index = coordinates_or_indices.pop()
+            # config = search_space.get_configuration(coords_or_index)
+            valid = True
+            cost = None
+
+            # convert normalized coordinates of each parameter to range of bounds (from [0, 1] to [bound[0], bound[1]])
+            if isinstance(coords_or_index, tuple):
+                coords_or_index = tuple(b[0]+c*(b[1]-b[0]) for c, b in zip(coords_or_index, bounds) if c is not None)
+
+            # evaluate the configuration
+            opt_result = cost_func(coords_or_index) 
+
+            # adjust opt_result to expected PyATF output in cost and valid
+            if not isinstance(opt_result, (int, float)):
+                valid = False
+            else:
+                cost = opt_result
+
+            # record the evaluation
+            costs[coords_or_index] = cost
+    except StopCriterionReached:
+        pass
+    finally:
+        search_technique.finalize()
+
+    return cost_func.results
+
+    # scale variables in x to make 'eps' relevant for multiple variables
+    cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True)
+
+    opt_result = None
+    try:
+        opt_result = searchtechnique(cost_func)
+    except StopCriterionReached as e:
+        searchtechnique.finalize()
+        if tuning_options.verbose:
+            print(e)
+
+    if opt_result and tuning_options.verbose:
+        print(opt_result.message)
+
+    return cost_func.results
+
+
+# class TuningRun:
+#     def __init__(self,
+#                     search_space: SearchSpace | Tuple[TP, ...],
+#                     cost_function: CostFunction,
+#                     search_technique: Optional[Union[SearchTechnique, SearchTechnique1D]],
+#                     verbosity: Optional[int],
+#                     log_file: Optional[str],
+#                     abort_condition: Optional[AbortCondition]):
+#         if search_space is None:
+#             raise ValueError('missing call to `Tuner.tuning_parameters(...)`: no tuning parameters defined')
+
+#         # tuning data
+#         self._search_space: SearchSpace
+#         self._search_technique: SearchTechnique | SearchTechnique1D
+#         self._abort_condition: AbortCondition  # TODO: does not work (add initialization)
+#         self._tps: Tuple[TP, ...]
+#         self._tuning_data: Optional[TuningData] = None
+#         self._cost_function: CostFunction = cost_function
+
+#         # progress data
+#         self._verbosity = verbosity
+#         self._log_file: Optional[TextIO] = None
+#         self._last_log_dump: Optional[int] = None
+#         self._last_line_length: Optional[int] = None
+#         self._tuning_start_ns: Optional[int] = None
+
+#         # prepare search technique
+#         self._search_technique: SearchTechnique | SearchTechnique1D = search_technique
+#         if self._search_technique is None:
+#             self._search_technique = AUCBandit()
+#         if isinstance(self._search_technique, SearchTechnique):
+#             self._get_next_coordinates_or_indices = self._search_technique.get_next_coordinates
+#             self._coordinates_or_index_param_name = 'search_space_coordinates'
+#         else:
+#             self._get_next_coordinates_or_indices = self._search_technique.get_next_indices
+#             self._coordinates_or_index_param_name = 'search_space_index'
+#         self._coordinates_or_indices: Set[Union[Coordinates, Index]] = set()
+#         self._costs: Dict[Union[Coordinates, Index], Cost] = {}
+
+#         # generate search space
+#         if isinstance(search_space, SearchSpace):
+#             self._search_space = search_space
+#         else:
+#             self._search_space = SearchSpace(*search_space,
+#                                                 enable_1d_access=isinstance(self._search_technique, SearchTechnique1D),
+#                                                 verbosity=verbosity)
+#         self._tps = self._search_space.tps
+#         self._search_space_generation_ns = self._search_space.generation_ns
+#         if self._verbosity >= 2:
+#             print(f'search space size: {self._search_space.constrained_size}')
+
+#         # prepare abort condition
+#         self._abort_condition = abort_condition
+#         if self._abort_condition is None:
+#             self._abort_condition = Evaluations(len(self._search_space))
+
+#         # open log file
+#         if log_file:
+#             Path(log_file).parent.mkdir(parents=True, exist_ok=True)
+#             self._log_file = open(log_file, 'w')
+
+#     def __del__(self):
+#         if self._log_file:
+#             self._log_file.close()
+
+#     @property
+#     def cost_function(self):
+#         return self._cost_function
+
+#     @property
+#     def abort_condition(self):
+#         return self._abort_condition
+
+#     @property
+#     def tuning_data(self):
+#         return self._tuning_data
+
+#     def flush_log(self):
+#         if self._log_file:
+#             self._log_file.seek(0)
+#             json.dump(self._tuning_data.to_json(), self._log_file, indent=4)
+#             self._log_file.truncate()
+#             self._last_log_dump = time.perf_counter_ns()
+
+#     def _print_progress(self, timestamp: datetime, cost: Optional[Cost] = None):
+#         now = time.perf_counter_ns()
+#         elapsed_ns = now - self._tuning_start_ns
+#         elapsed_seconds = elapsed_ns // 1000000000
+#         elapsed_time_str = (f'{elapsed_seconds // 3600}'
+#                             f':{elapsed_seconds // 60 % 60:02d}'
+#                             f':{elapsed_seconds % 60:02d}')
+#         progress = self._abort_condition.progress(self._tuning_data)
+#         if self._verbosity >= 3:
+#             line = (f'\r{timestamp.strftime("%Y-%m-%dT%H:%M:%S")}'
+#                     f'    evaluations: {self._tuning_data.number_of_evaluated_configurations}'
+#                     f' (valid: {self._tuning_data.number_of_evaluated_valid_configurations})'
+#                     f', min. cost: {self._tuning_data.min_cost()}'
+#                     f', valid: {cost is not None}'
+#                     f', cost: {cost}')
+#             line_length = len(line)
+#             if line_length < self._last_line_length:
+#                 line += ' ' * (self._last_line_length - line_length)
+#             print(line)
+#         if progress is None:
+#             spinner_char = ('-', '\\', '|', '/')[(elapsed_ns // 500000000) % 4]
+#             line = f'\rTuning: {spinner_char} {elapsed_time_str}\r'
+#             print(line, end='')
+#         else:
+#             if now > self._tuning_start_ns and progress > 0:
+#                 eta_seconds = ceil(((now - self._tuning_start_ns) / progress
+#                                     * (1 - progress)) / 1000000000)
+#                 eta_str = (f'{eta_seconds // 3600}'
+#                             f':{eta_seconds // 60 % 60:02d}'
+#                             f':{eta_seconds % 60:02d}')
+#             else:
+#                 eta_str = '?'
+#             filled = '█' * floor(progress * 80)
+#             empty = ' ' * ceil((1 - progress) * 80)
+#             line = (f'\rexploring search space: |{filled}{empty}|'
+#                     f' {progress * 100:6.2f}% {elapsed_time_str} (ETA: {eta_str})')
+#             print(line, end='')
+#         self._last_line_length = len(line)
+
+#     def initialize(self):
+#         # reset progress data
+#         self._tuning_start_ns = time.perf_counter_ns()
+#         self._last_line_length = 0
+
+#         # create tuning data
+#         self._tuning_data = TuningData(list(tp.to_json() for tp in self._tps),
+#                                         self._search_space.constrained_size,
+#                                         self._search_space.unconstrained_size,
+#                                         self._search_space_generation_ns,
+#                                         self._search_technique.to_json(),
+#                                         self._abort_condition.to_json())
+
+#         # write tuning data
+#         self.flush_log()
+
+#         # initialize search technique
+#         if isinstance(self._search_technique, SearchTechnique1D):
+#             self._search_technique.initialize(len(self._search_space))
+#         else:
+#             self._search_technique.initialize(self._search_space.num_tps)
+
+#     def make_step(self):
+#         # get new coordinates
+#         if not self._coordinates_or_indices:
+#             if self._costs:
+#                 self._search_technique.report_costs(self._costs)
+#                 self._costs.clear()
+#             self._coordinates_or_indices.update(self._get_next_coordinates_or_indices())
+
+#         # get configuration
+#         coords_or_index = self._coordinates_or_indices.pop()
+#         config = self._search_space.get_configuration(coords_or_index)
+
+#         # run cost function
+#         valid = True
+#         try:
+#             cost = self._cost_function(config)
+#         except CostFunctionError as e:
+#             if self._verbosity >= 3:
+#                 print('\r' + ' ' * self._last_line_length + '\r', end='')
+#                 print('Error raised: ' + e.message)
+#                 self._last_line_length = 0
+#             cost = None
+#             valid = False
+#         except BaseException as e:
+#             self._tuning_data.record_evaluation(config, False, None, **{
+#                 self._coordinates_or_index_param_name: coords_or_index
+#             })
+#             self.flush_log()
+#             raise e
+#         timestamp = self._tuning_data.record_evaluation(config, valid, cost, **{
+#             self._coordinates_or_index_param_name: coords_or_index
+#         })
+#         self._costs[coords_or_index] = cost
+
+#         # print progress and dump log file (at most once every 5 minutes)
+#         if self._verbosity >= 1:
+#             self._print_progress(timestamp, cost)
+#         if self._log_file and (self._last_log_dump is None or time.perf_counter_ns() - self._last_log_dump > 3e11):
+#             self.flush_log()
+
+#     def finalize(self, sigint_received: bool = False):
+#         self._search_technique.finalize()
+#         self._tuning_data.record_tuning_finished(sigint_received)
+
+#         # write tuning data to file
+#         if self._log_file:
+#             self.flush_log()
+#             self._log_file.close()
+#             self._log_file = None
+
+#         if self._verbosity >= 1:
+#             print('\nfinished tuning')
+#             if self._verbosity >= 2:
+#                 if self._tuning_data.min_cost() is not None:
+#                     print('best configuration:')
+#                     for tp_name, tp_value in self._tuning_data.configuration_of_min_cost().items():
+#                         print(f'    {tp_name} = {tp_value}')
+#                     print(f'min cost: {self._tuning_data.min_cost()}')
+
+
+tune.__doc__ = common.get_strategy_docstring("pyatf_strategies", _options)

From c2c94589d000b6a1e5bed3488e0ed39207e6d33f Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 20 Jun 2025 09:05:21 +0200
Subject: [PATCH 185/253] Improvements to the stop criterion and cost function
 return values

---
 kernel_tuner/strategies/common.py | 3 ++-
 kernel_tuner/util.py              | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index 34a2a8b32..ffff80d82 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -180,7 +180,8 @@ def __call__(self, x, check_restrictions=True):
             return_value = result[self.tuning_options.objective]
         else:
             return_value = result[self.tuning_options.objective] or sys.float_info.max
-        return_value = -return_value if self.tuning_options.objective_higher_is_better else return_value
+        if not isinstance(return_value, util.ErrorConfig):
+            return_value = -return_value if self.tuning_options.objective_higher_is_better else return_value
 
         # include raw data in return if requested
         if self.return_raw is not None:
diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index 5c2aa4cfe..5bdb884d1 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -205,7 +205,8 @@ def check_stop_criterion(to: dict) -> float:
     if "max_fevals" in to:
         if len(to.unique_results) >= to.max_fevals:
             raise StopCriterionReached(f"max_fevals ({to.max_fevals}) reached")
-        return len(to.unique_results) / to.max_fevals
+        if not "time_limit" in to:
+            return len(to.unique_results) / to.max_fevals
     if "time_limit" in to:
         time_spent = (time.perf_counter() - to.start_time) + (to.simulated_time * 1e-3) + to.startup_time
         if time_spent > to.time_limit:

From 62d98df4154f962b708a6bae024ace4e12b8d6c9 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 20 Jun 2025 09:06:10 +0200
Subject: [PATCH 186/253] Various improvements to the handling of budget and
 return values in pyatf strategies

---
 kernel_tuner/strategies/pyatf_strategies.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/strategies/pyatf_strategies.py b/kernel_tuner/strategies/pyatf_strategies.py
index 929b12613..949748cd3 100644
--- a/kernel_tuner/strategies/pyatf_strategies.py
+++ b/kernel_tuner/strategies/pyatf_strategies.py
@@ -15,7 +15,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     from pyatf.search_techniques.search_technique import SearchTechnique
 
     # setup the Kernel Tuner functionalities
-    cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True, snap=True)
+    cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True, snap=True, return_invalid=True)
     # using this instead of get_bounds because scaling is used
     bounds, _, eps = cost_func.get_bounds_x0_eps()
 
@@ -39,10 +39,11 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     get_next_coordinates_or_indices = search_technique.get_next_coordinates
     coordinates_or_indices = set()  # Set[Union[Coordinates, Index]]
     costs = {}   # Dict[Union[Coordinates, Index], Cost]
+    eval_count = 0
 
     try:
         # optimization loop (KT-compatible re-implementation of `make_step` from TuningRun)
-        while True:
+        while eval_count < searchspace.size:
 
             # get new coordinates
             if not coordinates_or_indices:
@@ -62,13 +63,14 @@ def tune(searchspace: Searchspace, runner, tuning_options):
                 coords_or_index = tuple(b[0]+c*(b[1]-b[0]) for c, b in zip(coords_or_index, bounds) if c is not None)
 
             # evaluate the configuration
-            opt_result = cost_func(coords_or_index) 
+            opt_result = cost_func(coords_or_index)
 
             # adjust opt_result to expected PyATF output in cost and valid
             if not isinstance(opt_result, (int, float)):
                 valid = False
             else:
                 cost = opt_result
+                eval_count += 1
 
             # record the evaluation
             costs[coords_or_index] = cost

From 30de03e23b86ed03f85e56f03ab4a1db85df902c Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Sat, 21 Jun 2025 00:52:12 +0200
Subject: [PATCH 187/253] Added deepcopy of unmodified restrictions for
 reconstructing Searchspace later

---
 kernel_tuner/interface.py          | 10 ++--------
 kernel_tuner/runners/simulation.py |  2 +-
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index e715f4a86..149d5be59 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -30,6 +30,7 @@
 from datetime import datetime
 from pathlib import Path
 from time import perf_counter
+from copy import deepcopy
 
 import numpy
 from constraint import Constraint
@@ -607,14 +608,6 @@ def tune_kernel(
     # ensure there is always at least three names
     util.append_default_block_size_names(block_size_names)
 
-    # if the restrictions are not constraints or a callable, the restrictions are strings, so parse them to functions (increases restrictions check performance significantly)
-    if (
-        restrictions is not None
-        and not callable(restrictions)
-        and not any(isinstance(r, Constraint) for r in restrictions)
-    ):
-        restrictions = util.compile_restrictions(restrictions, tune_params)
-
     # sort all the options into separate dicts
     opts = locals()
     kernel_options = Options([(k, opts[k]) for k in _kernel_options.keys()])
@@ -675,6 +668,7 @@ def preprocess_cache(filepath):
         tuning_options.cachefile = None
 
     # create search space
+    tuning_options.restrictions_unmodified = deepcopy(restrictions)
     searchspace = Searchspace(tune_params, restrictions, runner.dev.max_threads)
     restrictions = searchspace._modified_restrictions
     tuning_options.restrictions = restrictions
diff --git a/kernel_tuner/runners/simulation.py b/kernel_tuner/runners/simulation.py
index 22c7c667c..a08adcb01 100644
--- a/kernel_tuner/runners/simulation.py
+++ b/kernel_tuner/runners/simulation.py
@@ -82,7 +82,7 @@ def run(self, parameter_space, tuning_options):
 
         results = []
 
-        # iterate over parameter space
+        # iterate over parameter space 
         for element in parameter_space:
 
             # check if element is in the cache

From 49e786debcf93c3736db6ebc718a9e91ac714dc5 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Sat, 21 Jun 2025 00:56:28 +0200
Subject: [PATCH 188/253] Search space construction can be deffered to a later
 time, split pyATF search space building and tunable parameter generation

---
 kernel_tuner/searchspace.py | 87 ++++++++++++++++++++++---------------
 1 file changed, 51 insertions(+), 36 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index ffbfa569a..19263a20a 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -47,6 +47,7 @@ def __init__(
         restrictions,
         max_threads: int,
         block_size_names=default_block_size_names,
+        defer_construction=False,
         build_neighbors_index=False,
         neighbor_method=None,
         from_cache: dict = None,
@@ -62,6 +63,7 @@ def __init__(
             Hamming: any parameter config with 1 different parameter value is a neighbor
         Optionally sort the searchspace by the order in which the parameter values were specified. By default, sort goes from first to last parameter, to reverse this use sort_last_param_first.
         Optionally an imported cache can be used instead with `from_cache`, in which case the `tune_params`, `restrictions` and `max_threads` arguments can be set to None, and construction is skipped.
+        Optionally construction can be deffered to a later time by setting `defer_construction` to True, in which case the searchspace is not built on instantiation (experimental).
         """
         # check the arguments
         if from_cache is not None:
@@ -76,7 +78,8 @@ def __init__(
         framework_l = framework.lower()
         restrictions = restrictions if restrictions is not None else []
         self.tune_params = tune_params
-        self.tune_params_pyatf = None
+        self.max_threads = max_threads
+        self.block_size_names = block_size_names
         self._tensorspace = None
         self.tensor_dtype = torch.float32 if torch_available else None
         self.tensor_device = torch.device("cpu") if torch_available else None
@@ -160,17 +163,19 @@ def __init__(
             else:
                 raise ValueError(f"Solver method {solver_method} not recognized.")
 
-            # build the search space
-            self.list, self.__dict, self.size = searchspace_builder(block_size_names, max_threads, solver)
+            if not defer_construction:
+                # build the search space
+                self.list, self.__dict, self.size = searchspace_builder(block_size_names, max_threads, solver)
 
         # finalize construction
-        self.__numpy = None
-        self.num_params = len(self.tune_params)
-        self.indices = np.arange(self.size)
-        if neighbor_method is not None and neighbor_method != "Hamming":
-            self.__prepare_neighbors_index()
-        if build_neighbors_index:
-            self.neighbors_index = self.__build_neighbors_index(neighbor_method)
+        if not defer_construction:
+            self.__numpy = None
+            self.num_params = len(self.tune_params)
+            self.indices = np.arange(self.size)
+            if neighbor_method is not None and neighbor_method != "Hamming":
+                self.__prepare_neighbors_index()
+            if build_neighbors_index:
+                self.neighbors_index = self.__build_neighbors_index(neighbor_method)
 
     # def __build_searchspace_ortools(self, block_size_names: list, max_threads: int) -> Tuple[List[tuple], np.ndarray, dict, int]:
     #     # Based on https://developers.google.com/optimization/cp/cp_solver#python_2
@@ -318,14 +323,15 @@ def all_smt(formula, keys) -> list:
 
         return self.__parameter_space_list_to_lookup_and_return_type(parameter_space_list)
 
-    def __build_searchspace_pyATF(self, block_size_names: list, max_threads: int, solver: Solver):
-        """Builds the searchspace using pyATF."""
-        from pyatf import TP, Interval, Set, Tuner
-        from pyatf.cost_functions.generic import CostFunction
-        from pyatf.search_techniques import Exhaustive
+    def get_tune_params_pyatf(self, block_size_names: list = None, max_threads: int = None):
+        """Convert the tune_params and restrictions to pyATF tunable parameters."""
+        from pyatf import TP, Interval, Set
 
-        # Define a bogus cost function
-        costfunc = CostFunction(":")  # bash no-op
+        # if block_size_names or max_threads are not specified, use the defaults
+        if block_size_names is None:
+            block_size_names = self.block_size_names
+        if max_threads is None:
+            max_threads = self.max_threads
 
         # add the Kernel Tuner default blocksize threads restrictions
         assert isinstance(self.restrictions, list)
@@ -359,27 +365,36 @@ def __build_searchspace_pyATF(self, block_size_names: list, max_threads: int, so
                     registered_restrictions.append(index)
 
         # define the Tunable Parameters
-        def get_params():
-            params = list()
-            for index, (key, values) in enumerate(self.tune_params.items()):
-                vi = get_interval(values)
-                vals = (
-                    Interval(vi[0], vi[1], vi[2]) if vi is not None and vi[2] != 0 else Set(*np.array(values).flatten())
-                )
-                constraint = res_dict.get(key, None)
-                constraint_source = None
-                if constraint is not None:
-                    constraint, constraint_source = constraint
-                # in case of a leftover monolithic restriction, append at the last parameter
-                if index == len(self.tune_params) - 1 and len(res_dict) == 0 and len(self.restrictions) == 1:
-                    res, params, source = self.restrictions[0]
-                    assert callable(res)
-                    constraint = res
-                params.append(TP(key, vals, constraint, constraint_source))
-            return params
+        params = list()
+        for index, (key, values) in enumerate(self.tune_params.items()):
+            vi = get_interval(values)
+            vals = (
+                Interval(vi[0], vi[1], vi[2]) if vi is not None and vi[2] != 0 else Set(*np.array(values).flatten())
+            )
+            constraint = res_dict.get(key, None)
+            constraint_source = None
+            if constraint is not None:
+                constraint, constraint_source = constraint
+            # in case of a leftover monolithic restriction, append at the last parameter
+            if index == len(self.tune_params) - 1 and len(res_dict) == 0 and len(self.restrictions) == 1:
+                res, params, source = self.restrictions[0]
+                assert callable(res)
+                constraint = res
+            params.append(TP(key, vals, constraint, constraint_source))
+        return params
+
+
+    def __build_searchspace_pyATF(self, block_size_names: list, max_threads: int, solver: Solver):
+        """Builds the searchspace using pyATF."""
+        from pyatf import Tuner
+        from pyatf.cost_functions.generic import CostFunction
+        from pyatf.search_techniques import Exhaustive
+
+        # Define a bogus cost function
+        costfunc = CostFunction(":")  # bash no-op
         
         # set data
-        self.tune_params_pyatf = get_params()
+        self.tune_params_pyatf = self.get_tune_params_pyatf(block_size_names, max_threads)
 
         # tune
         _, _, tuning_data = (

From 36208d1783a92e9f8dad7f416c5242f46a0a6911 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Sat, 21 Jun 2025 00:58:00 +0200
Subject: [PATCH 189/253] Implemented pyATF search space lookup of configs

---
 kernel_tuner/strategies/pyatf_strategies.py | 46 ++++++++-------------
 1 file changed, 17 insertions(+), 29 deletions(-)

diff --git a/kernel_tuner/strategies/pyatf_strategies.py b/kernel_tuner/strategies/pyatf_strategies.py
index 949748cd3..bfecbb22b 100644
--- a/kernel_tuner/strategies/pyatf_strategies.py
+++ b/kernel_tuner/strategies/pyatf_strategies.py
@@ -7,17 +7,16 @@
 from kernel_tuner.strategies.common import CostFunc
 from kernel_tuner.util import StopCriterionReached
 
-supported_searchtechniques = ["auc_bandit", "differential_evolution", "pattern_search", "round_robin", "simulated_annealing"]
+supported_searchtechniques = ["auc_bandit", "differential_evolution", "pattern_search", "round_robin", "simulated_annealing", "torczon"]
 
 _options = dict(searchtechnique=(f"PyATF optimization algorithm to use, choose any from {supported_searchtechniques}", "simulated_annealing"))
 
 def tune(searchspace: Searchspace, runner, tuning_options):
     from pyatf.search_techniques.search_technique import SearchTechnique
+    from pyatf.search_space import SearchSpace as pyATFSearchSpace
 
     # setup the Kernel Tuner functionalities
-    cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True, snap=True, return_invalid=True)
-    # using this instead of get_bounds because scaling is used
-    bounds, _, eps = cost_func.get_bounds_x0_eps()
+    cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, snap=False, return_invalid=False)
 
     # dynamically import the search technique based on the provided options
     module_name,  = common.get_options(tuning_options.strategy_options, _options)
@@ -31,9 +30,17 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     assert isinstance(search_technique, SearchTechnique), f"Search technique {search_technique} is not a valid pyATF search technique."
 
     # initialize the search space
-    # from pyatf.search_space import SearchSpace as PyATFSearchSpace
-    # assert searchspace.tune_params_pyatf is not None
-    # search_space = PyATFSearchSpace(*searchspace.tune_params_pyatf, enable_1d_access=False) # SearchTechnique1D currently not supported
+    searchspace_pyatf = Searchspace(
+        searchspace.tune_params, 
+        tuning_options.restrictions_unmodified, 
+        searchspace.max_threads, 
+        searchspace.block_size_names, 
+        defer_construction=True,
+        framework="pyatf"
+    )
+    tune_params_pyatf = searchspace_pyatf.get_tune_params_pyatf()
+    assert isinstance(tune_params_pyatf, (tuple, list)), f"Tuning parameters must be a tuple or list of tuples, is {type(tune_params_pyatf)} ({tune_params_pyatf})."
+    search_space_pyatf = pyATFSearchSpace(*tune_params_pyatf, enable_1d_access=False) # SearchTechnique1D currently not supported
 
     # initialize
     get_next_coordinates_or_indices = search_technique.get_next_coordinates
@@ -54,16 +61,13 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
             # get configuration
             coords_or_index = coordinates_or_indices.pop()
-            # config = search_space.get_configuration(coords_or_index)
+            config = search_space_pyatf.get_configuration(coords_or_index)
             valid = True
             cost = None
 
-            # convert normalized coordinates of each parameter to range of bounds (from [0, 1] to [bound[0], bound[1]])
-            if isinstance(coords_or_index, tuple):
-                coords_or_index = tuple(b[0]+c*(b[1]-b[0]) for c, b in zip(coords_or_index, bounds) if c is not None)
-
             # evaluate the configuration
-            opt_result = cost_func(coords_or_index)
+            x = tuple([config[k] for k in searchspace.tune_params.keys()])
+            opt_result = cost_func(x, check_restrictions=False)
 
             # adjust opt_result to expected PyATF output in cost and valid
             if not isinstance(opt_result, (int, float)):
@@ -81,22 +85,6 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
     return cost_func.results
 
-    # scale variables in x to make 'eps' relevant for multiple variables
-    cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True)
-
-    opt_result = None
-    try:
-        opt_result = searchtechnique(cost_func)
-    except StopCriterionReached as e:
-        searchtechnique.finalize()
-        if tuning_options.verbose:
-            print(e)
-
-    if opt_result and tuning_options.verbose:
-        print(opt_result.message)
-
-    return cost_func.results
-
 
 # class TuningRun:
 #     def __init__(self,

From 934be28aad8e8b75de258853fc25da2dfa2dfbc3 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Sun, 22 Jun 2025 01:00:23 +0200
Subject: [PATCH 190/253] Implemented caching of PyATF searchspace object, both
 storage and retrieval

---
 kernel_tuner/strategies/pyatf_strategies.py | 257 ++++----------------
 1 file changed, 41 insertions(+), 216 deletions(-)

diff --git a/kernel_tuner/strategies/pyatf_strategies.py b/kernel_tuner/strategies/pyatf_strategies.py
index bfecbb22b..7baaa6aff 100644
--- a/kernel_tuner/strategies/pyatf_strategies.py
+++ b/kernel_tuner/strategies/pyatf_strategies.py
@@ -1,6 +1,8 @@
 """Strategy that dynamically imports and enables the use of pyATF strategies."""
 
 from importlib import import_module
+import zlib
+from pathlib import Path
 
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
@@ -11,9 +13,26 @@
 
 _options = dict(searchtechnique=(f"PyATF optimization algorithm to use, choose any from {supported_searchtechniques}", "simulated_annealing"))
 
+def get_cache_checksum(d: dict):
+    checksum=0
+    for item in d.items():
+        c1 = 1
+        for t in item:
+            c1 = zlib.adler32(bytes(repr(t),'utf-8'), c1)
+        checksum=checksum ^ c1
+    return checksum
+
 def tune(searchspace: Searchspace, runner, tuning_options):
     from pyatf.search_techniques.search_technique import SearchTechnique
     from pyatf.search_space import SearchSpace as pyATFSearchSpace
+    from pyatf import TP
+    try:
+        import dill
+        pyatf_search_space_caching = True
+    except ImportError:
+        from warnings import warn
+        pyatf_search_space_caching = False
+        warn("dill is not installed, pyATF search space caching will not be used.")
 
     # setup the Kernel Tuner functionalities
     cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, snap=False, return_invalid=False)
@@ -29,18 +48,29 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     search_technique.initialize(len(searchspace.param_names))
     assert isinstance(search_technique, SearchTechnique), f"Search technique {search_technique} is not a valid pyATF search technique."
 
+    # get the search space hash
+    tune_params_hashable = {k: ",".join([str(i) for i in v]) if isinstance(v, (list, tuple)) else v for k, v in searchspace.tune_params.items()}
+    searchspace_caches_folder = Path("./pyatf_searchspace_caches")
+    searchspace_caches_folder.mkdir(parents=True, exist_ok=True)
+    searchspace_cache_path = searchspace_caches_folder / Path(f"pyatf_searchspace_cache_{get_cache_checksum(tune_params_hashable)}.pkl")
+
     # initialize the search space
-    searchspace_pyatf = Searchspace(
-        searchspace.tune_params, 
-        tuning_options.restrictions_unmodified, 
-        searchspace.max_threads, 
-        searchspace.block_size_names, 
-        defer_construction=True,
-        framework="pyatf"
-    )
-    tune_params_pyatf = searchspace_pyatf.get_tune_params_pyatf()
-    assert isinstance(tune_params_pyatf, (tuple, list)), f"Tuning parameters must be a tuple or list of tuples, is {type(tune_params_pyatf)} ({tune_params_pyatf})."
-    search_space_pyatf = pyATFSearchSpace(*tune_params_pyatf, enable_1d_access=False) # SearchTechnique1D currently not supported
+    if not pyatf_search_space_caching or not searchspace_cache_path.exists():
+        searchspace_pyatf = Searchspace(
+            searchspace.tune_params, 
+            tuning_options.restrictions_unmodified, 
+            searchspace.max_threads, 
+            searchspace.block_size_names, 
+            defer_construction=True,
+            framework="pyatf"
+        )
+        tune_params_pyatf = searchspace_pyatf.get_tune_params_pyatf()
+        assert isinstance(tune_params_pyatf, (tuple, list)), f"Tuning parameters must be a tuple or list of tuples, is {type(tune_params_pyatf)} ({tune_params_pyatf})."
+        search_space_pyatf = pyATFSearchSpace(*tune_params_pyatf, enable_1d_access=False) # SearchTechnique1D currently not supported
+        if pyatf_search_space_caching:
+            dill.dump(search_space_pyatf, open(searchspace_cache_path, "wb"))
+    elif searchspace_cache_path.exists():
+        search_space_pyatf = dill.load(open(searchspace_cache_path, "rb"))
 
     # initialize
     get_next_coordinates_or_indices = search_technique.get_next_coordinates
@@ -86,209 +116,4 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     return cost_func.results
 
 
-# class TuningRun:
-#     def __init__(self,
-#                     search_space: SearchSpace | Tuple[TP, ...],
-#                     cost_function: CostFunction,
-#                     search_technique: Optional[Union[SearchTechnique, SearchTechnique1D]],
-#                     verbosity: Optional[int],
-#                     log_file: Optional[str],
-#                     abort_condition: Optional[AbortCondition]):
-#         if search_space is None:
-#             raise ValueError('missing call to `Tuner.tuning_parameters(...)`: no tuning parameters defined')
-
-#         # tuning data
-#         self._search_space: SearchSpace
-#         self._search_technique: SearchTechnique | SearchTechnique1D
-#         self._abort_condition: AbortCondition  # TODO: does not work (add initialization)
-#         self._tps: Tuple[TP, ...]
-#         self._tuning_data: Optional[TuningData] = None
-#         self._cost_function: CostFunction = cost_function
-
-#         # progress data
-#         self._verbosity = verbosity
-#         self._log_file: Optional[TextIO] = None
-#         self._last_log_dump: Optional[int] = None
-#         self._last_line_length: Optional[int] = None
-#         self._tuning_start_ns: Optional[int] = None
-
-#         # prepare search technique
-#         self._search_technique: SearchTechnique | SearchTechnique1D = search_technique
-#         if self._search_technique is None:
-#             self._search_technique = AUCBandit()
-#         if isinstance(self._search_technique, SearchTechnique):
-#             self._get_next_coordinates_or_indices = self._search_technique.get_next_coordinates
-#             self._coordinates_or_index_param_name = 'search_space_coordinates'
-#         else:
-#             self._get_next_coordinates_or_indices = self._search_technique.get_next_indices
-#             self._coordinates_or_index_param_name = 'search_space_index'
-#         self._coordinates_or_indices: Set[Union[Coordinates, Index]] = set()
-#         self._costs: Dict[Union[Coordinates, Index], Cost] = {}
-
-#         # generate search space
-#         if isinstance(search_space, SearchSpace):
-#             self._search_space = search_space
-#         else:
-#             self._search_space = SearchSpace(*search_space,
-#                                                 enable_1d_access=isinstance(self._search_technique, SearchTechnique1D),
-#                                                 verbosity=verbosity)
-#         self._tps = self._search_space.tps
-#         self._search_space_generation_ns = self._search_space.generation_ns
-#         if self._verbosity >= 2:
-#             print(f'search space size: {self._search_space.constrained_size}')
-
-#         # prepare abort condition
-#         self._abort_condition = abort_condition
-#         if self._abort_condition is None:
-#             self._abort_condition = Evaluations(len(self._search_space))
-
-#         # open log file
-#         if log_file:
-#             Path(log_file).parent.mkdir(parents=True, exist_ok=True)
-#             self._log_file = open(log_file, 'w')
-
-#     def __del__(self):
-#         if self._log_file:
-#             self._log_file.close()
-
-#     @property
-#     def cost_function(self):
-#         return self._cost_function
-
-#     @property
-#     def abort_condition(self):
-#         return self._abort_condition
-
-#     @property
-#     def tuning_data(self):
-#         return self._tuning_data
-
-#     def flush_log(self):
-#         if self._log_file:
-#             self._log_file.seek(0)
-#             json.dump(self._tuning_data.to_json(), self._log_file, indent=4)
-#             self._log_file.truncate()
-#             self._last_log_dump = time.perf_counter_ns()
-
-#     def _print_progress(self, timestamp: datetime, cost: Optional[Cost] = None):
-#         now = time.perf_counter_ns()
-#         elapsed_ns = now - self._tuning_start_ns
-#         elapsed_seconds = elapsed_ns // 1000000000
-#         elapsed_time_str = (f'{elapsed_seconds // 3600}'
-#                             f':{elapsed_seconds // 60 % 60:02d}'
-#                             f':{elapsed_seconds % 60:02d}')
-#         progress = self._abort_condition.progress(self._tuning_data)
-#         if self._verbosity >= 3:
-#             line = (f'\r{timestamp.strftime("%Y-%m-%dT%H:%M:%S")}'
-#                     f'    evaluations: {self._tuning_data.number_of_evaluated_configurations}'
-#                     f' (valid: {self._tuning_data.number_of_evaluated_valid_configurations})'
-#                     f', min. cost: {self._tuning_data.min_cost()}'
-#                     f', valid: {cost is not None}'
-#                     f', cost: {cost}')
-#             line_length = len(line)
-#             if line_length < self._last_line_length:
-#                 line += ' ' * (self._last_line_length - line_length)
-#             print(line)
-#         if progress is None:
-#             spinner_char = ('-', '\\', '|', '/')[(elapsed_ns // 500000000) % 4]
-#             line = f'\rTuning: {spinner_char} {elapsed_time_str}\r'
-#             print(line, end='')
-#         else:
-#             if now > self._tuning_start_ns and progress > 0:
-#                 eta_seconds = ceil(((now - self._tuning_start_ns) / progress
-#                                     * (1 - progress)) / 1000000000)
-#                 eta_str = (f'{eta_seconds // 3600}'
-#                             f':{eta_seconds // 60 % 60:02d}'
-#                             f':{eta_seconds % 60:02d}')
-#             else:
-#                 eta_str = '?'
-#             filled = '█' * floor(progress * 80)
-#             empty = ' ' * ceil((1 - progress) * 80)
-#             line = (f'\rexploring search space: |{filled}{empty}|'
-#                     f' {progress * 100:6.2f}% {elapsed_time_str} (ETA: {eta_str})')
-#             print(line, end='')
-#         self._last_line_length = len(line)
-
-#     def initialize(self):
-#         # reset progress data
-#         self._tuning_start_ns = time.perf_counter_ns()
-#         self._last_line_length = 0
-
-#         # create tuning data
-#         self._tuning_data = TuningData(list(tp.to_json() for tp in self._tps),
-#                                         self._search_space.constrained_size,
-#                                         self._search_space.unconstrained_size,
-#                                         self._search_space_generation_ns,
-#                                         self._search_technique.to_json(),
-#                                         self._abort_condition.to_json())
-
-#         # write tuning data
-#         self.flush_log()
-
-#         # initialize search technique
-#         if isinstance(self._search_technique, SearchTechnique1D):
-#             self._search_technique.initialize(len(self._search_space))
-#         else:
-#             self._search_technique.initialize(self._search_space.num_tps)
-
-#     def make_step(self):
-#         # get new coordinates
-#         if not self._coordinates_or_indices:
-#             if self._costs:
-#                 self._search_technique.report_costs(self._costs)
-#                 self._costs.clear()
-#             self._coordinates_or_indices.update(self._get_next_coordinates_or_indices())
-
-#         # get configuration
-#         coords_or_index = self._coordinates_or_indices.pop()
-#         config = self._search_space.get_configuration(coords_or_index)
-
-#         # run cost function
-#         valid = True
-#         try:
-#             cost = self._cost_function(config)
-#         except CostFunctionError as e:
-#             if self._verbosity >= 3:
-#                 print('\r' + ' ' * self._last_line_length + '\r', end='')
-#                 print('Error raised: ' + e.message)
-#                 self._last_line_length = 0
-#             cost = None
-#             valid = False
-#         except BaseException as e:
-#             self._tuning_data.record_evaluation(config, False, None, **{
-#                 self._coordinates_or_index_param_name: coords_or_index
-#             })
-#             self.flush_log()
-#             raise e
-#         timestamp = self._tuning_data.record_evaluation(config, valid, cost, **{
-#             self._coordinates_or_index_param_name: coords_or_index
-#         })
-#         self._costs[coords_or_index] = cost
-
-#         # print progress and dump log file (at most once every 5 minutes)
-#         if self._verbosity >= 1:
-#             self._print_progress(timestamp, cost)
-#         if self._log_file and (self._last_log_dump is None or time.perf_counter_ns() - self._last_log_dump > 3e11):
-#             self.flush_log()
-
-#     def finalize(self, sigint_received: bool = False):
-#         self._search_technique.finalize()
-#         self._tuning_data.record_tuning_finished(sigint_received)
-
-#         # write tuning data to file
-#         if self._log_file:
-#             self.flush_log()
-#             self._log_file.close()
-#             self._log_file = None
-
-#         if self._verbosity >= 1:
-#             print('\nfinished tuning')
-#             if self._verbosity >= 2:
-#                 if self._tuning_data.min_cost() is not None:
-#                     print('best configuration:')
-#                     for tp_name, tp_value in self._tuning_data.configuration_of_min_cost().items():
-#                         print(f'    {tp_name} = {tp_value}')
-#                     print(f'min cost: {self._tuning_data.min_cost()}')
-
-
 tune.__doc__ = common.get_strategy_docstring("pyatf_strategies", _options)

From 1d916bd866dbad0f8c802f9ffecf34ccfa5b3758 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Mon, 23 Jun 2025 14:51:01 +0200
Subject: [PATCH 191/253] Implemented passing whether or not to use the
 searchspace cache as a hyperparameter

---
 kernel_tuner/strategies/pyatf_strategies.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/strategies/pyatf_strategies.py b/kernel_tuner/strategies/pyatf_strategies.py
index 7baaa6aff..92bbc718a 100644
--- a/kernel_tuner/strategies/pyatf_strategies.py
+++ b/kernel_tuner/strategies/pyatf_strategies.py
@@ -11,7 +11,10 @@
 
 supported_searchtechniques = ["auc_bandit", "differential_evolution", "pattern_search", "round_robin", "simulated_annealing", "torczon"]
 
-_options = dict(searchtechnique=(f"PyATF optimization algorithm to use, choose any from {supported_searchtechniques}", "simulated_annealing"))
+_options = dict(
+    searchtechnique=(f"PyATF optimization algorithm to use, choose any from {supported_searchtechniques}", "simulated_annealing"),
+    use_searchspace_cache=(f"Use a cached search space if available, otherwise create a new one.", False)
+)
 
 def get_cache_checksum(d: dict):
     checksum=0
@@ -26,9 +29,13 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     from pyatf.search_techniques.search_technique import SearchTechnique
     from pyatf.search_space import SearchSpace as pyATFSearchSpace
     from pyatf import TP
+
+    # get the search technique module name and whether to use search space caching
+    module_name, use_searchspace_cache = common.get_options(tuning_options.strategy_options, _options)
     try:
-        import dill
-        pyatf_search_space_caching = True
+        if use_searchspace_cache:
+            import dill
+        pyatf_search_space_caching = use_searchspace_cache
     except ImportError:
         from warnings import warn
         pyatf_search_space_caching = False
@@ -38,7 +45,6 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, snap=False, return_invalid=False)
 
     # dynamically import the search technique based on the provided options
-    module_name,  = common.get_options(tuning_options.strategy_options, _options)
     module = import_module(f"pyatf.search_techniques.{module_name}")
     class_name = [d for d in dir(module) if d.lower() == module_name.replace('_','')][0]
     searchtechnique_class = getattr(module, class_name)

From 08a1029a4c2e411ed437ef73c4293e3818797811 Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Wed, 25 Jun 2025 13:57:15 +0200
Subject: [PATCH 192/253] changed SA to act similar with or without constraint
 awareness

---
 .../strategies/simulated_annealing.py         | 25 +++++++++++++------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py
index 517203fad..4ab6690cc 100644
--- a/kernel_tuner/strategies/simulated_annealing.py
+++ b/kernel_tuner/strategies/simulated_annealing.py
@@ -115,17 +115,28 @@ def acceptance_prob(old_cost, new_cost, T, tuning_options):
 def neighbor(pos, searchspace: Searchspace, constraint_aware=True):
     """Return a random neighbor of pos."""
 
+    def random_neighbor(pos, method):
+        """Helper method to return a random neighbor."""
+        neighbors = searchspace.get_neighbors_no_cache(pos, neighbor_method=method)
+        if not neighbors:
+            return pos
+        return random.choice(neighbors)
+
+    size = len(pos)
+
     if constraint_aware:
-        # Note: this is not the same as the previous implementation, because it is possible that non-edge parameters remain the same, but suggested configurations will all be within restrictions
-        neighbors = searchspace.get_neighbors(tuple(pos), neighbor_method='Hamming') if random.random() < 0.2 else searchspace.get_neighbors(tuple(pos), neighbor_method='strictly-adjacent')
-        if len(neighbors) > 0:
-            return list(random.choice(neighbors))
-        # if there are no neighbors, return a random configuration
-        return list(searchspace.get_random_sample(1)[0])
+        pos = tuple(pos)
+
+        # Note: the following tries to mimick as much as possible the earlier version of SA but in a constraint-aware version
+        for i in range(size):
+            if random.random() < 0.2:
+                pos = random_neighbor(pos, 'Hamming')
+        pos = random_neighbor(pos, 'adjacent')
+
+        return list(pos)
 
     else:
         tune_params = searchspace.tune_params
-        size = len(pos)
         pos_out = []
         # random mutation
         # expected value is set that values all dimensions attempt to get mutated

From 4394d135254a5039b8b1c581eea90cf731b7ca1b Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 25 Jun 2025 16:16:25 +0200
Subject: [PATCH 193/253] Amended strategy tests to account for pyATF
 limitations

---
 test/strategies/test_strategies.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
index 67653190f..0c120d22d 100644
--- a/test/strategies/test_strategies.py
+++ b/test/strategies/test_strategies.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 import pytest
+from pathlib import Path
 
 import kernel_tuner
 from kernel_tuner.util import InvalidConfig
@@ -9,7 +10,6 @@
 
 from ..context import skip_if_no_bayesopt_botorch, skip_if_no_bayesopt_gpytorch
 
-cache_filename = os.path.dirname(os.path.realpath(__file__)) + "/test_cache_file.json"
 
 @pytest.fixture
 def vector_add():
@@ -51,7 +51,7 @@ def vector_add():
         strategies.append(s)
 @pytest.mark.parametrize('strategy', strategies)
 def test_strategies(vector_add, strategy):
-
+    cache_filename =  Path(__file__).parent / "test_cache_file.json"
     options = dict(popsize=5, neighbor='adjacent')
 
     print(f"testing {strategy}")
@@ -64,6 +64,17 @@ def test_strategies(vector_add, strategy):
 
     restrictions = ["test_string == 'alg_2'", "test_bool == True", "test_mixed == 2.45"]
 
+    # pyATF can't handle non-number tune parameters, so we filter them out
+    if strategy == "pyatf_strategies":
+        tune_params = {
+            "block_size_x": [128 + 64 * i for i in range(15)]
+        }
+        restrictions = []
+        cache_filename = cache_filename.parent.parent / "test_cache_file.json"
+        vector_add[-1] = tune_params
+
+    # run the tuning in simulation mode
+    assert cache_filename.exists()
     results, _ = kernel_tuner.tune_kernel(*vector_add, restrictions=restrictions, strategy=strategy, strategy_options=filter_options,
                                          verbose=False, cache=cache_filename, simulation_mode=True)
 
@@ -82,10 +93,6 @@ def test_strategies(vector_add, strategy):
     # check whether the returned dictionaries contain exactly the expected keys and the appropriate type
     expected_items = {
         'block_size_x': int,
-        'test_string': str,
-        'test_single': int,
-        'test_bool': bool,
-        'test_mixed': float,
         'time': (float, int),
         'times': list,
         'compile_time': (float, int),
@@ -95,6 +102,11 @@ def test_strategies(vector_add, strategy):
         'framework_time': (float, int),
         'timestamp': str
     }
+    if strategy != "pyatf_strategies":
+        expected_items['test_string'] = str
+        expected_items['test_single'] = int
+        expected_items['test_bool'] = bool
+        expected_items['test_mixed'] = float
     for res in results:
         assert len(res) == len(expected_items)
         for expected_key, expected_type in expected_items.items():

From e3980e2304b410c63f095d495ed1bf9da1f64796 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 25 Jun 2025 16:20:52 +0200
Subject: [PATCH 194/253] Implemented context for pyATF tests to be skipped if
 not installed

---
 test/context.py                    | 7 +++++++
 test/strategies/test_strategies.py | 4 +++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/test/context.py b/test/context.py
index da4c7aa2f..bad152986 100644
--- a/test/context.py
+++ b/test/context.py
@@ -73,6 +73,12 @@
 except ImportError:
     bayes_opt_gpytorch_present = False
 
+try:
+    import pyatf
+    pyatf_present = True
+except ImportError:
+    pyatf_present = False
+
 try:
     from autotuning_methodology.report_experiments import get_strategy_scores
     methodology_present = True
@@ -101,6 +107,7 @@
 skip_if_no_bayesopt_gpytorch = pytest.mark.skipif(not bayes_opt_gpytorch_present, reason="Torch and GPyTorch not installed")
 skip_if_no_bayesopt_botorch = pytest.mark.skipif(not bayes_opt_botorch_present, reason="Torch and BOTorch not installed")
 skip_if_no_hip = pytest.mark.skipif(not hip_present, reason="No HIP Python found")
+skip_if_no_pyatf = pytest.mark.skipif(not pyatf_present, reason="PyATF not installed")
 skip_if_no_methodology = pytest.mark.skipif(not methodology_present, reason="Autotuning Methodology not found")
 
 
diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
index 0c120d22d..faae370dc 100644
--- a/test/strategies/test_strategies.py
+++ b/test/strategies/test_strategies.py
@@ -8,7 +8,7 @@
 from kernel_tuner.util import InvalidConfig
 from kernel_tuner.interface import strategy_map
 
-from ..context import skip_if_no_bayesopt_botorch, skip_if_no_bayesopt_gpytorch
+from ..context import skip_if_no_bayesopt_botorch, skip_if_no_bayesopt_gpytorch, skip_if_no_pyatf
 
 
 @pytest.fixture
@@ -47,6 +47,8 @@ def vector_add():
         strategies.append(pytest.param(s, marks=skip_if_no_bayesopt_gpytorch))
     elif 'botorch' in s.lower():
         strategies.append(pytest.param(s, marks=skip_if_no_bayesopt_botorch))
+    elif 'pyatf' in s.lower():
+        strategies.append(pytest.param(s, marks=skip_if_no_pyatf))
     else:
         strategies.append(s)
 @pytest.mark.parametrize('strategy', strategies)

From ecf7218fbf81b4b1598513f11c53d52d9c4a5d46 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 25 Jun 2025 16:41:26 +0200
Subject: [PATCH 195/253] Lowered the cutoff percentile to have a faster test

---
 test/test_hyper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_hyper.py b/test/test_hyper.py
index 7863c2e47..5963b3260 100644
--- a/test/test_hyper.py
+++ b/test/test_hyper.py
@@ -24,7 +24,7 @@ def test_hyper(env):
                 "minimum_fraction_of_budget_valid": 0.01, 
             },
             "statistics_settings": {
-                "cutoff_percentile": 0.90,
+                "cutoff_percentile": 0.80,
                 "cutoff_percentile_start": 0.01,
                 "cutoff_type": "time",
                 "objective_time_keys": [

From 5c94c78ffbd1345ac520c682ff7ab37ab1babc3e Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 25 Jun 2025 16:42:40 +0200
Subject: [PATCH 196/253] Basic implementation to use Searchspace in Bayesian
 Optimization

---
 kernel_tuner/strategies/bayes_opt.py          | 23 +++++--------------
 test/strategies/test_bayesian_optimization.py |  2 +-
 2 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
index 66e360009..663cb12c8 100644
--- a/kernel_tuner/strategies/bayes_opt.py
+++ b/kernel_tuner/strategies/bayes_opt.py
@@ -24,8 +24,6 @@
 except ImportError:
     bayes_opt_present = False
 
-from kernel_tuner import util
-
 supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast", "multi-ultrafast"]
 
 
@@ -107,19 +105,8 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     _, _, eps = cost_func.get_bounds_x0_eps()
 
     # compute cartesian product of all tunable parameters
-    parameter_space = itertools.product(*tune_params.values())
-
-    # check for search space restrictions
-    if searchspace.restrictions is not None:
-        tuning_options.verbose = False
-    parameter_space = filter(lambda p: util.config_valid(p, tuning_options, runner.dev.max_threads), parameter_space)
-    parameter_space = list(parameter_space)
-    if len(parameter_space) < 1:
-        raise ValueError("Empty parameterspace after restrictionscheck. Restrictionscheck is possibly too strict.")
-    if len(parameter_space) == 1:
-        raise ValueError(
-            f"Only one configuration after restrictionscheck. Restrictionscheck is possibly too strict. Configuration: {parameter_space[0]}"
-        )
+    # TODO actually use the Searchspace object properly throughout Bayesian Optimization
+    parameter_space = searchspace.list
 
     # normalize search space to [0,1]
     normalize_dict, denormalize_dict = generate_normalized_param_dicts(tune_params, eps)
@@ -137,7 +124,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     # initialize and optimize
     try:
         bo = BayesianOptimization(
-            parameter_space, removed_tune_params, tuning_options, normalize_dict, denormalize_dict, cost_func
+            parameter_space, searchspace, removed_tune_params, tuning_options, normalize_dict, denormalize_dict, cost_func
         )
     except StopCriterionReached:
         warnings.warn(
@@ -179,6 +166,7 @@ class BayesianOptimization:
     def __init__(
         self,
         searchspace: list,
+        searchspace_obj: Searchspace,
         removed_tune_params: list,
         tuning_options: dict,
         normalize_dict: dict,
@@ -256,6 +244,7 @@ def get_hyperparam(name: str, default, supported_values=list()):
 
         # set remaining values
         self.__searchspace = searchspace
+        self.__searchspace_obj = searchspace_obj
         self.removed_tune_params = removed_tune_params
         self.searchspace_size = len(self.searchspace)
         self.num_dimensions = len(self.dimensions())
@@ -463,7 +452,7 @@ def evaluate_objective_function(self, param_config: tuple) -> float:
         """Evaluates the objective function."""
         param_config = self.unprune_param_config(param_config)
         denormalized_param_config = self.denormalize_param_config(param_config)
-        if not util.config_valid(denormalized_param_config, self.tuning_options, self.max_threads):
+        if not self.__searchspace_obj.is_param_config_valid(denormalized_param_config):
             return self.invalid_value
         val = self.cost_func(param_config)
         self.fevals += 1
diff --git a/test/strategies/test_bayesian_optimization.py b/test/strategies/test_bayesian_optimization.py
index 8d929054a..f8c889aab 100644
--- a/test/strategies/test_bayesian_optimization.py
+++ b/test/strategies/test_bayesian_optimization.py
@@ -37,7 +37,7 @@
 pruned_parameter_space, removed_tune_params = bayes_opt.prune_parameter_space(normalized_parameter_space, tuning_options, tune_params, original_to_normalized)
 
 # initialize BO
-BO = BayesianOptimization(pruned_parameter_space, removed_tune_params, tuning_options, original_to_normalized, normalized_to_original, cost_func)
+BO = BayesianOptimization(pruned_parameter_space, searchspace, removed_tune_params, tuning_options, original_to_normalized, normalized_to_original, cost_func)
 predictions, _, std = BO.predict_list(BO.unvisited_cache)
 
 

From 6cd8029bf26f049b0ed6172c74e61349cdd21375 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 25 Jun 2025 17:43:03 +0200
Subject: [PATCH 197/253] Changed pyATF strategies to use invalid configs

---
 kernel_tuner/strategies/pyatf_strategies.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel_tuner/strategies/pyatf_strategies.py b/kernel_tuner/strategies/pyatf_strategies.py
index 92bbc718a..897b3b5b6 100644
--- a/kernel_tuner/strategies/pyatf_strategies.py
+++ b/kernel_tuner/strategies/pyatf_strategies.py
@@ -42,7 +42,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         warn("dill is not installed, pyATF search space caching will not be used.")
 
     # setup the Kernel Tuner functionalities
-    cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, snap=False, return_invalid=False)
+    cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, snap=False, return_invalid=True)
 
     # dynamically import the search technique based on the provided options
     module = import_module(f"pyatf.search_techniques.{module_name}")

From ec51b0a0ac3eac1b82de62d40e94202ee9ea4af2 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 25 Jun 2025 17:53:52 +0200
Subject: [PATCH 198/253] Changed to cached neighbor lookup now multiple
 methods are cached simultaneously

---
 kernel_tuner/strategies/common.py              | 2 +-
 kernel_tuner/strategies/genetic_algorithm.py   | 4 ++--
 kernel_tuner/strategies/greedy_ils.py          | 2 +-
 kernel_tuner/strategies/simulated_annealing.py | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index efe323375..c0d0b077a 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -375,7 +375,7 @@ def unscale_and_snap_to_nearest_valid(x, params, searchspace, eps):
 
 def get_neighbors(params, searchspace):
     for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]:
-        neighbors = searchspace.get_neighbors_no_cache(tuple(params), neighbor_method=neighbor_method)
+        neighbors = searchspace.get_neighbors(tuple(params), neighbor_method=neighbor_method)
         if len(neighbors) > 0:
             return neighbors
     return []
diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 27f07e8db..b0536edc5 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -161,7 +161,7 @@ def mutate(self, dna, cache=False):
                 if cache:
                     neighbors = self.searchspace.get_neighbors(tuple(dna), neighbor_method="Hamming")
                 else:
-                    neighbors = self.searchspace.get_neighbors_no_cache(tuple(dna), neighbor_method="Hamming")
+                    neighbors = self.searchspace.get_neighbors(tuple(dna), neighbor_method="Hamming")
                 if len(neighbors) > 0:
                     return list(random.choice(neighbors))
             else:
@@ -185,7 +185,7 @@ def repair(self, dna):
             # search for valid configurations neighboring this config
             # start from strictly-adjacent to increasingly allowing more neighbors
             for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]:
-                neighbors = self.searchspace.get_neighbors_no_cache(tuple(dna), neighbor_method=neighbor_method)
+                neighbors = self.searchspace.get_neighbors(tuple(dna), neighbor_method=neighbor_method)
 
                 # if we have found valid neighboring configurations, select one at random
                 if len(neighbors) > 0:
diff --git a/kernel_tuner/strategies/greedy_ils.py b/kernel_tuner/strategies/greedy_ils.py
index 51a3c784e..387ea9ece 100644
--- a/kernel_tuner/strategies/greedy_ils.py
+++ b/kernel_tuner/strategies/greedy_ils.py
@@ -60,7 +60,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 tune.__doc__ = common.get_strategy_docstring("Greedy Iterative Local Search (ILS)", _options)
 
 def mutate(indiv, searchspace: Searchspace):
-    neighbors = searchspace.get_neighbors_no_cache(tuple(indiv), neighbor_method="Hamming")
+    neighbors = searchspace.get_neighbors(tuple(indiv), neighbor_method="Hamming")
     return list(random_choice(neighbors))
 
 
diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py
index 4ab6690cc..f383abb1c 100644
--- a/kernel_tuner/strategies/simulated_annealing.py
+++ b/kernel_tuner/strategies/simulated_annealing.py
@@ -117,7 +117,7 @@ def neighbor(pos, searchspace: Searchspace, constraint_aware=True):
 
     def random_neighbor(pos, method):
         """Helper method to return a random neighbor."""
-        neighbors = searchspace.get_neighbors_no_cache(pos, neighbor_method=method)
+        neighbors = searchspace.get_neighbor(pos, neighbor_method=method)
         if not neighbors:
             return pos
         return random.choice(neighbors)

From f2902b5979081fb5121ba39e89a7b29081693fcf Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 26 Jun 2025 09:45:14 +0200
Subject: [PATCH 199/253] Implemented a fix for the non-constrained version of
 GA

---
 kernel_tuner/strategies/genetic_algorithm.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index b0536edc5..d885f1f9f 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -36,7 +36,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     population = GA.generate_population()
 
     for generation in range(generations):
-        if any([not searchspace.is_param_config_valid(tuple(dna)) for dna in population]):
+        if constraint_aware and any([not searchspace.is_param_config_valid(tuple(dna)) for dna in population]):
             raise ValueError(f"Generation {generation}/{generations}, population validity: {[searchspace.is_param_config_valid(tuple(dna)) for dna in population]}")
 
         # determine fitness of population members
@@ -77,7 +77,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
             for child in children:
                 child = GA.mutate(child)
 
-                if child not in population and searchspace.is_param_config_valid(tuple(child)):
+                if child not in population and (not constraint_aware or searchspace.is_param_config_valid(tuple(child))):
                     population.append(child)
 
                 if len(population) >= pop_size:
@@ -161,7 +161,7 @@ def mutate(self, dna, cache=False):
                 if cache:
                     neighbors = self.searchspace.get_neighbors(tuple(dna), neighbor_method="Hamming")
                 else:
-                    neighbors = self.searchspace.get_neighbors(tuple(dna), neighbor_method="Hamming")
+                    neighbors = self.searchspace.get_neighbors_no_cache(tuple(dna), neighbor_method="Hamming")
                 if len(neighbors) > 0:
                     return list(random.choice(neighbors))
             else:

From 27226a9b1bf8c207bb55ad203bf743fce98aee72 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 26 Jun 2025 10:22:11 +0200
Subject: [PATCH 200/253] Improvements to Simulated Annealing regarding invalid
 and error configurations

---
 kernel_tuner/strategies/simulated_annealing.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py
index f383abb1c..6034163aa 100644
--- a/kernel_tuner/strategies/simulated_annealing.py
+++ b/kernel_tuner/strategies/simulated_annealing.py
@@ -4,7 +4,7 @@
 
 import numpy as np
 
-from kernel_tuner.util import StopCriterionReached
+from kernel_tuner.util import StopCriterionReached, ErrorConfig
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc
@@ -18,7 +18,7 @@
 
 def tune(searchspace: Searchspace, runner, tuning_options):
     # SA works with real parameter values and does not need scaling
-    cost_func = CostFunc(searchspace, tuning_options, runner)
+    cost_func = CostFunc(searchspace, tuning_options, runner, return_invalid=True)
 
     # optimization parameters
     T, T_min, alpha, niter, constraint_aware = common.get_options(tuning_options.strategy_options, _options)
@@ -36,7 +36,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
     # get random starting point and evaluate cost
     pos = generate_starting_point(searchspace, constraint_aware)
-    old_cost = cost_func(pos, check_restrictions=False)
+    old_cost = cost_func(pos, check_restrictions=not constraint_aware)
 
     # main optimization loop
     stuck = 0
@@ -92,13 +92,12 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
 def acceptance_prob(old_cost, new_cost, T, tuning_options):
     """Annealing equation, with modifications to work towards a lower value."""
-    error_val = sys.float_info.max
     res = 0.0
     # if start pos is not valid, always move
-    if old_cost == error_val:
+    if isinstance(old_cost, ErrorConfig):
         res = 1.0
     # if we have found a valid ps before, never move to nonvalid pos
-    elif new_cost == error_val:
+    elif isinstance(new_cost, ErrorConfig):
         res = 0.0
     # always move if new cost is better
     elif new_cost < old_cost:
@@ -117,7 +116,7 @@ def neighbor(pos, searchspace: Searchspace, constraint_aware=True):
 
     def random_neighbor(pos, method):
         """Helper method to return a random neighbor."""
-        neighbors = searchspace.get_neighbor(pos, neighbor_method=method)
+        neighbors = searchspace.get_neighbors(pos, neighbor_method=method)
         if not neighbors:
             return pos
         return random.choice(neighbors)

From 8954a46a1a7c5fcca28340acf0fffde8c67d65d1 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 27 Jun 2025 23:51:14 +0200
Subject: [PATCH 201/253] Implemented building multiple neighbor index caches,
 optional parameter to build full cache

---
 kernel_tuner/searchspace.py | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index fc0e57e6b..6639a82f7 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -79,6 +79,7 @@ def __init__(
         framework_l = framework.lower()
         restrictions = restrictions if restrictions is not None else []
         self.tune_params = tune_params
+        self.original_tune_params = tune_params.copy() if hasattr(tune_params, "copy") else tune_params
         self.max_threads = max_threads
         self.block_size_names = block_size_names
         self._tensorspace = None
@@ -92,6 +93,7 @@ def __init__(
         self._map_tensor_to_param = {}
         self._map_param_to_tensor = {}
         self.restrictions = restrictions.copy() if hasattr(restrictions, "copy") else restrictions
+        self.original_restrictions = restrictions.copy() if hasattr(restrictions, "copy") else restrictions
         # the searchspace can add commonly used constraints (e.g. maxprod(blocks) <= maxthreads)
         self._modified_restrictions = restrictions.copy() if hasattr(restrictions, "copy") else restrictions
         self.param_names = list(self.tune_params.keys())
@@ -100,6 +102,7 @@ def __init__(
         self.build_neighbors_index = build_neighbors_index
         self.solver_method = solver_method
         self.__neighbor_cache = { method: dict() for method in supported_neighbor_methods }
+        self.neighbors_index = dict()
         self.neighbor_method = neighbor_method
         if (neighbor_method is not None or build_neighbors_index) and neighbor_method not in supported_neighbor_methods:
             raise ValueError(f"Neighbor method is {neighbor_method}, must be one of {supported_neighbor_methods}")
@@ -175,7 +178,7 @@ def __init__(
             if neighbor_method is not None and neighbor_method != "Hamming":
                 self.__prepare_neighbors_index()
             if build_neighbors_index:
-                self.neighbors_index = self.__build_neighbors_index(neighbor_method)
+                self.neighbors_index[neighbor_method] = self.__build_neighbors_index(neighbor_method)
 
     # def __build_searchspace_ortools(self, block_size_names: list, max_threads: int) -> Tuple[List[tuple], np.ndarray, dict, int]:
     #     # Based on https://developers.google.com/optimization/cp/cp_solver#python_2
@@ -452,7 +455,8 @@ def __build_searchspace(self, block_size_names: list, max_threads: int, solver:
         # add the user-specified restrictions as constraints on the parameter space
         if not isinstance(self.restrictions, (list, tuple)):
             self.restrictions = [self.restrictions]
-        self.restrictions = convert_constraint_lambdas(self.restrictions)
+        if any(not isinstance(restriction, (Constraint, FunctionConstraint, str)) for restriction in self.restrictions):
+            self.restrictions = convert_constraint_lambdas(self.restrictions)
         parameter_space = self.__add_restrictions(parameter_space)
 
         # add the default blocksize threads restrictions last, because it is unlikely to reduce the parameter space by much
@@ -901,24 +905,25 @@ def get_random_sample(self, num_samples: int) -> List[tuple]:
             num_samples = self.size
         return self.get_param_configs_at_indices(self.get_random_sample_indices(num_samples))
 
-    def get_neighbors_indices_no_cache(self, param_config: tuple, neighbor_method=None) -> List[int]:
+    def get_neighbors_indices_no_cache(self, param_config: tuple, neighbor_method=None, build_full_cache=False) -> List[int]:
         """Get the neighbors indices for a parameter configuration (does not check running cache, useful when mixing neighbor methods)."""
         param_config_index = self.get_param_config_index(param_config)
 
-        # this is the simplest case, just return the cached value
-        if self.build_neighbors_index and param_config_index is not None:
-            if neighbor_method is not None and neighbor_method != self.neighbor_method:
-                raise ValueError(
-                    f"The neighbor method {neighbor_method} differs from the neighbor method {self.neighbor_method} initially used for indexing"
-                )
-            return self.neighbors_index[param_config_index]
-
         # check if there is a neighbor method to use
         if neighbor_method is None:
             if self.neighbor_method is None:
                 raise ValueError("Neither the neighbor_method argument nor self.neighbor_method was set")
             neighbor_method = self.neighbor_method
 
+        # this is the simplest case, just return the cached value
+        if param_config_index is not None:
+            if neighbor_method in self.neighbors_index:
+                return self.neighbors_index[neighbor_method][param_config_index]
+            elif build_full_cache:
+                # build the neighbors index for the given neighbor method
+                self.neighbors_index[neighbor_method] = self.__build_neighbors_index(neighbor_method)
+                return self.neighbors_index[neighbor_method][param_config_index]
+
         if neighbor_method == "Hamming":
             return self.__get_neighbors_indices_hamming(param_config)
 
@@ -933,7 +938,7 @@ def get_neighbors_indices_no_cache(self, param_config: tuple, neighbor_method=No
             return self.__get_neighbors_indices_adjacent(param_config_index, param_config)
         raise ValueError(f"The neighbor method {neighbor_method} is not in {supported_neighbor_methods}")
 
-    def get_neighbors_indices(self, param_config: tuple, neighbor_method=None) -> List[int]:
+    def get_neighbors_indices(self, param_config: tuple, neighbor_method=None, build_full_cache=False) -> List[int]:
         """Get the neighbors indices for a parameter configuration, cached if requested before."""
         if neighbor_method is None:
             neighbor_method = self.neighbor_method
@@ -942,7 +947,7 @@ def get_neighbors_indices(self, param_config: tuple, neighbor_method=None) -> Li
         neighbors = self.__neighbor_cache[neighbor_method].get(param_config, None)
         # if there are no cached neighbors, compute them
         if neighbors is None:
-            neighbors = self.get_neighbors_indices_no_cache(param_config, neighbor_method)
+            neighbors = self.get_neighbors_indices_no_cache(param_config, neighbor_method, build_full_cache)
             self.__neighbor_cache[neighbor_method][param_config] = neighbors
         return neighbors
 
@@ -958,9 +963,9 @@ def get_neighbors_no_cache(self, param_config: tuple, neighbor_method=None) -> L
         """Get the neighbors for a parameter configuration (does not check running cache, useful when mixing neighbor methods)."""
         return self.get_param_configs_at_indices(self.get_neighbors_indices_no_cache(param_config, neighbor_method))
 
-    def get_neighbors(self, param_config: tuple, neighbor_method=None) -> List[tuple]:
+    def get_neighbors(self, param_config: tuple, neighbor_method=None, build_full_cache=False) -> List[tuple]:
         """Get the neighbors for a parameter configuration."""
-        return self.get_param_configs_at_indices(self.get_neighbors_indices(param_config, neighbor_method))
+        return self.get_param_configs_at_indices(self.get_neighbors_indices(param_config, neighbor_method, build_full_cache))
 
     def get_param_neighbors(self, param_config: tuple, index: int, neighbor_method: str, randomize: bool) -> list:
         """Get the neighboring parameters at an index."""

From e2924e0d8620de7f0b75ddd761211130a34ae788 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 3 Jul 2025 09:02:20 +0200
Subject: [PATCH 202/253] Improvements for Numpy 2.0 compatibility

---
 kernel_tuner/strategies/bayes_opt.py | 2 +-
 pyproject.toml                       | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
index 663cb12c8..589216d1b 100644
--- a/kernel_tuner/strategies/bayes_opt.py
+++ b/kernel_tuner/strategies/bayes_opt.py
@@ -229,7 +229,7 @@ def get_hyperparam(name: str, default, supported_values=list()):
             self.worst_value = np.inf
             self.argopt = np.argmin
         elif opt_direction == "max":
-            self.worst_value = np.NINF
+            self.worst_value = -np.inf
             self.argopt = np.argmax
         else:
             raise ValueError("Invalid optimization direction '{}'".format(opt_direction))
diff --git a/pyproject.toml b/pyproject.toml
index 2f259bdf6..0565f571e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -125,7 +125,7 @@ pytest-cov = "^5.0.0"
 mock = "^5.1.0"
 nox = "^2024.4.15"
 nox-poetry = "^1.0.3"
-ruff = "^0.4.4"
+ruff = "^0.4.8"
 pep440 = "^0.1.2"
 tomli = "^2.0.1"          # held back by Python <= 3.10, can be replaced by built-in [tomllib](https://docs.python.org/3.11/library/tomllib.html) from Python 3.11 onwards
 
@@ -154,3 +154,5 @@ select = [
 ]
 [tool.ruff.pydocstyle]
 convention = "google"
+[tool.ruff.lint]
+select = ["NPY201"]

From c39b87eba2b85e4f3f951162017595b17328ccf4 Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Fri, 4 Jul 2025 01:52:02 +0200
Subject: [PATCH 203/253] replace differential evolution strategy

---
 kernel_tuner/strategies/diff_evo.py | 321 ++++++++++++++++++++++++++--
 test/strategies/__init__.py         |   0
 test/strategies/test_diff_evo.py    | 152 +++++++++++++
 test/test_runners.py                |  10 -
 4 files changed, 453 insertions(+), 30 deletions(-)
 create mode 100644 test/strategies/__init__.py
 create mode 100644 test/strategies/test_diff_evo.py

diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index 5ad2b9474..b268c55cd 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -1,43 +1,324 @@
-"""The differential evolution strategy that optimizes the search through the parameter space."""
-from scipy.optimize import differential_evolution
+"""A simple Different Evolution for parameter search."""
+import re
+import numpy as np
 
 from kernel_tuner import util
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc
 
-supported_methods = ["best1bin", "best1exp", "rand1exp", "randtobest1exp", "best2exp", "rand2exp", "randtobest1bin", "best2bin", "rand2bin", "rand1bin"]
+_options = dict(
+    popsize=("population size", 50),
+    maxiter=("maximum number of generations", 200),
+    F=("mutation factor (differential weight)", 0.8),
+    CR=("crossover rate", 0.9),
+    method=("method", "best1bin")
+)
 
-_options = dict(method=(f"Creation method for new population, any of {supported_methods}", "best1bin"),
-                       popsize=("Population size", 20),
-                       maxiter=("Number of generations", 100))
+supported_methods = ["best1bin", "rand1bin", "best2bin", "rand2bin", "best1exp", "rand1exp", "best2exp", "rand2exp", "currenttobest1bin", "currenttobest1exp", "randtobest1bin", "randtobest1exp"]
 
 
 def tune(searchspace: Searchspace, runner, tuning_options):
-
-
-    method, popsize, maxiter = common.get_options(tuning_options.strategy_options, _options)
-
-    # build a bounds array as needed for the optimizer
     cost_func = CostFunc(searchspace, tuning_options, runner)
     bounds = cost_func.get_bounds()
 
-    # ensure particles start from legal points
-    population = list(list(p) for p in searchspace.get_random_sample(popsize))
+    options = tuning_options.strategy_options
+    popsize, maxiter, F, CR, method = common.get_options(options, _options)
+
+    if method not in supported_methods:
+        raise ValueError(f"Error {method} not supported, {supported_methods=}")
 
-    # call the differential evolution optimizer
-    opt_result = None
     try:
-        opt_result = differential_evolution(cost_func, bounds, maxiter=maxiter, popsize=popsize, init=population,
-                                        polish=False, strategy=method, disp=tuning_options.verbose)
+        differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F, CR, method, tuning_options.verbose)
     except util.StopCriterionReached as e:
         if tuning_options.verbose:
             print(e)
 
-    if opt_result and tuning_options.verbose:
-        print(opt_result.message)
-
     return cost_func.results
 
 
 tune.__doc__ = common.get_strategy_docstring("Differential Evolution", _options)
+
+
+def values_to_indices(individual_values, tune_params):
+    """Converts an individual's values to its corresponding index vector."""
+    idx = np.zeros(len(individual_values))
+    for i, v in enumerate(tune_params.values()):
+        idx[i] = v.index(individual_values[i])
+    return idx
+
+
+def indices_to_values(individual_indices, tune_params):
+    """Converts an individual's index vector back to its values."""
+    tune_params_list = list(tune_params.values())
+    print(f"{tune_params_list=} {individual_indices=}")
+    values = []
+    for dim, idx in enumerate(individual_indices):
+        values.append(tune_params_list[dim][idx])
+    return np.array(values)
+
+
+def parse_method(method):
+    """ Helper func to parse the preferred method into its components. """
+    pattern = r"^(best|rand|currenttobest|randtobest)(1|2)(bin|exp)$"
+    match = re.fullmatch(pattern, method)
+
+    if match:
+        return match.group(1) == "best", int(match.group(2)), mutation[match.group(2)], crossover[match.group(3)]
+    else:
+        raise ValueError("Error parsing differential evolution method")
+
+
+def random_draw(idxs, mutation, best):
+    """
+    Draw requested number of random individuals.
+
+    Draw without replacement unless there is not enough to draw from.
+    """
+    draw = 2 * mutation + 1 - int(best)
+    return np.random.choice(idxs, draw, replace=draw>=len(idxs))
+
+
+def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F, CR, method, verbose):
+    """
+    A basic implementation of the Differential Evolution algorithm.
+
+    This function finds the minimum of a given cost function within specified bounds.
+
+    Args:
+        cost_func (callable): The objective function to be minimized. It should take a
+                              single argument (a numpy array of parameters) and return a
+                              single scalar value (the cost).
+        bounds (list of tuples): A list where each tuple contains the (min, max) bounds
+                                 for each parameter. e.g., [(-5, 5), (-5, 5)]
+        popsize (int): The size of the population.
+        maxiter (int): The maximum number of generations to run.
+        F (float): The mutation factor, also known as the differential weight.
+                   Should be in the range [0, 2].
+        CR (float): The crossover probability. Should be in the range [0, 1].
+        verbose (bool): If True, prints the progress of the algorithm at each generation.
+
+    Returns:
+        dict: A dictionary containing the best solution found ('solution') and its
+              corresponding cost ('cost').
+    """
+    tune_params = cost_func.tuning_options.tune_params
+    min_idx = np.zeros(len(tune_params))
+    max_idx = [len(v)-1 for v in tune_params.values()]
+
+    best, mutation, mutation_method, crossover_method = parse_method(method)
+
+    # --- 1. Initialization ---
+
+    # Get the number of dimensions from the bounds list
+    dimensions = len(bounds)
+
+    # Convert bounds to a numpy array for easier manipulation
+    bounds = np.array(bounds)
+
+    # Initialize the population with random individuals within the bounds
+    population = np.array(list(list(p) for p in searchspace.get_random_sample(popsize)))
+
+    # Calculate the initial cost for each individual in the population
+    population_cost = np.array([cost_func(ind) for ind in population])
+
+    # Keep track of the best solution found so far
+    best_idx = np.argmin(population_cost)
+    best_solution = population[best_idx]
+    best_solution_idx = values_to_indices(best_solution, tune_params)
+    best_cost = population_cost[best_idx]
+
+    # --- 2. Main Loop ---
+
+    # Iterate through the specified number of generations
+    for generation in range(maxiter):
+
+        trial_population = []
+
+        # Iterate over each individual in the population
+        for i in range(popsize):
+
+            # --- a. Mutation ---
+
+            # Select three distinct random individuals (a, b, c) from the population,
+            # ensuring they are different from the current individual 'i'.
+            idxs = [idx for idx in range(popsize) if idx != i]
+            randos = random_draw(idxs, mutation, best)
+
+            if mutation_method == mutate_currenttobest1:
+                randos[0] = i
+
+            randos_idx = [values_to_indices(population[rando], tune_params) for rando in randos]
+
+            # Apply mutation strategy
+            donor_vector_idx = mutation_method(best_solution_idx, randos_idx, F, min_idx, max_idx, best)
+            donor_vector = indices_to_values(donor_vector_idx, tune_params)
+
+            # --- b. Crossover ---
+            trial_vector = crossover_method(donor_vector, population[i], CR)
+
+            # Store for selection
+            trial_population.append(trial_vector)
+
+        # --- c. Selection ---
+
+        # Calculate the cost of the new trial vectors
+        trial_population_cost = np.array([cost_func(ind) for ind in trial_population])
+
+        # Iterate over each individual in the trial population
+        for i in range(popsize):
+
+            trial_vector = trial_population[i]
+            trial_cost = trial_population_cost[i]
+
+            # If the trial vector has a lower or equal cost, it replaces the
+            # target vector in the population for the next generation.
+            if trial_cost <= population_cost[i]:
+                population[i] = trial_vector
+                population_cost[i] = trial_cost
+
+                # Update the overall best solution if the new one is better
+                if trial_cost < best_cost:
+                    best_cost = trial_cost
+                    best_solution = trial_vector
+                    best_solution_idx = values_to_indices(best_solution, tune_params)
+
+        # Print the progress at the end of the generation
+        if verbose:
+            print(f"Generation {generation + 1}, Best Cost: {best_cost:.6f}")
+
+    return {'solution': best_solution, 'cost': best_cost}
+
+
+def round_and_clip(mutant_idx_float, min_idx, max_idx):
+    """ Helper func to round floating index to nearest integer and clip within bounds. """
+    # Round to the nearest integer
+    rounded_idx = np.round(mutant_idx_float)
+
+    # Clip the indices to ensure they are within valid index bounds
+    clipped_idx = np.clip(rounded_idx, min_idx, max_idx)
+
+    # Convert final mutant vector to integer type
+    return clipped_idx.astype(int)
+
+
+def mutate_currenttobest1(best_idx, randos_idx, F, min_idx, max_idx, best):
+    """
+    Performs the DE/1 currenttobest1 mutation strategy.
+
+    This function operates on the indices of the parameters, not their actual values.
+    The formula v = cur + F * (best - cur + a - b) is applied to the indices, and the result is
+    then rounded and clipped to ensure it remains a valid index.
+    """
+    cur_idx, b_idx, c_idx = randos_idx
+
+    # Apply the DE/currenttobest/1 formula to the indices
+    mutant_idx_float = cur_idx + F * (best_idx - cur_idx + b_idx - c_idx)
+
+    return round_and_clip(mutant_idx_float, min_idx, max_idx)
+
+
+def mutate_randtobest1(best_idx, randos_idx, F, min_idx, max_idx, best):
+    """
+    Performs the DE/1 randtobest1 mutation strategy.
+
+    This function operates on the indices of the parameters, not their actual values.
+    The formula v = a + F * (best - a + b - c) is applied to the indices, and the result is
+    then rounded and clipped to ensure it remains a valid index.
+    """
+    a_idx, b_idx, c_idx = randos_idx
+
+    # Apply the DE/currenttobest/1 formula to the indices
+    mutant_idx_float = a_idx + F * (best_idx - a_idx + b_idx - c_idx)
+
+    return round_and_clip(mutant_idx_float, min_idx, max_idx)
+
+
+def mutate_de_1(best_idx, randos_idx, F, min_idx, max_idx, best):
+    """
+    Performs the DE/1 mutation strategy.
+
+    This function operates on the indices of the parameters, not their actual values.
+    The formula v = a + F * (b - c) is applied to the indices, and the result is
+    then rounded and clipped to ensure it remains a valid index.
+
+    """
+    if best:
+        a_idx = best_idx
+        b_idx, c_idx = randos_idx
+    else:
+        a_idx, b_idx, c_idx = randos_idx
+
+    # Apply the DE/rand/1 formula to the indices
+    mutant_idx_float = a_idx + F * (b_idx - c_idx)
+
+    return round_and_clip(mutant_idx_float, min_idx, max_idx)
+
+
+def mutate_de_2(best_idx, randos_idx, F, min_idx, max_idx, best):
+    """
+    Performs the DE/2 mutation strategy for a discrete search space.
+
+    This function operates on the indices of the parameters, not their actual values.
+    The formula v = a + F1 * (b - c) + F2 * (d - e) is applied to the indices,
+    and the result is then rounded and clipped to ensure it remains a valid index.
+
+    """
+    if best:
+        a_idx = best_idx
+        b_idx, c_idx, d_idx, e_idx = randos_idx
+    else:
+        a_idx, b_idx, c_idx, d_idx, e_idx = randos_idx
+
+    # Apply the DE/2 formula to the indices
+    mutant_idx_float = a_idx + F * (b_idx + c_idx - d_idx - e_idx)
+
+    return round_and_clip(mutant_idx_float, min_idx, max_idx)
+
+
+def binomial_crossover(donor_vector, target, CR):
+    """ Performs binomial crossover of donor_vector with target given crossover rate CR. """
+    # Create the trial vector by mixing parameters from the target and donor vectors
+    trial_vector = np.copy(target)
+    dimensions = len(donor_vector)
+
+    # Generate a random array of floats for comparison with the crossover rate CR
+    crossover_points = np.random.rand(dimensions) < CR
+
+    # Ensure at least one parameter is taken from the donor vector
+    # to prevent the trial vector from being identical to the target vector.
+    if not np.any(crossover_points):
+        crossover_points[np.random.randint(0, dimensions)] = True
+
+    # Apply crossover
+    trial_vector[crossover_points] = donor_vector[crossover_points]
+
+    return trial_vector
+
+
+def exponential_crossover(donor_vector, target, CR):
+    """
+    Performs exponential crossover for a discrete search space.
+
+    This creates a trial vector by taking a contiguous block of parameters
+    from the donor vector and the rest from the target vector.
+    """
+    dimensions = len(target)
+    trial_idx = np.copy(target)
+
+    # 1. Select a random starting point for the crossover block.
+    start_point = np.random.randint(0, dimensions)
+
+    # 2. Determine the length of the block to be copied from the mutant.
+    # The loop continues as long as random numbers are less than CR.
+    # This ensures at least one parameter is always taken from the mutant.
+    l = 0
+    while np.random.rand() < CR and l < dimensions:
+        crossover_point = (start_point + l) % dimensions
+        trial_idx[crossover_point] = donor_vector[crossover_point]
+        l += 1
+
+    return trial_idx
+
+mutation = {"1": mutate_de_1, "2": mutate_de_2, "currenttobest1": mutate_currenttobest1, "randtobest1": mutate_randtobest1}
+crossover = {"bin": binomial_crossover, "exp": exponential_crossover}
diff --git a/test/strategies/__init__.py b/test/strategies/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/strategies/test_diff_evo.py b/test/strategies/test_diff_evo.py
new file mode 100644
index 000000000..5b8697e5f
--- /dev/null
+++ b/test/strategies/test_diff_evo.py
@@ -0,0 +1,152 @@
+import numpy as np
+import pytest
+from kernel_tuner.strategies.diff_evo import values_to_indices, indices_to_values, mutate_de_1, mutate_de_2, binomial_crossover, exponential_crossover
+from kernel_tuner.strategies.diff_evo import supported_methods
+from kernel_tuner import tune_kernel
+
+from .test_strategies import vector_add, cache_filename
+
+def test_values_to_indices():
+
+    tune_params = {}
+    tune_params["block_size_x"] = [16, 32, 128, 1024]
+
+    result = values_to_indices([1024], tune_params)
+    expected = [3]
+    assert result[0] == expected[0]
+    assert len(result) == len(expected)
+
+    tune_params["block_size_y"] = [16, 32, 128, 1024]
+
+    result = values_to_indices([32, 128], tune_params)
+    expected = [1, 2]
+    assert result[0] == expected[0]
+    assert result[1] == expected[1]
+    assert len(result) == len(expected)
+
+
+def test_indices_to_values():
+
+    tune_params = {}
+    tune_params["block_size_x"] = [16, 32, 128, 1024]
+
+    expected = [1024]
+    result = indices_to_values([3], tune_params)
+    assert result[0] == expected[0]
+    assert len(result) == len(expected)
+
+    tune_params["block_size_y"] = [16, 32, 128, 1024]
+    expected = [1024, 32]
+    result = indices_to_values([3,1], tune_params)
+    assert result[0] == expected[0]
+    assert result[1] == expected[1]
+    assert len(result) == len(expected)
+
+
+def test_mutate_de_1():
+
+    tune_params = {}
+    tune_params["block_size_x"] = [16, 32, 128, 256, 512, 1024]
+    tune_params["block_size_y"] = [1, 2, 8]
+    tune_params["block_size_z"] = [1, 2, 4, 8]
+
+    a_idx = np.array([0, 1, 2])
+    b_idx = np.array([4, 1, 0])
+    c_idx = np.array([5, 0, 1])
+    randos_idx = [a_idx, b_idx, c_idx]
+
+    F = 0.8
+    params_list = list(tune_params)
+    min_idx = np.zeros(len(tune_params))
+    max_idx = [len(v)-1 for v in tune_params.values()]
+
+    mutant = mutate_de_1(a_idx, randos_idx, F, min_idx, max_idx, False)
+
+    assert len(mutant) == len(a_idx)
+
+    for dim, idx in enumerate(mutant):
+        assert isinstance(idx, np.integer)
+        assert min_idx[dim] <= idx <= max_idx[dim]
+
+    mutant = mutate_de_1(a_idx, randos_idx[:-1], F, min_idx, max_idx, True)
+
+    assert len(mutant) == len(a_idx)
+
+    for dim, idx in enumerate(mutant):
+        assert isinstance(idx, np.integer)
+        assert min_idx[dim] <= idx <= max_idx[dim]
+
+
+def test_mutate_de_2():
+
+    tune_params = {}
+    tune_params["block_size_x"] = [16, 32, 128, 256, 512, 1024]
+    tune_params["block_size_y"] = [1, 2, 8]
+    tune_params["block_size_z"] = [1, 2, 4, 8]
+
+    a_idx = np.array([0, 1, 2])
+    b_idx = np.array([4, 1, 0])
+    c_idx = np.array([5, 0, 1])
+    d_idx = np.array([3, 2, 3])
+    e_idx = np.array([1, 0, 3])
+    randos_idx = [a_idx, b_idx, c_idx, d_idx, e_idx]
+
+    F = 0.8
+    params_list = list(tune_params)
+    min_idx = np.zeros(len(tune_params))
+    max_idx = [len(v)-1 for v in tune_params.values()]
+
+    mutant = mutate_de_2(a_idx, randos_idx, F, min_idx, max_idx, False)
+
+    assert len(mutant) == len(a_idx)
+
+    for dim, idx in enumerate(mutant):
+        assert isinstance(idx, np.integer)
+        assert min_idx[dim] <= idx <= max_idx[dim]
+
+    mutant = mutate_de_2(a_idx, randos_idx[:-1], F, min_idx, max_idx, True)
+
+    assert len(mutant) == len(a_idx)
+
+    for dim, idx in enumerate(mutant):
+        assert isinstance(idx, np.integer)
+        assert min_idx[dim] <= idx <= max_idx[dim]
+
+
+def test_binomial_crossover():
+
+    donor_vector = np.array([1, 2, 3, 4, 5])
+    target = np.array([6, 7, 8, 9, 10])
+    CR = 0.8
+
+    result = binomial_crossover(donor_vector, target, CR)
+    assert len(result) == len(donor_vector)
+
+    for dim, val in enumerate(result):
+        assert (val == donor_vector[dim]) or (val == target[dim])
+
+
+def test_exponential_crossover():
+
+    donor_vector = np.array([1, 2, 3, 4, 5])
+    target = np.array([6, 7, 8, 9, 10])
+    CR = 0.8
+
+    result = exponential_crossover(donor_vector, target, CR)
+    assert len(result) == len(donor_vector)
+
+    for dim, val in enumerate(result):
+        assert (val == donor_vector[dim]) or (val == target[dim])
+
+
+@pytest.mark.parametrize('method', supported_methods)
+def test_diff_evo(vector_add, method):
+    result, _ = tune_kernel(*vector_add,
+                            strategy="diff_evo",
+                            strategy_options=dict(popsize=5, method=method),
+                            verbose=True,
+                            cache=cache_filename,
+                            simulation_mode=True)
+    assert len(result) > 0
+
+
diff --git a/test/test_runners.py b/test/test_runners.py
index acbb641e6..22c11f7cd 100644
--- a/test/test_runners.py
+++ b/test/test_runners.py
@@ -130,16 +130,6 @@ def test_simulation_runner(env):
     assert max_time - recorded_time_including_simulation < 10
 
 
-def test_diff_evo(env):
-    result, _ = tune_kernel(*env,
-                            strategy="diff_evo",
-                            strategy_options=dict(popsize=5),
-                            verbose=True,
-                            cache=cache_filename,
-                            simulation_mode=True)
-    assert len(result) > 0
-
-
 def test_restrictions(env):
     restrictions = [lambda p: p["block_size_x"] <= 512, "block_size_x > 128"]
 

From a81765a2d6c2ddd4c4ea4f8009945012105cbeeb Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Fri, 4 Jul 2025 02:00:29 +0200
Subject: [PATCH 204/253] formatted with black

---
 kernel_tuner/strategies/diff_evo.py | 41 ++++++++++++++++++++---------
 test/strategies/test_diff_evo.py    | 34 +++++++++++++++---------
 2 files changed, 50 insertions(+), 25 deletions(-)

diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index b268c55cd..92ad21bb0 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -1,4 +1,5 @@
 """A simple Different Evolution for parameter search."""
+
 import re
 import numpy as np
 
@@ -12,10 +13,23 @@
     maxiter=("maximum number of generations", 200),
     F=("mutation factor (differential weight)", 0.8),
     CR=("crossover rate", 0.9),
-    method=("method", "best1bin")
+    method=("method", "best1bin"),
 )
 
-supported_methods = ["best1bin", "rand1bin", "best2bin", "rand2bin", "best1exp", "rand1exp", "best2exp", "rand2exp", "currenttobest1bin", "currenttobest1exp", "randtobest1bin", "randtobest1exp"]
+supported_methods = [
+    "best1bin",
+    "rand1bin",
+    "best2bin",
+    "rand2bin",
+    "best1exp",
+    "rand1exp",
+    "best2exp",
+    "rand2exp",
+    "currenttobest1bin",
+    "currenttobest1exp",
+    "randtobest1bin",
+    "randtobest1exp",
+]
 
 
 def tune(searchspace: Searchspace, runner, tuning_options):
@@ -59,7 +73,7 @@ def indices_to_values(individual_indices, tune_params):
 
 
 def parse_method(method):
-    """ Helper func to parse the preferred method into its components. """
+    """Helper func to parse the preferred method into its components."""
     pattern = r"^(best|rand|currenttobest|randtobest)(1|2)(bin|exp)$"
     match = re.fullmatch(pattern, method)
 
@@ -76,7 +90,7 @@ def random_draw(idxs, mutation, best):
     Draw without replacement unless there is not enough to draw from.
     """
     draw = 2 * mutation + 1 - int(best)
-    return np.random.choice(idxs, draw, replace=draw>=len(idxs))
+    return np.random.choice(idxs, draw, replace=draw >= len(idxs))
 
 
 def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F, CR, method, verbose):
@@ -104,15 +118,12 @@ def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F,
     """
     tune_params = cost_func.tuning_options.tune_params
     min_idx = np.zeros(len(tune_params))
-    max_idx = [len(v)-1 for v in tune_params.values()]
+    max_idx = [len(v) - 1 for v in tune_params.values()]
 
     best, mutation, mutation_method, crossover_method = parse_method(method)
 
     # --- 1. Initialization ---
 
-    # Get the number of dimensions from the bounds list
-    dimensions = len(bounds)
-
     # Convert bounds to a numpy array for easier manipulation
     bounds = np.array(bounds)
 
@@ -187,11 +198,11 @@ def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F,
         if verbose:
             print(f"Generation {generation + 1}, Best Cost: {best_cost:.6f}")
 
-    return {'solution': best_solution, 'cost': best_cost}
+    return {"solution": best_solution, "cost": best_cost}
 
 
 def round_and_clip(mutant_idx_float, min_idx, max_idx):
-    """ Helper func to round floating index to nearest integer and clip within bounds. """
+    """Helper func to round floating index to nearest integer and clip within bounds."""
     # Round to the nearest integer
     rounded_idx = np.round(mutant_idx_float)
 
@@ -277,7 +288,7 @@ def mutate_de_2(best_idx, randos_idx, F, min_idx, max_idx, best):
 
 
 def binomial_crossover(donor_vector, target, CR):
-    """ Performs binomial crossover of donor_vector with target given crossover rate CR. """
+    """Performs binomial crossover of donor_vector with target given crossover rate CR."""
     # Create the trial vector by mixing parameters from the target and donor vectors
     trial_vector = np.copy(target)
     dimensions = len(donor_vector)
@@ -320,5 +331,11 @@ def exponential_crossover(donor_vector, target, CR):
 
     return trial_idx
 
-mutation = {"1": mutate_de_1, "2": mutate_de_2, "currenttobest1": mutate_currenttobest1, "randtobest1": mutate_randtobest1}
+
+mutation = {
+    "1": mutate_de_1,
+    "2": mutate_de_2,
+    "currenttobest1": mutate_currenttobest1,
+    "randtobest1": mutate_randtobest1,
+}
 crossover = {"bin": binomial_crossover, "exp": exponential_crossover}
diff --git a/test/strategies/test_diff_evo.py b/test/strategies/test_diff_evo.py
index 5b8697e5f..f89fe8507 100644
--- a/test/strategies/test_diff_evo.py
+++ b/test/strategies/test_diff_evo.py
@@ -1,11 +1,19 @@
 import numpy as np
 import pytest
-from kernel_tuner.strategies.diff_evo import values_to_indices, indices_to_values, mutate_de_1, mutate_de_2, binomial_crossover, exponential_crossover
+from kernel_tuner.strategies.diff_evo import (
+    values_to_indices,
+    indices_to_values,
+    mutate_de_1,
+    mutate_de_2,
+    binomial_crossover,
+    exponential_crossover,
+)
 from kernel_tuner.strategies.diff_evo import supported_methods
 from kernel_tuner import tune_kernel
 
 from .test_strategies import vector_add, cache_filename
 
+
 def test_values_to_indices():
 
     tune_params = {}
@@ -37,7 +45,7 @@ def test_indices_to_values():
 
     tune_params["block_size_y"] = [16, 32, 128, 1024]
     expected = [1024, 32]
-    result = indices_to_values([3,1], tune_params)
+    result = indices_to_values([3, 1], tune_params)
     assert result[0] == expected[0]
     assert result[1] == expected[1]
     assert len(result) == len(expected)
@@ -58,7 +66,7 @@ def test_mutate_de_1():
     F = 0.8
     params_list = list(tune_params)
     min_idx = np.zeros(len(tune_params))
-    max_idx = [len(v)-1 for v in tune_params.values()]
+    max_idx = [len(v) - 1 for v in tune_params.values()]
 
     mutant = mutate_de_1(a_idx, randos_idx, F, min_idx, max_idx, False)
 
@@ -94,7 +102,7 @@ def test_mutate_de_2():
     F = 0.8
     params_list = list(tune_params)
     min_idx = np.zeros(len(tune_params))
-    max_idx = [len(v)-1 for v in tune_params.values()]
+    max_idx = [len(v) - 1 for v in tune_params.values()]
 
     mutant = mutate_de_2(a_idx, randos_idx, F, min_idx, max_idx, False)
 
@@ -139,14 +147,14 @@ def test_exponential_crossover():
         assert (val == donor_vector[dim]) or (val == target[dim])
 
 
-@pytest.mark.parametrize('method', supported_methods)
+@pytest.mark.parametrize("method", supported_methods)
 def test_diff_evo(vector_add, method):
-    result, _ = tune_kernel(*vector_add,
-                            strategy="diff_evo",
-                            strategy_options=dict(popsize=5, method=method),
-                            verbose=True,
-                            cache=cache_filename,
-                            simulation_mode=True)
+    result, _ = tune_kernel(
+        *vector_add,
+        strategy="diff_evo",
+        strategy_options=dict(popsize=5, method=method),
+        verbose=True,
+        cache=cache_filename,
+        simulation_mode=True,
+    )
     assert len(result) > 0
-
-

From 655fcc02cfe55fa3d7fc64847ecbfeb4d636b685 Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Fri, 4 Jul 2025 08:47:04 +0200
Subject: [PATCH 205/253] fix parsing diff_evo method argument

---
 kernel_tuner/strategies/diff_evo.py | 32 ++++++++---------------------
 1 file changed, 9 insertions(+), 23 deletions(-)

diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index 92ad21bb0..ec3d1c14c 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -65,7 +65,6 @@ def values_to_indices(individual_values, tune_params):
 def indices_to_values(individual_indices, tune_params):
     """Converts an individual's index vector back to its values."""
     tune_params_list = list(tune_params.values())
-    print(f"{tune_params_list=} {individual_indices=}")
     values = []
     for dim, idx in enumerate(individual_indices):
         values.append(tune_params_list[dim][idx])
@@ -78,18 +77,22 @@ def parse_method(method):
     match = re.fullmatch(pattern, method)
 
     if match:
-        return match.group(1) == "best", int(match.group(2)), mutation[match.group(2)], crossover[match.group(3)]
+        if match.group(1) in ["currenttobest", "randtobest"]:
+            mutation_method = mutation[match.group(1)]
+        else:
+            mutation_method = mutation[match.group(2)]
+        return match.group(1) == "best", int(match.group(2)), mutation_method, crossover[match.group(3)]
     else:
         raise ValueError("Error parsing differential evolution method")
 
 
-def random_draw(idxs, mutation, best):
+def random_draw(idxs, mutate, best):
     """
     Draw requested number of random individuals.
 
     Draw without replacement unless there is not enough to draw from.
     """
-    draw = 2 * mutation + 1 - int(best)
+    draw = 2 * mutate + 1 - int(best)
     return np.random.choice(idxs, draw, replace=draw >= len(idxs))
 
 
@@ -98,23 +101,6 @@ def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F,
     A basic implementation of the Differential Evolution algorithm.
 
     This function finds the minimum of a given cost function within specified bounds.
-
-    Args:
-        cost_func (callable): The objective function to be minimized. It should take a
-                              single argument (a numpy array of parameters) and return a
-                              single scalar value (the cost).
-        bounds (list of tuples): A list where each tuple contains the (min, max) bounds
-                                 for each parameter. e.g., [(-5, 5), (-5, 5)]
-        popsize (int): The size of the population.
-        maxiter (int): The maximum number of generations to run.
-        F (float): The mutation factor, also known as the differential weight.
-                   Should be in the range [0, 2].
-        CR (float): The crossover probability. Should be in the range [0, 1].
-        verbose (bool): If True, prints the progress of the algorithm at each generation.
-
-    Returns:
-        dict: A dictionary containing the best solution found ('solution') and its
-              corresponding cost ('cost').
     """
     tune_params = cost_func.tuning_options.tune_params
     min_idx = np.zeros(len(tune_params))
@@ -335,7 +321,7 @@ def exponential_crossover(donor_vector, target, CR):
 mutation = {
     "1": mutate_de_1,
     "2": mutate_de_2,
-    "currenttobest1": mutate_currenttobest1,
-    "randtobest1": mutate_randtobest1,
+    "currenttobest": mutate_currenttobest1,
+    "randtobest": mutate_randtobest1,
 }
 crossover = {"bin": binomial_crossover, "exp": exponential_crossover}

From 2fdee8077afa11099704c02e0126bbad6dfb6d3b Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Fri, 4 Jul 2025 16:03:38 +0200
Subject: [PATCH 206/253] add test for parse method

---
 test/strategies/test_diff_evo.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/test/strategies/test_diff_evo.py b/test/strategies/test_diff_evo.py
index f89fe8507..d7ff1dbb7 100644
--- a/test/strategies/test_diff_evo.py
+++ b/test/strategies/test_diff_evo.py
@@ -7,6 +7,9 @@
     mutate_de_2,
     binomial_crossover,
     exponential_crossover,
+    parse_method,
+    mutation,
+    crossover,
 )
 from kernel_tuner.strategies.diff_evo import supported_methods
 from kernel_tuner import tune_kernel
@@ -147,6 +150,26 @@ def test_exponential_crossover():
         assert (val == donor_vector[dim]) or (val == target[dim])
 
 
+def test_parse_method():
+
+    # check unsupported methods raise ValueError
+    for method in ["randtobest4bin", "bogus3log"]:
+        print(f"{method=}")
+        with pytest.raises(ValueError):
+            parse_method(method)
+
+    # check if parses correctly
+    def check_result(result, expected):
+        assert len(result) == len(expected)
+        for i, res in enumerate(result):
+            assert res == expected[i]
+
+    check_result(parse_method("rand1bin"), [False, 1, mutation["1"], crossover["bin"]])
+    check_result(parse_method("best1exp"), [True, 1, mutation["1"], crossover["exp"]])
+    check_result(parse_method("randtobest1exp"), [False, 1, mutation["randtobest"], crossover["exp"]])
+    check_result(parse_method("currenttobest1bin"), [False, 1, mutation["currenttobest"], crossover["bin"]])
+
+
 @pytest.mark.parametrize("method", supported_methods)
 def test_diff_evo(vector_add, method):
     result, _ = tune_kernel(

From 539735cbf6aea2fe94be625b78fe1567ce29b230 Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Fri, 4 Jul 2025 16:03:56 +0200
Subject: [PATCH 207/253] add support for x0 starting point

---
 kernel_tuner/strategies/diff_evo.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index ec3d1c14c..57cac1ac9 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -115,6 +115,7 @@ def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F,
 
     # Initialize the population with random individuals within the bounds
     population = np.array(list(list(p) for p in searchspace.get_random_sample(popsize)))
+    population[0] = cost_func.get_start_pos()
 
     # Calculate the initial cost for each individual in the population
     population_cost = np.array([cost_func(ind) for ind in population])

From baf628dd24d9546edbbf43e6a1e4bcc789b2e6cf Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Fri, 4 Jul 2025 16:15:13 +0200
Subject: [PATCH 208/253] add constraint-awareness

---
 kernel_tuner/strategies/diff_evo.py | 45 +++++++++++++++++++++++++----
 1 file changed, 40 insertions(+), 5 deletions(-)

diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index 57cac1ac9..1d4537edc 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -1,5 +1,5 @@
 """A simple Different Evolution for parameter search."""
-
+import random
 import re
 import numpy as np
 
@@ -14,6 +14,7 @@
     F=("mutation factor (differential weight)", 0.8),
     CR=("crossover rate", 0.9),
     method=("method", "best1bin"),
+    constraint_aware=("constraint-aware optimization (True/False)", True),
 )
 
 supported_methods = [
@@ -37,13 +38,13 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     bounds = cost_func.get_bounds()
 
     options = tuning_options.strategy_options
-    popsize, maxiter, F, CR, method = common.get_options(options, _options)
+    popsize, maxiter, F, CR, method, constraint_aware = common.get_options(options, _options)
 
     if method not in supported_methods:
         raise ValueError(f"Error {method} not supported, {supported_methods=}")
 
     try:
-        differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F, CR, method, tuning_options.verbose)
+        differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F, CR, method, constraint_aware, tuning_options.verbose)
     except util.StopCriterionReached as e:
         if tuning_options.verbose:
             print(e)
@@ -96,7 +97,7 @@ def random_draw(idxs, mutate, best):
     return np.random.choice(idxs, draw, replace=draw >= len(idxs))
 
 
-def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F, CR, method, verbose):
+def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F, CR, method, constraint_aware, verbose):
     """
     A basic implementation of the Differential Evolution algorithm.
 
@@ -114,7 +115,18 @@ def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F,
     bounds = np.array(bounds)
 
     # Initialize the population with random individuals within the bounds
-    population = np.array(list(list(p) for p in searchspace.get_random_sample(popsize)))
+    if constraint_aware:
+        population = np.array(list(list(p) for p in searchspace.get_random_sample(popsize)))
+    else:
+        population = []
+        dna_size = len(self.tune_params)
+        for _ in range(self.pop_size):
+            dna = []
+            for key in self.tune_params:
+                dna.append(random.choice(self.tune_params[key]))
+            population.append(dna)
+        population = np.array(population)
+
     population[0] = cost_func.get_start_pos()
 
     # Calculate the initial cost for each individual in the population
@@ -155,6 +167,10 @@ def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F,
             # --- b. Crossover ---
             trial_vector = crossover_method(donor_vector, population[i], CR)
 
+            # Repair if constraint_aware
+            if constraint_aware:
+                trial_vector = repair(trial_vector, searchspace)
+
             # Store for selection
             trial_population.append(trial_vector)
 
@@ -319,6 +335,25 @@ def exponential_crossover(donor_vector, target, CR):
     return trial_idx
 
 
+def repair(trial_vector, searchspace):
+    """
+    Attempts to repair trial_vector if trial_vector is invalid
+    """
+    if not searchspace.is_param_config_valid(tuple(trial_vector)):
+        # search for valid configurations neighboring trial_vector
+        # start from strictly-adjacent to increasingly allowing more neighbors
+        for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]:
+            neighbors = searchspace.get_neighbors_no_cache(tuple(trial_vector), neighbor_method=neighbor_method)
+
+            # if we have found valid neighboring configurations, select one at random
+            if len(neighbors) > 0:
+                new_trial_vector = np.array(list(random.choice(neighbors)))
+                print(f"Differential evolution resulted in invalid config {trial_vector=}, repaired dna to {new_trial_vector=}")
+                return new_trial_vector
+
+    return trial_vector
+
+
 mutation = {
     "1": mutate_de_1,
     "2": mutate_de_2,

From 7d3861dfd1711355e170f60722ac4e27296072c5 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 4 Jul 2025 22:28:29 +0200
Subject: [PATCH 209/253] Improved bounds check to deal with non-numericals,
 other improvements

---
 kernel_tuner/strategies/common.py              | 6 +++++-
 kernel_tuner/strategies/diff_evo.py            | 2 +-
 kernel_tuner/strategies/pso.py                 | 9 +--------
 kernel_tuner/strategies/simulated_annealing.py | 7 +------
 4 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index 1652584a2..3cdede7ed 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -242,7 +242,11 @@ def get_bounds(self):
         """Create a bounds array from the tunable parameters."""
         bounds = []
         for values in self.encoded_params_values if self.encode_non_numeric else self.searchspace.params_values:
-            bounds.append((min(values), max(values)))
+            try:
+                bounds.append((min(values), max(values)))
+            except TypeError:
+                # if values are not numeric, use the first and last value as bounds
+                bounds.append((values[0], values[-1]))
         return bounds
 
     def encoded_to_params(self, config):
diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index 5285c5b64..799d613c7 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -45,7 +45,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
     try:
         differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F, CR, method, constraint_aware, tuning_options.verbose)
-    except util.StopCriterionReached as e:
+    except StopCriterionReached as e:
         if tuning_options.verbose:
             print(e)
 
diff --git a/kernel_tuner/strategies/pso.py b/kernel_tuner/strategies/pso.py
index 93c16bd23..ec7efc2ee 100644
--- a/kernel_tuner/strategies/pso.py
+++ b/kernel_tuner/strategies/pso.py
@@ -23,18 +23,11 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     # scale variables in x because PSO works with velocities to visit different configurations
     cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True)
 
-<<<<<<< HEAD
     # using this instead of get_bounds because scaling is used
-    bounds, _, eps = cost_func.get_bounds_x0_eps()
+    bounds, x0, eps = cost_func.get_bounds_x0_eps()
 
     num_particles, maxiter, w, c1, c2, constraint_aware = common.get_options(tuning_options.strategy_options, _options)
     num_particles = min(round(searchspace.size / 2), num_particles)
-=======
-    #using this instead of get_bounds because scaling is used
-    bounds, x0, eps = cost_func.get_bounds_x0_eps()
-
-    num_particles, maxiter, w, c1, c2 = common.get_options(tuning_options.strategy_options, _options)
->>>>>>> origin/custom_diff_evo
 
     best_score_global = sys.float_info.max
     best_position_global = []
diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py
index 2add27476..2ca9c62ba 100644
--- a/kernel_tuner/strategies/simulated_annealing.py
+++ b/kernel_tuner/strategies/simulated_annealing.py
@@ -35,13 +35,8 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     max_fevals = min(searchspace.size, max_fevals)
 
     # get random starting point and evaluate cost
-<<<<<<< HEAD
-    pos = generate_starting_point(searchspace, constraint_aware)
-    old_cost = cost_func(pos, check_restrictions=not constraint_aware)
-=======
     pos = cost_func.get_start_pos()
-    old_cost = cost_func(pos, check_restrictions=False)
->>>>>>> origin/custom_diff_evo
+    old_cost = cost_func(pos, check_restrictions=not constraint_aware)
 
     # main optimization loop
     stuck = 0

From 958f8bd940c00e5e49317fb6ff9bb25b54781bef Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 4 Jul 2025 22:51:08 +0200
Subject: [PATCH 210/253] Replaced undefined references to self

---
 kernel_tuner/strategies/diff_evo.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index 799d613c7..5607e4994 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -119,11 +119,11 @@ def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F,
         population = np.array(list(list(p) for p in searchspace.get_random_sample(popsize)))
     else:
         population = []
-        dna_size = len(self.tune_params)
-        for _ in range(self.pop_size):
+        dna_size = len(tune_params)
+        for _ in range(pop_size):
             dna = []
-            for key in self.tune_params:
-                dna.append(random.choice(self.tune_params[key]))
+            for key in tune_params:
+                dna.append(random.choice(tune_params[key]))
             population.append(dna)
         population = np.array(population)
 

From ea7a69d7c620ecffb920558766f8bd5d7f5fe79a Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Fri, 4 Jul 2025 23:28:41 +0200
Subject: [PATCH 211/253] LHS sampling, enforce trial population diversity,
 avoid getting stuck

---
 kernel_tuner/strategies/diff_evo.py | 83 ++++++++++++++++++++++-------
 1 file changed, 64 insertions(+), 19 deletions(-)

diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index 81ecac631..bc2099982 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -3,6 +3,8 @@
 import re
 import numpy as np
 
+from scipy.stats.qmc import LatinHypercube
+
 from kernel_tuner import util
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
@@ -11,7 +13,7 @@
 _options = dict(
     popsize=("population size", 50),
     maxiter=("maximum number of generations", 200),
-    F=("mutation factor (differential weight)", 0.8),
+    F=("mutation factor (differential weight)", 1.3),
     CR=("crossover rate", 0.9),
     method=("method", "best1bin"),
     constraint_aware=("constraint-aware optimization (True/False)", True),
@@ -35,7 +37,7 @@
 
 def tune(searchspace: Searchspace, runner, tuning_options):
     cost_func = CostFunc(searchspace, tuning_options, runner)
-    bounds, x0, _ = cost_func.get_bounds_x0_eps()
+    bounds = cost_func.get_bounds()
 
     options = tuning_options.strategy_options
     popsize, maxiter, F, CR, method, constraint_aware = common.get_options(options, _options)
@@ -97,6 +99,22 @@ def random_draw(idxs, mutate, best):
     return np.random.choice(idxs, draw, replace=draw >= len(idxs))
 
 
+def generate_population(tune_params, min_idx, max_idx, popsize, searchspace, constraint_aware):
+    if constraint_aware:
+        samples = LatinHypercube(len(tune_params)).integers(l_bounds=0, u_bounds=max_idx, n=popsize, endpoint=True)
+        population = [indices_to_values(sample, tune_params) for sample in samples]
+        population = np.array([repair(individual, searchspace) for individual in population])
+    else:
+        population = []
+        for _ in range(popsize):
+            ind = []
+            for key in tune_params:
+                ind.append(random.choice(tune_params[key]))
+            population.append(ind)
+        population = np.array(population)
+    return population
+
+
 def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F, CR, method, constraint_aware, verbose):
     """
     A basic implementation of the Differential Evolution algorithm.
@@ -115,18 +133,9 @@ def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F,
     bounds = np.array(bounds)
 
     # Initialize the population with random individuals within the bounds
-    if constraint_aware:
-        population = np.array(list(list(p) for p in searchspace.get_random_sample(popsize)))
-    else:
-        population = []
-        dna_size = len(self.tune_params)
-        for _ in range(self.pop_size):
-            dna = []
-            for key in self.tune_params:
-                dna.append(random.choice(self.tune_params[key]))
-            population.append(dna)
-        population = np.array(population)
+    population = generate_population(tune_params, min_idx, max_idx, popsize, searchspace, constraint_aware)
 
+    # Override with user-specified starting position
     population[0] = cost_func.get_start_pos()
 
     # Calculate the initial cost for each individual in the population
@@ -140,16 +149,25 @@ def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F,
 
     # --- 2. Main Loop ---
 
+    stabilized = 0
+
     # Iterate through the specified number of generations
     for generation in range(maxiter):
 
+        # Trial population and vectors are stored as lists
+        # not Numpy arrays, to make it easy to check for duplicates
         trial_population = []
 
+        # If for two generations there has been no change, generate a new population
+        if stabilized > 2:
+            trial_population = list(generate_population(tune_params, min_idx, max_idx, popsize, searchspace, constraint_aware))
+
         # Iterate over each individual in the population
-        for i in range(popsize):
+        i = 0
+        stuck = 0
+        while len(trial_population) < popsize:
 
             # --- a. Mutation ---
-
             # Select three distinct random individuals (a, b, c) from the population,
             # ensuring they are different from the current individual 'i'.
             idxs = [idx for idx in range(popsize) if idx != i]
@@ -172,13 +190,28 @@ def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F,
                 trial_vector = repair(trial_vector, searchspace)
 
             # Store for selection
-            trial_population.append(trial_vector)
+            if list(trial_vector) not in trial_population:
+                trial_population.append(list(trial_vector))
+                i += 1
+                stuck = 0
+            else:
+                stuck += 1
+                if stuck >= 20:
+                    if verbose:
+                        print(f"Differential Evolution got stuck generating new individuals, insert random sample")
+                    trial_population.append(list(searchspace.get_random_sample(1)[0]))
+                    i += 1
+                    stuck = 0
+
 
         # --- c. Selection ---
 
         # Calculate the cost of the new trial vectors
         trial_population_cost = np.array([cost_func(ind) for ind in trial_population])
 
+        # Keep track of whether population changes over time
+        no_change = True
+
         # Iterate over each individual in the trial population
         for i in range(popsize):
 
@@ -188,8 +221,13 @@ def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F,
             # If the trial vector has a lower or equal cost, it replaces the
             # target vector in the population for the next generation.
             if trial_cost <= population_cost[i]:
-                population[i] = trial_vector
-                population_cost[i] = trial_cost
+
+                # check if trial_vector is not already in population
+                idxs = [idx for idx in range(popsize) if idx != i]
+                if trial_vector not in population[idxs]:
+                    population[i] = np.array(trial_vector)
+                    population_cost[i] = trial_cost
+                    no_change = False
 
                 # Update the overall best solution if the new one is better
                 if trial_cost < best_cost:
@@ -197,10 +235,17 @@ def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F,
                     best_solution = trial_vector
                     best_solution_idx = values_to_indices(best_solution, tune_params)
 
+        # Note if population is stabilizing
+        if no_change:
+            stabilized += 1
+
         # Print the progress at the end of the generation
         if verbose:
             print(f"Generation {generation + 1}, Best Cost: {best_cost:.6f}")
 
+    if verbose:
+        print(f"Differential Evolution completed fevals={len(cost_func.tuning_options.unique_results)}")
+
     return {"solution": best_solution, "cost": best_cost}
 
 
@@ -348,7 +393,7 @@ def repair(trial_vector, searchspace):
             # if we have found valid neighboring configurations, select one at random
             if len(neighbors) > 0:
                 new_trial_vector = np.array(list(random.choice(neighbors)))
-                print(f"Differential evolution resulted in invalid config {trial_vector=}, repaired dna to {new_trial_vector=}")
+                print(f"Differential evolution resulted in invalid config {trial_vector=}, repaired to {new_trial_vector=}")
                 return new_trial_vector
 
     return trial_vector

From fd96b81e7847163238fea42254c3cde489e8263d Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 4 Jul 2025 23:30:26 +0200
Subject: [PATCH 212/253] Moved cache_filename reference to top-level for
 import elsewhere

---
 test/strategies/test_strategies.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
index 5da721497..5fd0c4d6f 100644
--- a/test/strategies/test_strategies.py
+++ b/test/strategies/test_strategies.py
@@ -11,6 +11,8 @@
 from ..context import skip_if_no_bayesopt_botorch, skip_if_no_bayesopt_gpytorch, skip_if_no_pyatf
 
 
+cache_filename =  Path(__file__).parent / "test_cache_file.json"
+
 @pytest.fixture
 def vector_add():
     kernel_string = """
@@ -53,7 +55,6 @@ def vector_add():
         strategies.append(s)
 @pytest.mark.parametrize('strategy', strategies)
 def test_strategies(vector_add, strategy):
-    cache_filename =  Path(__file__).parent / "test_cache_file.json"
     options = dict(popsize=5, neighbor='adjacent')
 
     print(f"testing {strategy}")
@@ -69,18 +70,19 @@ def test_strategies(vector_add, strategy):
     restrictions = ["test_string == 'alg_2'", "test_bool == True", "test_mixed == 2.45"]
 
     # pyATF can't handle non-number tune parameters, so we filter them out
+    cache_filename_local = cache_filename
     if strategy == "pyatf_strategies":
         tune_params = {
             "block_size_x": [128 + 64 * i for i in range(15)]
         }
         restrictions = []
-        cache_filename = cache_filename.parent.parent / "test_cache_file.json"
+        cache_filename_local = cache_filename_local.parent.parent / "test_cache_file.json"
         vector_add[-1] = tune_params
 
     # run the tuning in simulation mode
-    assert cache_filename.exists()
+    assert cache_filename_local.exists()
     results, _ = kernel_tuner.tune_kernel(*vector_add, restrictions=restrictions, strategy=strategy, strategy_options=filter_options,
-                                         verbose=False, cache=cache_filename, simulation_mode=True)
+                                         verbose=False, cache=cache_filename_local, simulation_mode=True)
 
     assert len(results) > 0
 

From 26c8127e5bff1a88a92f31d299726676966226a3 Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Fri, 4 Jul 2025 23:39:41 +0200
Subject: [PATCH 213/253] code quality improvements

---
 kernel_tuner/strategies/diff_evo.py | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index bc2099982..6268ddee6 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -99,7 +99,8 @@ def random_draw(idxs, mutate, best):
     return np.random.choice(idxs, draw, replace=draw >= len(idxs))
 
 
-def generate_population(tune_params, min_idx, max_idx, popsize, searchspace, constraint_aware):
+def generate_population(tune_params, max_idx, popsize, searchspace, constraint_aware):
+    """ Generate new population, returns Numpy array """
     if constraint_aware:
         samples = LatinHypercube(len(tune_params)).integers(l_bounds=0, u_bounds=max_idx, n=popsize, endpoint=True)
         population = [indices_to_values(sample, tune_params) for sample in samples]
@@ -133,7 +134,7 @@ def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F,
     bounds = np.array(bounds)
 
     # Initialize the population with random individuals within the bounds
-    population = generate_population(tune_params, min_idx, max_idx, popsize, searchspace, constraint_aware)
+    population = generate_population(tune_params, max_idx, popsize, searchspace, constraint_aware)
 
     # Override with user-specified starting position
     population[0] = cost_func.get_start_pos()
@@ -160,7 +161,7 @@ def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F,
 
         # If for two generations there has been no change, generate a new population
         if stabilized > 2:
-            trial_population = list(generate_population(tune_params, min_idx, max_idx, popsize, searchspace, constraint_aware))
+            trial_population = list(generate_population(tune_params, max_idx, popsize, searchspace, constraint_aware))
 
         # Iterate over each individual in the population
         i = 0
@@ -189,19 +190,20 @@ def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F,
             if constraint_aware:
                 trial_vector = repair(trial_vector, searchspace)
 
-            # Store for selection
+            # Store for selection, if not in trial_population already
             if list(trial_vector) not in trial_population:
                 trial_population.append(list(trial_vector))
                 i += 1
                 stuck = 0
             else:
                 stuck += 1
-                if stuck >= 20:
-                    if verbose:
-                        print(f"Differential Evolution got stuck generating new individuals, insert random sample")
-                    trial_population.append(list(searchspace.get_random_sample(1)[0]))
-                    i += 1
-                    stuck = 0
+
+            if stuck >= 20:
+                if verbose:
+                    print("Differential Evolution got stuck generating new individuals, insert random sample")
+                trial_population.append(list(searchspace.get_random_sample(1)[0]))
+                i += 1
+                stuck = 0
 
 
         # --- c. Selection ---

From 7b5cd29572da13faf8554649a615a094fa187b54 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 10 Jul 2025 11:49:42 +0200
Subject: [PATCH 214/253] Improved passing of restrictions

---
 kernel_tuner/interface.py          |  1 -
 kernel_tuner/searchspace.py        | 21 ++++++++++++---------
 test/strategies/test_strategies.py | 16 ++++++++--------
 3 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index cc8a48112..32e91c86f 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -646,7 +646,6 @@ def tune_kernel(
 
         # ensure strategy_options is an Options object
         tuning_options.strategy_options = Options(strategy_options or {})
-
     # if no strategy selected
     else:
         strategy = brute_force
diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 6639a82f7..e263b600b 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -5,6 +5,7 @@
 from random import choice, shuffle
 from typing import List, Union
 from warnings import warn
+from copy import deepcopy
 
 import numpy as np
 from constraint import (
@@ -92,10 +93,11 @@ def __init__(
         self._tensorspace_param_config_structure = []
         self._map_tensor_to_param = {}
         self._map_param_to_tensor = {}
-        self.restrictions = restrictions.copy() if hasattr(restrictions, "copy") else restrictions
-        self.original_restrictions = restrictions.copy() if hasattr(restrictions, "copy") else restrictions
+        restrictions = list(restrictions) if not isinstance(restrictions, (list, tuple)) else restrictions
+        self.restrictions = deepcopy(restrictions)
+        self.original_restrictions = deepcopy(restrictions)  # keep the original restrictions, so that the searchspace can be modified later
         # the searchspace can add commonly used constraints (e.g. maxprod(blocks) <= maxthreads)
-        self._modified_restrictions = restrictions.copy() if hasattr(restrictions, "copy") else restrictions
+        self._modified_restrictions = deepcopy(restrictions)
         self.param_names = list(self.tune_params.keys())
         self.params_values = tuple(tuple(param_vals) for param_vals in self.tune_params.values())
         self.params_values_indices = None
@@ -479,8 +481,9 @@ def __build_searchspace(self, block_size_names: list, max_threads: int, solver:
 
     def __add_restrictions(self, parameter_space: Problem) -> Problem:
         """Add the user-specified restrictions as constraints on the parameter space."""
-        if isinstance(self.restrictions, list):
-            for restriction in self.restrictions:
+        restrictions = deepcopy(self.restrictions)
+        if isinstance(restrictions, list):
+            for restriction in restrictions:
                 required_params = self.param_names
 
                 # (un)wrap where necessary
@@ -510,14 +513,14 @@ def __add_restrictions(self, parameter_space: Problem) -> Problem:
                     raise ValueError(f"Unrecognized restriction type {type(restriction)} ({restriction})")
 
         # if the restrictions are the old monolithic function, apply them directly (only for backwards compatibility, likely slower than well-specified constraints!)
-        elif callable(self.restrictions):
+        elif callable(restrictions):
 
             def restrictions_wrapper(*args):
-                return check_instance_restrictions(self.restrictions, dict(zip(self.param_names, args)), False)
+                return check_instance_restrictions(restrictions, dict(zip(self.param_names, args)), False)
 
             parameter_space.addConstraint(FunctionConstraint(restrictions_wrapper), self.param_names)
-        elif self.restrictions is not None:
-            raise ValueError(f"The restrictions are of unsupported type {type(self.restrictions)}")
+        elif restrictions is not None:
+            raise ValueError(f"The restrictions are of unsupported type {type(restrictions)}")
         return parameter_space
 
     def __parse_restrictions_pysmt(self, restrictions: list, tune_params: dict, symbols: dict):
diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
index 5fd0c4d6f..449a942d7 100644
--- a/test/strategies/test_strategies.py
+++ b/test/strategies/test_strategies.py
@@ -67,7 +67,11 @@ def test_strategies(vector_add, strategy):
     if strategy != "brute_force":
         filter_options["max_fevals"] = 10
 
-    restrictions = ["test_string == 'alg_2'", "test_bool == True", "test_mixed == 2.45"]
+    restrictions = [
+        "test_string == 'alg_2'", 
+        "test_bool == True", 
+        "test_mixed == 2.45"
+    ]
 
     # pyATF can't handle non-number tune parameters, so we filter them out
     cache_filename_local = cache_filename
@@ -81,6 +85,7 @@ def test_strategies(vector_add, strategy):
 
     # run the tuning in simulation mode
     assert cache_filename_local.exists()
+    assert restrictions is not None
     results, _ = kernel_tuner.tune_kernel(*vector_add, restrictions=restrictions, strategy=strategy, strategy_options=filter_options,
                                          verbose=False, cache=cache_filename_local, simulation_mode=True)
 
@@ -123,15 +128,10 @@ def test_strategies(vector_add, strategy):
     x0 = [256]
     filter_options["x0"] = x0
     if not strategy in ["brute_force", "random_sample", "bayes_opt"]:
-        results, _ = kernel_tuner.tune_kernel(*vector_add, strategy=strategy, strategy_options=filter_options,
+        results, _ = kernel_tuner.tune_kernel(*vector_add, restrictions=restrictions, strategy=strategy, strategy_options=filter_options,
                                             verbose=False, cache=cache_filename, simulation_mode=True)
         assert results[0]["block_size_x"] == x0[0]
     else:
         with pytest.raises(ValueError):
-            results, _ = kernel_tuner.tune_kernel(*vector_add, strategy=strategy, strategy_options=filter_options,
+            results, _ = kernel_tuner.tune_kernel(*vector_add, restrictions=restrictions, strategy=strategy, strategy_options=filter_options,
                                             verbose=False, cache=cache_filename, simulation_mode=True)
-
-
-
-
-

From e0aaf24668975dea3b81999f0c36111c01a52486 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 10 Jul 2025 16:39:00 +0200
Subject: [PATCH 215/253] Improvements to how non-numeric configurations are
 handled

---
 kernel_tuner/strategies/common.py | 54 +++++++++++++++++++------------
 1 file changed, 33 insertions(+), 21 deletions(-)

diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index 3cdede7ed..0e21fc69a 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -62,6 +62,11 @@ def get_options(strategy_options, options, unsupported=None):
     return [strategy_options.get(opt, default) for opt, (_, default) in options.items()]
 
 
+def is_number(value) -> bool:
+    """Check if a value is a real number (false on booleans and complex numbers)."""
+    return isinstance(value, numbers.Real) and not isinstance(value, bool)
+
+
 class CostFunc:
     """Class encapsulating the CostFunc method."""
 
@@ -73,7 +78,7 @@ def __init__(
         *,
         scaling=False,
         snap=True,
-        encode_non_numeric=False,
+        encode_non_numeric=None,
         return_invalid=False,
         return_raw=None,
     ):
@@ -85,35 +90,36 @@ def __init__(
             runner: the runner to use.
             scaling: whether to internally scale parameter values. Defaults to False.
             snap: whether to snap given configurations to their closests equivalent in the space. Defaults to True.
-            encode_non_numeric: whether to externally encode non-numeric parameter values. Defaults to False.
+            encode_non_numeric: whether to encode non-numeric parameter values. Defaults to None, meaning it is applied when necessary.
             return_invalid: whether to return the util.ErrorConfig of an invalid configuration. Defaults to False.
             return_raw: returns (result, results[raw]). Key inferred from objective if set to True. Defaults to None.
         """
-        self.runner = runner
-        self.snap = snap
-        self.scaling = scaling
-        self.encode_non_numeric = encode_non_numeric
-        self.return_invalid = return_invalid
-        self.return_raw = return_raw
-        if return_raw is True:
-            self.return_raw = f"{tuning_options['objective']}s"
         self.searchspace = searchspace
         self.tuning_options = tuning_options
         if isinstance(self.tuning_options, dict):
             self.tuning_options["max_fevals"] = min(
                 tuning_options["max_fevals"] if "max_fevals" in tuning_options else np.inf, searchspace.size
             )
+        self.runner = runner
+        self.scaling = scaling
+        self.snap = snap
+        self.encode_non_numeric = encode_non_numeric if encode_non_numeric is not None else not all([all(is_number(v) for v in param_values) for param_values in self.searchspace.params_values])
+        self.return_invalid = return_invalid
+        self.return_raw = return_raw
+        if return_raw is True:
+            self.return_raw = f"{tuning_options['objective']}s"
         self.results = []
         self.budget_spent_fraction = 0.0
 
         # if enabled, encode non-numeric parameter values as a numeric value
+        # NOTE careful, this shouldn't conflict with Searchspace tensorspace
         if self.encode_non_numeric:
             self._map_param_to_encoded = {}
             self._map_encoded_to_param = {}
             self.encoded_params_values = []
             for i, param_values in enumerate(self.searchspace.params_values):
                 encoded_values = param_values
-                if not all(isinstance(v, numbers.Real) for v in param_values):
+                if not all(is_number(v) for v in param_values):
                     encoded_values = np.arange(
                         len(param_values)
                     )  # NOTE when changing this, adjust the rounding in encoded_to_params
@@ -124,8 +130,10 @@ def __init__(
     def __call__(self, x, check_restrictions=True):
         """Cost function used by almost all strategies."""
         self.runner.last_strategy_time = 1000 * (perf_counter() - self.runner.last_strategy_start_time)
-        if self.encode_non_numeric:
-            x = self.encoded_to_params(x)
+        if self.encode_non_numeric and not self.scaling:
+            x_numeric = self.params_to_encoded(x)
+        else:
+            x_numeric = x
 
         # error value to return for numeric optimizers that need a numerical value
         logging.debug("_cost_func called")
@@ -137,7 +145,9 @@ def __call__(self, x, check_restrictions=True):
         # snap values in x to nearest actual value for each parameter, unscale x if needed
         if self.snap:
             if self.scaling:
-                params = unscale_and_snap_to_nearest(x, self.searchspace.tune_params, self.tuning_options.eps)
+                params = unscale_and_snap_to_nearest(x_numeric, self.searchspace.tune_params, self.tuning_options.eps)
+                if self.encode_non_numeric and not self.scaling:
+                    params = self.encoded_to_params(params)
             else:
                 params = snap_to_nearest_config(x, self.searchspace.tune_params)
         else:
@@ -155,8 +165,10 @@ def __call__(self, x, check_restrictions=True):
 
             if "constraint_aware" in self.tuning_options.strategy_options and self.tuning_options.strategy_options["constraint_aware"]:
                 # attempt to repair
-                new_params = unscale_and_snap_to_nearest_valid(x, params, self.searchspace, self.tuning_options.eps)
+                new_params = unscale_and_snap_to_nearest_valid(x_numeric, params, self.searchspace, self.tuning_options.eps)
                 if new_params:
+                    if self.encode_non_numeric:
+                        new_params = self.encoded_to_params(new_params)
                     params = new_params
                     legal = True
                     x_int = ",".join([str(i) for i in params])
@@ -209,6 +221,7 @@ def get_bounds_x0_eps(self):
 
         if "x0" in self.tuning_options.strategy_options:
             x0 = self.tuning_options.strategy_options.x0
+            assert isinstance(x0, (tuple, list)) and len(x0) == len(values), f"Invalid x0: {x0}, expected number of parameters of `tune_params` to match ({len(values)})"
         else:
             x0 = None
 
@@ -242,11 +255,7 @@ def get_bounds(self):
         """Create a bounds array from the tunable parameters."""
         bounds = []
         for values in self.encoded_params_values if self.encode_non_numeric else self.searchspace.params_values:
-            try:
-                bounds.append((min(values), max(values)))
-            except TypeError:
-                # if values are not numeric, use the first and last value as bounds
-                bounds.append((values[0], values[-1]))
+            bounds.append((min(values), max(values)))
         return bounds
 
     def encoded_to_params(self, config):
@@ -277,7 +286,10 @@ def params_to_encoded(self, config):
             raise ValueError("'encode_non_numeric' must be set to true to use this function.")
         encoded = []
         for i, v in enumerate(config):
-            encoded.append(self._map_param_to_encoded[i][v] if i in self._map_param_to_encoded else v)
+            try:
+                encoded.append(self._map_param_to_encoded[i][v] if i in self._map_param_to_encoded else v)
+            except KeyError:
+                raise KeyError(f"{config} parameter value {v} not found in {self._map_param_to_encoded} for parameter {i}.")
         assert len(encoded) == len(config)
         return encoded
 

From 2d41660aef3651b8da4a3898c280774bdb4f1c21 Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Thu, 10 Jul 2025 16:57:19 +0200
Subject: [PATCH 216/253] string values compatible

---
 kernel_tuner/strategies/diff_evo.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index 6268ddee6..c441f446e 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -71,7 +71,7 @@ def indices_to_values(individual_indices, tune_params):
     values = []
     for dim, idx in enumerate(individual_indices):
         values.append(tune_params_list[dim][idx])
-    return np.array(values)
+    return values
 
 
 def parse_method(method):
@@ -104,7 +104,7 @@ def generate_population(tune_params, max_idx, popsize, searchspace, constraint_a
     if constraint_aware:
         samples = LatinHypercube(len(tune_params)).integers(l_bounds=0, u_bounds=max_idx, n=popsize, endpoint=True)
         population = [indices_to_values(sample, tune_params) for sample in samples]
-        population = np.array([repair(individual, searchspace) for individual in population])
+        population = [repair(individual, searchspace) for individual in population]
     else:
         population = []
         for _ in range(popsize):
@@ -112,7 +112,7 @@ def generate_population(tune_params, max_idx, popsize, searchspace, constraint_a
             for key in tune_params:
                 ind.append(random.choice(tune_params[key]))
             population.append(ind)
-        population = np.array(population)
+        population = population
     return population
 
 
@@ -354,7 +354,7 @@ def binomial_crossover(donor_vector, target, CR):
     # Apply crossover
     trial_vector[crossover_points] = donor_vector[crossover_points]
 
-    return trial_vector
+    return list(trial_vector)
 
 
 def exponential_crossover(donor_vector, target, CR):
@@ -379,7 +379,7 @@ def exponential_crossover(donor_vector, target, CR):
         trial_idx[crossover_point] = donor_vector[crossover_point]
         l += 1
 
-    return trial_idx
+    return list(trial_idx)
 
 
 def repair(trial_vector, searchspace):
@@ -394,7 +394,7 @@ def repair(trial_vector, searchspace):
 
             # if we have found valid neighboring configurations, select one at random
             if len(neighbors) > 0:
-                new_trial_vector = np.array(list(random.choice(neighbors)))
+                new_trial_vector = list(random.choice(neighbors))
                 print(f"Differential evolution resulted in invalid config {trial_vector=}, repaired to {new_trial_vector=}")
                 return new_trial_vector
 

From 7e4f38cddb4c5dfa181d17068b06796a580b2140 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 10 Jul 2025 17:03:45 +0200
Subject: [PATCH 217/253] Improvements to how non-numeric configurations are
 handled

---
 kernel_tuner/strategies/common.py | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index 0e21fc69a..55a987c99 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -130,10 +130,6 @@ def __init__(
     def __call__(self, x, check_restrictions=True):
         """Cost function used by almost all strategies."""
         self.runner.last_strategy_time = 1000 * (perf_counter() - self.runner.last_strategy_start_time)
-        if self.encode_non_numeric and not self.scaling:
-            x_numeric = self.params_to_encoded(x)
-        else:
-            x_numeric = x
 
         # error value to return for numeric optimizers that need a numerical value
         logging.debug("_cost_func called")
@@ -145,9 +141,7 @@ def __call__(self, x, check_restrictions=True):
         # snap values in x to nearest actual value for each parameter, unscale x if needed
         if self.snap:
             if self.scaling:
-                params = unscale_and_snap_to_nearest(x_numeric, self.searchspace.tune_params, self.tuning_options.eps)
-                if self.encode_non_numeric and not self.scaling:
-                    params = self.encoded_to_params(params)
+                params = unscale_and_snap_to_nearest(x, self.searchspace.tune_params, self.tuning_options.eps)
             else:
                 params = snap_to_nearest_config(x, self.searchspace.tune_params)
         else:
@@ -165,10 +159,8 @@ def __call__(self, x, check_restrictions=True):
 
             if "constraint_aware" in self.tuning_options.strategy_options and self.tuning_options.strategy_options["constraint_aware"]:
                 # attempt to repair
-                new_params = unscale_and_snap_to_nearest_valid(x_numeric, params, self.searchspace, self.tuning_options.eps)
+                new_params = unscale_and_snap_to_nearest_valid(x, params, self.searchspace, self.tuning_options.eps)
                 if new_params:
-                    if self.encode_non_numeric:
-                        new_params = self.encoded_to_params(new_params)
                     params = new_params
                     legal = True
                     x_int = ",".join([str(i) for i in params])
@@ -254,8 +246,12 @@ def get_bounds_x0_eps(self):
     def get_bounds(self):
         """Create a bounds array from the tunable parameters."""
         bounds = []
-        for values in self.encoded_params_values if self.encode_non_numeric else self.searchspace.params_values:
-            bounds.append((min(values), max(values)))
+        for values in self.searchspace.params_values:
+            try:
+                bounds.append((min(values), max(values)))
+            except TypeError:
+                # if values are not numbers, use the first and last value as bounds
+                bounds.append((values[0], values[-1]))
         return bounds
 
     def encoded_to_params(self, config):

From 8da11a7fcec8e309709c1c6fecf09a6741ede883 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 10 Jul 2025 17:07:11 +0200
Subject: [PATCH 218/253] Searchspace object improvements in checking for
 tensorspace and error messaging

---
 kernel_tuner/searchspace.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index e263b600b..501995614 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -93,7 +93,7 @@ def __init__(
         self._tensorspace_param_config_structure = []
         self._map_tensor_to_param = {}
         self._map_param_to_tensor = {}
-        restrictions = list(restrictions) if not isinstance(restrictions, (list, tuple)) else restrictions
+        restrictions = [restrictions] if not isinstance(restrictions, (list, tuple)) else restrictions
         self.restrictions = deepcopy(restrictions)
         self.original_restrictions = deepcopy(restrictions)  # keep the original restrictions, so that the searchspace can be modified later
         # the searchspace can add commonly used constraints (e.g. maxprod(blocks) <= maxthreads)
@@ -687,7 +687,15 @@ def get_list_numpy(self) -> np.ndarray:
 
     def get_param_indices(self, param_config: tuple) -> tuple:
         """For each parameter value in the param config, find the index in the tunable parameters."""
-        return tuple(self.params_values[index].index(param_value) for index, param_value in enumerate(param_config))
+        try:
+            return tuple(self.params_values[index].index(param_value) for index, param_value in enumerate(param_config))
+        except ValueError as e:
+            for index, param_value in enumerate(param_config):
+                if param_value not in self.params_values[index]:
+                    # if the parameter value is not in the list of values for that parameter, raise an error
+                    raise ValueError(
+                        f"Parameter value {param_value} ({type(param_value)}) is not in the list of values {self.params_values[index]}"
+                    ) from e
 
     def get_param_configs_at_indices(self, indices: List[int]) -> List[tuple]:
         """Get the param configs at the given indices."""
@@ -753,9 +761,13 @@ def initialize_tensorspace(self, dtype=None, device=None):
         bounds = torch.tensor(bounds, **self.tensor_kwargs)
         self._tensorspace_bounds = torch.cat([bounds[:, 0], bounds[:, 1]]).reshape((2, bounds.shape[0]))
 
+    def has_tensorspace(self) -> bool:
+        """Check if the tensorspace has been initialized."""
+        return self._tensorspace is not None
+
     def get_tensorspace(self):
         """Get the searchspace encoded in a Tensor. To use a non-default dtype or device, call `initialize_tensorspace` first."""
-        if self._tensorspace is None:
+        if not self.has_tensorspace():
             self.initialize_tensorspace()
         return self._tensorspace
 
@@ -800,7 +812,7 @@ def tensor_to_param_config(self, tensor):
 
     def get_tensorspace_bounds(self):
         """Get the bounds to the tensorspace parameters, returned as a 2 x d dimensional tensor, and the indices of the parameters."""
-        if self._tensorspace is None:
+        if not self.has_tensorspace():
             self.initialize_tensorspace()
         return self._tensorspace_bounds, self._tensorspace_bounds_indices
 

From 28a149e0e3ddc93bb0f16a41f35051b5dd3e0232 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 10 Jul 2025 17:08:29 +0200
Subject: [PATCH 219/253] Improved tests using restrictions and extended
 parameters where necessary

---
 test/strategies/test_diff_evo.py   | 6 ++++++
 test/strategies/test_strategies.py | 4 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/test/strategies/test_diff_evo.py b/test/strategies/test_diff_evo.py
index d7ff1dbb7..4da12dfc6 100644
--- a/test/strategies/test_diff_evo.py
+++ b/test/strategies/test_diff_evo.py
@@ -172,8 +172,14 @@ def check_result(result, expected):
 
 @pytest.mark.parametrize("method", supported_methods)
 def test_diff_evo(vector_add, method):
+    restrictions = [
+        "test_string == 'alg_2'", 
+        "test_bool == True", 
+        "test_mixed == 2.45"
+    ]
     result, _ = tune_kernel(
         *vector_add,
+        restrictions=restrictions,
         strategy="diff_evo",
         strategy_options=dict(popsize=5, method=method),
         verbose=True,
diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
index 449a942d7..ea5a2994d 100644
--- a/test/strategies/test_strategies.py
+++ b/test/strategies/test_strategies.py
@@ -125,9 +125,9 @@ def test_strategies(vector_add, strategy):
             assert isinstance(res[expected_key], expected_type)
 
     # check if strategy respects user-specified starting point (x0)
-    x0 = [256]
+    x0 = [256, 'alg_2', 15, True, 2.45]
     filter_options["x0"] = x0
-    if not strategy in ["brute_force", "random_sample", "bayes_opt"]:
+    if not strategy in ["brute_force", "random_sample", "bayes_opt", "pyatf_strategies"]:
         results, _ = kernel_tuner.tune_kernel(*vector_add, restrictions=restrictions, strategy=strategy, strategy_options=filter_options,
                                             verbose=False, cache=cache_filename, simulation_mode=True)
         assert results[0]["block_size_x"] == x0[0]

From b170eef8397eb48bf067f2f90c7c90bc8876fc9a Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Thu, 10 Jul 2025 17:12:35 +0200
Subject: [PATCH 220/253] string values compatible, for real this time

---
 kernel_tuner/strategies/diff_evo.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index c441f446e..c8330d4dc 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -112,7 +112,6 @@ def generate_population(tune_params, max_idx, popsize, searchspace, constraint_a
             for key in tune_params:
                 ind.append(random.choice(tune_params[key]))
             population.append(ind)
-        population = population
     return population
 
 
@@ -225,9 +224,8 @@ def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F,
             if trial_cost <= population_cost[i]:
 
                 # check if trial_vector is not already in population
-                idxs = [idx for idx in range(popsize) if idx != i]
-                if trial_vector not in population[idxs]:
-                    population[i] = np.array(trial_vector)
+                if population.count(trial_vector) == 0:
+                    population[i] = trial_vector
                     population_cost[i] = trial_cost
                     no_change = False
 
@@ -352,7 +350,7 @@ def binomial_crossover(donor_vector, target, CR):
         crossover_points[np.random.randint(0, dimensions)] = True
 
     # Apply crossover
-    trial_vector[crossover_points] = donor_vector[crossover_points]
+    trial_vector[crossover_points] = np.array(donor_vector)[crossover_points]
 
     return list(trial_vector)
 
@@ -376,7 +374,7 @@ def exponential_crossover(donor_vector, target, CR):
     l = 0
     while np.random.rand() < CR and l < dimensions:
         crossover_point = (start_point + l) % dimensions
-        trial_idx[crossover_point] = donor_vector[crossover_point]
+        trial_idx[crossover_point] = np.array(donor_vector)[crossover_point]
         l += 1
 
     return list(trial_idx)

From 19d51275c59126e5a790165de1408269d5c567a6 Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Fri, 11 Jul 2025 09:24:20 +0200
Subject: [PATCH 221/253] further reducing use of numpy arrays for representing
 configs

---
 kernel_tuner/strategies/diff_evo.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index c8330d4dc..c415f6f92 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -338,7 +338,7 @@ def mutate_de_2(best_idx, randos_idx, F, min_idx, max_idx, best):
 def binomial_crossover(donor_vector, target, CR):
     """Performs binomial crossover of donor_vector with target given crossover rate CR."""
     # Create the trial vector by mixing parameters from the target and donor vectors
-    trial_vector = np.copy(target)
+    trial_vector = target.copy()
     dimensions = len(donor_vector)
 
     # Generate a random array of floats for comparison with the crossover rate CR
@@ -350,9 +350,11 @@ def binomial_crossover(donor_vector, target, CR):
         crossover_points[np.random.randint(0, dimensions)] = True
 
     # Apply crossover
-    trial_vector[crossover_points] = np.array(donor_vector)[crossover_points]
+    for i, d in enumerate(donor_vector):
+        if crossover_points[i]:
+            trial_vector[i] = donor_vector[i]
 
-    return list(trial_vector)
+    return trial_vector
 
 
 def exponential_crossover(donor_vector, target, CR):
@@ -363,7 +365,7 @@ def exponential_crossover(donor_vector, target, CR):
     from the donor vector and the rest from the target vector.
     """
     dimensions = len(target)
-    trial_idx = np.copy(target)
+    trial_vector = target.copy()
 
     # 1. Select a random starting point for the crossover block.
     start_point = np.random.randint(0, dimensions)
@@ -374,10 +376,10 @@ def exponential_crossover(donor_vector, target, CR):
     l = 0
     while np.random.rand() < CR and l < dimensions:
         crossover_point = (start_point + l) % dimensions
-        trial_idx[crossover_point] = np.array(donor_vector)[crossover_point]
+        trial_vector[crossover_point] = donor_vector[crossover_point]
         l += 1
 
-    return list(trial_idx)
+    return trial_vector
 
 
 def repair(trial_vector, searchspace):

From 214865f26610ca53d1d7086600e67fb43d93c278 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 11 Jul 2025 13:39:22 +0200
Subject: [PATCH 222/253] Removed encoding of non-numeric parameters in favour
 of index-based

---
 kernel_tuner/strategies/common.py | 60 -------------------------------
 1 file changed, 60 deletions(-)

diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index 55a987c99..f595937d7 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -1,13 +1,11 @@
 """Module for functionality that is commonly used throughout the strategies."""
 
 import logging
-import numbers
 import sys
 from time import perf_counter
 
 import numpy as np
 from scipy.spatial import distance
-import numbers
 
 from kernel_tuner import util
 from kernel_tuner.searchspace import Searchspace
@@ -62,11 +60,6 @@ def get_options(strategy_options, options, unsupported=None):
     return [strategy_options.get(opt, default) for opt, (_, default) in options.items()]
 
 
-def is_number(value) -> bool:
-    """Check if a value is a real number (false on booleans and complex numbers)."""
-    return isinstance(value, numbers.Real) and not isinstance(value, bool)
-
-
 class CostFunc:
     """Class encapsulating the CostFunc method."""
 
@@ -78,7 +71,6 @@ def __init__(
         *,
         scaling=False,
         snap=True,
-        encode_non_numeric=None,
         return_invalid=False,
         return_raw=None,
     ):
@@ -90,7 +82,6 @@ def __init__(
             runner: the runner to use.
             scaling: whether to internally scale parameter values. Defaults to False.
             snap: whether to snap given configurations to their closests equivalent in the space. Defaults to True.
-            encode_non_numeric: whether to encode non-numeric parameter values. Defaults to None, meaning it is applied when necessary.
             return_invalid: whether to return the util.ErrorConfig of an invalid configuration. Defaults to False.
             return_raw: returns (result, results[raw]). Key inferred from objective if set to True. Defaults to None.
         """
@@ -103,7 +94,6 @@ def __init__(
         self.runner = runner
         self.scaling = scaling
         self.snap = snap
-        self.encode_non_numeric = encode_non_numeric if encode_non_numeric is not None else not all([all(is_number(v) for v in param_values) for param_values in self.searchspace.params_values])
         self.return_invalid = return_invalid
         self.return_raw = return_raw
         if return_raw is True:
@@ -111,21 +101,6 @@ def __init__(
         self.results = []
         self.budget_spent_fraction = 0.0
 
-        # if enabled, encode non-numeric parameter values as a numeric value
-        # NOTE careful, this shouldn't conflict with Searchspace tensorspace
-        if self.encode_non_numeric:
-            self._map_param_to_encoded = {}
-            self._map_encoded_to_param = {}
-            self.encoded_params_values = []
-            for i, param_values in enumerate(self.searchspace.params_values):
-                encoded_values = param_values
-                if not all(is_number(v) for v in param_values):
-                    encoded_values = np.arange(
-                        len(param_values)
-                    )  # NOTE when changing this, adjust the rounding in encoded_to_params
-                    self._map_param_to_encoded[i] = dict(zip(param_values, encoded_values))
-                    self._map_encoded_to_param[i] = dict(zip(encoded_values, param_values))
-                self.encoded_params_values.append(encoded_values)
 
     def __call__(self, x, check_restrictions=True):
         """Cost function used by almost all strategies."""
@@ -254,41 +229,6 @@ def get_bounds(self):
                 bounds.append((values[0], values[-1]))
         return bounds
 
-    def encoded_to_params(self, config):
-        """Convert from an encoded configuration to the real parameters."""
-        if not self.encode_non_numeric:
-            raise ValueError("'encode_non_numeric' must be set to true to use this function.")
-        params = []
-        for i, v in enumerate(config):
-            # params.append(self._map_encoded_to_param[i][v] if i in self._map_encoded_to_param else v)
-            if i in self._map_encoded_to_param:
-                encoding = self._map_encoded_to_param[i]
-                if v in encoding:
-                    param = encoding[v]
-                elif isinstance(v, float):
-                    # try to resolve a rounding error due to floating point arithmetic / continous solver
-                    param = encoding[round(v)]
-                else:
-                    raise ValueError(f"Encoded value {v} not found in {self._map_encoded_to_param[i]}")
-            else:
-                param = v
-            params.append(param)
-        assert len(params) == len(config)
-        return params
-
-    def params_to_encoded(self, config):
-        """Convert from a parameter configuration to the encoded configuration."""
-        if not self.encode_non_numeric:
-            raise ValueError("'encode_non_numeric' must be set to true to use this function.")
-        encoded = []
-        for i, v in enumerate(config):
-            try:
-                encoded.append(self._map_param_to_encoded[i][v] if i in self._map_param_to_encoded else v)
-            except KeyError:
-                raise KeyError(f"{config} parameter value {v} not found in {self._map_param_to_encoded} for parameter {i}.")
-        assert len(encoded) == len(config)
-        return encoded
-
 
 def setup_method_arguments(method, bounds):
     """Prepare method specific arguments."""

From c6917bdf7103667407c1fc512643725baf883d3f Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 11 Jul 2025 17:52:49 +0200
Subject: [PATCH 223/253] Implemented get_random_neighbor and helper functions
 in Searchspace, which are much faster to find random neighbors than looking
 up all neighbors and selecting a random one

---
 kernel_tuner/searchspace.py | 107 ++++++++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 501995614..049da7235 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -826,6 +826,91 @@ def __get_neighbors_indices_hamming(self, param_config: tuple) -> List[int]:
         matching_indices = (num_matching_params == self.num_params - 1).nonzero()[0]
         return matching_indices
 
+    def __get_random_neighbor_hamming(self, param_config: tuple) -> tuple:
+        """Get a random neighbor at 1 Hamming distance from the parameter configuration."""
+        arr = self.get_list_numpy()
+        target = np.array(param_config)
+        assert arr[0].shape == target.shape
+
+        # find the first row that differs from the target in exactly one column, return as soon as one is found
+        random_order_indices = np.random.permutation(arr.shape[0])
+        for i in random_order_indices:
+            # assert arr[i].shape == target.shape, f"Row {i} shape {arr[i].shape} does not match target shape {target.shape}"
+            if np.count_nonzero(arr[i] != target) == 1:
+                return self.get_param_configs_at_indices([i])[0]
+        return None
+
+    def __get_random_neighbor_adjacent(self, param_config: tuple) -> tuple:
+        """Get an approximately random adjacent neighbor of the parameter configuration."""
+        # NOTE: this is not truly random as we only progressively increase the allowed index difference if no neighbors are found, but much faster than generating all neighbors
+
+        # get the indices of the parameter values
+        if self.params_values_indices is None:
+            self.__prepare_neighbors_index()
+        param_config_index = self.get_param_config_index(param_config)
+        param_config_value_indices = (
+            self.get_param_indices(param_config)
+            if param_config_index is None
+            else self.params_values_indices[param_config_index]
+        )
+        max_index_difference_per_param = [max(len(self.params_values[p]) - 1 - i, i) for p, i in enumerate(param_config_value_indices)]
+
+        # calculate the absolute difference between the parameter value indices
+        abs_index_difference = np.abs(self.params_values_indices - param_config_value_indices)
+
+        # calculate the difference between the parameter value indices
+        index_difference = np.abs(self.params_values_indices - param_config_value_indices)
+        # transpose to get the param indices difference per parameter instead of per param config
+        index_difference_transposed = index_difference.transpose()
+
+        # start at an index difference of 1, progressively increase - potentially expensive if there are no neighbors until very late
+        max_index_difference = max(max_index_difference_per_param)
+        allowed_index_difference = 1
+        allowed_values = [[v] for v in param_config]
+        while allowed_index_difference <= max_index_difference:
+            # get the param config indices where the difference is allowed_index_difference or less for each position
+            matching_indices = (np.max(abs_index_difference, axis=1) <= allowed_index_difference).nonzero()[0]
+            # as the selected param config does not differ anywhere, remove it from the matches
+            if param_config_index is not None:
+                matching_indices = np.setdiff1d(matching_indices, [param_config_index], assume_unique=False)
+            
+            # if there are matching indices, return a random one
+            if len(matching_indices) > 0:
+                # get the random index from the matching indices
+                random_neighbor_index = np.random.choice(matching_indices)
+                return self.get_param_configs_at_indices([random_neighbor_index])[0]
+
+            # if there are no matching indices, increase the allowed index difference and start over
+            allowed_index_difference += 1
+        return None
+
+        # alternative implementation
+        # # start at an index difference of 1, progressively increase - potentially expensive if there are no neighbors
+        # allowed_index_difference = 1
+        # allowed_values = [[v] for v in param_config]
+        # while evaluated_configs < self.size:
+        #     # for each parameter, add the allowed values
+        #     for i, value in enumerate(param_config):
+        #         param_values = self.tune_params[i]
+        #         current_index = param_values.index(value)
+
+        #         # add lower neighbor (if exists)
+        #         if current_index - allowed_index_difference >= 0:
+        #             allowed_values[i].append(param_values[current_index - allowed_index_difference])
+        #             neighbor_candidates.append(tuple(lower_neighbor))
+
+        #         # add upper neighbor (if exists)
+        #         if current_index + allowed_index_difference < len(param_values):
+        #             allowed_values[i].append(param_values[current_index + allowed_index_difference])
+
+        #     # create the random list of candidate neighbors (Cartesian product of allowed values)
+        #     from itertools import product
+        #     candidate_neighbors = product(*allowed_values)
+        #     for candidate in candidate_neighbors:
+        #       # check if the candidate has not been previously evaluated
+        #       # check if the candidate neighbors are valid
+        # return None
+
     def __get_neighbors_indices_strictlyadjacent(
         self, param_config_index: int = None, param_config: tuple = None
     ) -> List[int]:
@@ -982,6 +1067,28 @@ def get_neighbors(self, param_config: tuple, neighbor_method=None, build_full_ca
         """Get the neighbors for a parameter configuration."""
         return self.get_param_configs_at_indices(self.get_neighbors_indices(param_config, neighbor_method, build_full_cache))
 
+    def get_random_neighbor(self, param_config: tuple, neighbor_method=None) -> tuple:
+        """Get an approximately random neighbor for a parameter configuration. Much faster than taking a random choice of all neighbors, but does not build cache."""
+        if self.are_neighbors_indices_cached(param_config, neighbor_method):
+            neighbors = self.get_neighbors(param_config, neighbor_method)
+            return choice(neighbors)
+        else:
+            # check if there is a neighbor method to use
+            if neighbor_method is None:
+                neighbor_method = self.neighbor_method
+
+            # find the random neighbor based on the method
+            if neighbor_method == "Hamming":
+                return self.__get_random_neighbor_hamming(param_config)
+            elif neighbor_method == "adjacent":
+                return self.__get_random_neighbor_adjacent(param_config)
+            else:
+                # not much performance to be gained for strictly-adjacent neighbors, just generate the neighbors
+                neighbors = self.get_neighbors(param_config, neighbor_method)
+                if len(neighbors) == 0:
+                    return None
+                return choice(neighbors)
+
     def get_param_neighbors(self, param_config: tuple, index: int, neighbor_method: str, randomize: bool) -> list:
         """Get the neighboring parameters at an index."""
         original_value = param_config[index]

From d1d653e5b8e7af620f6a452e4d4946d59570cb60 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 11 Jul 2025 17:53:14 +0200
Subject: [PATCH 224/253] Implemented a test for the new get_random_neighbor
 method

---
 test/test_searchspace.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/test/test_searchspace.py b/test/test_searchspace.py
index da3688889..56d8256e1 100644
--- a/test/test_searchspace.py
+++ b/test/test_searchspace.py
@@ -246,8 +246,16 @@ def test_neighbors_hamming():
         (3, 4, 'string_1'),
     ]
 
+    # test the neighbors
     __test_neighbors(test_config, expected_neighbors, "Hamming")
 
+    # test the random neighbor function
+    neighbors = simple_searchspace.get_neighbors(test_config, "Hamming")
+    for i in range(10):
+        random_neighbor = simple_searchspace.get_random_neighbor(test_config, "Hamming")
+        assert random_neighbor in neighbors
+        assert random_neighbor != test_config
+
 
 def test_neighbors_strictlyadjacent():
     """Test whether the strictly adjacent neighbors are as expected."""
@@ -259,8 +267,16 @@ def test_neighbors_strictlyadjacent():
         (1.5, 5.5, 'string_2'),
     ]
 
+    # test the neighbors
     __test_neighbors(test_config, expected_neighbors, "strictly-adjacent")
 
+    # test the random neighbor function
+    neighbors = simple_searchspace.get_neighbors(test_config, "strictly-adjacent")
+    for i in range(10):
+        random_neighbor = simple_searchspace.get_random_neighbor(test_config, "strictly-adjacent")
+        assert random_neighbor in neighbors
+        assert random_neighbor != test_config
+
 
 def test_neighbors_adjacent():
     """Test whether the adjacent neighbors are as expected."""
@@ -272,8 +288,16 @@ def test_neighbors_adjacent():
         (1.5, 5.5, 'string_2'),
     ]
 
+    # test the neighbors
     __test_neighbors(test_config, expected_neighbors, "adjacent")
 
+    # test the random neighbor function
+    neighbors = simple_searchspace.get_neighbors(test_config, "adjacent")
+    for i in range(10):
+        random_neighbor = simple_searchspace.get_random_neighbor(test_config, "adjacent")
+        assert random_neighbor in neighbors
+        assert random_neighbor != test_config
+
 
 def test_neighbors_fictious():
     """Test whether the neighbors are as expected for a fictious parameter configuration (i.e. not existing in the search space due to restrictions)."""

From 7fcfacb80d20767e2619b6c95a88389368916849 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 11 Jul 2025 18:14:02 +0200
Subject: [PATCH 225/253] Various improvements to random neighbor performance

---
 kernel_tuner/searchspace.py                   | 46 ++++---------------
 .../strategies/simulated_annealing.py         | 10 ++--
 2 files changed, 17 insertions(+), 39 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 049da7235..7b16349d3 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -868,49 +868,22 @@ def __get_random_neighbor_adjacent(self, param_config: tuple) -> tuple:
         allowed_index_difference = 1
         allowed_values = [[v] for v in param_config]
         while allowed_index_difference <= max_index_difference:
-            # get the param config indices where the difference is allowed_index_difference or less for each position
-            matching_indices = (np.max(abs_index_difference, axis=1) <= allowed_index_difference).nonzero()[0]
+            # get the param config indices where the difference is at most allowed_index_difference for each position
+            matching_indices = list((np.max(abs_index_difference, axis=1) <= allowed_index_difference).nonzero()[0])
             # as the selected param config does not differ anywhere, remove it from the matches
             if param_config_index is not None:
-                matching_indices = np.setdiff1d(matching_indices, [param_config_index], assume_unique=False)
+                matching_indices.remove(param_config_index)
             
             # if there are matching indices, return a random one
             if len(matching_indices) > 0:
                 # get the random index from the matching indices
-                random_neighbor_index = np.random.choice(matching_indices)
+                random_neighbor_index = choice(matching_indices)
                 return self.get_param_configs_at_indices([random_neighbor_index])[0]
 
             # if there are no matching indices, increase the allowed index difference and start over
             allowed_index_difference += 1
         return None
 
-        # alternative implementation
-        # # start at an index difference of 1, progressively increase - potentially expensive if there are no neighbors
-        # allowed_index_difference = 1
-        # allowed_values = [[v] for v in param_config]
-        # while evaluated_configs < self.size:
-        #     # for each parameter, add the allowed values
-        #     for i, value in enumerate(param_config):
-        #         param_values = self.tune_params[i]
-        #         current_index = param_values.index(value)
-
-        #         # add lower neighbor (if exists)
-        #         if current_index - allowed_index_difference >= 0:
-        #             allowed_values[i].append(param_values[current_index - allowed_index_difference])
-        #             neighbor_candidates.append(tuple(lower_neighbor))
-
-        #         # add upper neighbor (if exists)
-        #         if current_index + allowed_index_difference < len(param_values):
-        #             allowed_values[i].append(param_values[current_index + allowed_index_difference])
-
-        #     # create the random list of candidate neighbors (Cartesian product of allowed values)
-        #     from itertools import product
-        #     candidate_neighbors = product(*allowed_values)
-        #     for candidate in candidate_neighbors:
-        #       # check if the candidate has not been previously evaluated
-        #       # check if the candidate neighbors are valid
-        # return None
-
     def __get_neighbors_indices_strictlyadjacent(
         self, param_config_index: int = None, param_config: tuple = None
     ) -> List[int]:
@@ -926,7 +899,7 @@ def __get_neighbors_indices_strictlyadjacent(
         matching_indices = (np.max(abs_index_difference, axis=1) <= 1).nonzero()[0]
         # as the selected param config does not differ anywhere, remove it from the matches
         if param_config_index is not None:
-            matching_indices = np.setdiff1d(matching_indices, [param_config_index], assume_unique=False)
+            matching_indices = np.setdiff1d(matching_indices, [param_config_index], assume_unique=True)
         return matching_indices
 
     def __get_neighbors_indices_adjacent(self, param_config_index: int = None, param_config: tuple = None) -> List[int]:
@@ -962,7 +935,7 @@ def __get_neighbors_indices_adjacent(self, param_config_index: int = None, param
         )
         # as the selected param config does not differ anywhere, remove it from the matches
         if param_config_index is not None:
-            matching_indices = np.setdiff1d(matching_indices, [param_config_index], assume_unique=False)
+            matching_indices = np.setdiff1d(matching_indices, [param_config_index], assume_unique=True)
         return matching_indices
 
     def __build_neighbors_index(self, neighbor_method) -> List[List[int]]:
@@ -1078,10 +1051,11 @@ def get_random_neighbor(self, param_config: tuple, neighbor_method=None) -> tupl
                 neighbor_method = self.neighbor_method
 
             # find the random neighbor based on the method
-            if neighbor_method == "Hamming":
-                return self.__get_random_neighbor_hamming(param_config)
-            elif neighbor_method == "adjacent":
+            if neighbor_method == "adjacent":
                 return self.__get_random_neighbor_adjacent(param_config)
+            # elif neighbor_method == "Hamming":
+            #   this implementation is not as efficient as just generating all neighbors
+            #     return self.__get_random_neighbor_hamming(param_config)
             else:
                 # not much performance to be gained for strictly-adjacent neighbors, just generate the neighbors
                 neighbors = self.get_neighbors(param_config, neighbor_method)
diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py
index 2ca9c62ba..3f5b20fb4 100644
--- a/kernel_tuner/strategies/simulated_annealing.py
+++ b/kernel_tuner/strategies/simulated_annealing.py
@@ -116,10 +116,14 @@ def neighbor(pos, searchspace: Searchspace, constraint_aware=True):
 
     def random_neighbor(pos, method):
         """Helper method to return a random neighbor."""
-        neighbors = searchspace.get_neighbors(pos, neighbor_method=method)
-        if not neighbors:
+        # neighbors = searchspace.get_neighbors(pos, neighbor_method=method)
+        # if not neighbors:
+        #     return pos
+        # return random.choice(neighbors)
+        neighbor = searchspace.get_random_neighbor(pos, neighbor_method=method)
+        if neighbor is None:
             return pos
-        return random.choice(neighbors)
+        return neighbor
 
     size = len(pos)
 

From 6e61fd62081a9096a205787a7edadc3f8149e9ca Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 11 Jul 2025 18:38:09 +0200
Subject: [PATCH 226/253] Implemented getting a random neighbor over the full
 list where applicable, other improvements

---
 kernel_tuner/searchspace.py                  |  2 +-
 kernel_tuner/strategies/diff_evo.py          |  9 +++------
 kernel_tuner/strategies/genetic_algorithm.py | 21 ++++++++------------
 kernel_tuner/strategies/greedy_ils.py        |  3 +--
 4 files changed, 13 insertions(+), 22 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 7b16349d3..daf1b821d 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -1044,7 +1044,7 @@ def get_random_neighbor(self, param_config: tuple, neighbor_method=None) -> tupl
         """Get an approximately random neighbor for a parameter configuration. Much faster than taking a random choice of all neighbors, but does not build cache."""
         if self.are_neighbors_indices_cached(param_config, neighbor_method):
             neighbors = self.get_neighbors(param_config, neighbor_method)
-            return choice(neighbors)
+            return choice(neighbors) if len(neighbors) > 0 else None
         else:
             # check if there is a neighbor method to use
             if neighbor_method is None:
diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index 3240b5b2e..8cd3af53e 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -389,13 +389,10 @@ def repair(trial_vector, searchspace):
         # search for valid configurations neighboring trial_vector
         # start from strictly-adjacent to increasingly allowing more neighbors
         for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]:
-            neighbors = searchspace.get_neighbors_no_cache(tuple(trial_vector), neighbor_method=neighbor_method)
-
-            # if we have found valid neighboring configurations, select one at random
-            if len(neighbors) > 0:
-                new_trial_vector = list(random.choice(neighbors))
+            new_trial_vector = searchspace.get_random_neighbor(tuple(trial_vector), neighbor_method=neighbor_method)
+            if new_trial_vector is not None:
                 print(f"Differential evolution resulted in invalid config {trial_vector=}, repaired to {new_trial_vector=}")
-                return new_trial_vector
+                return list(new_trial_vector)
 
     return trial_vector
 
diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 76fa3d314..2da9d356f 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -155,17 +155,14 @@ def random_index_weighted(pop_size):
         return [population[ind][0] for ind in chosen]
 
 
-    def mutate(self, dna, cache=False):
+    def mutate(self, dna):
         """Mutate DNA with 1/mutation_chance chance."""
         # this is actually a neighbors problem with Hamming distance, choose randomly from returned searchspace list
         if int(random.random() * self.mutation_chance) == 0:
             if self.constraint_aware:
-                if cache:
-                    neighbors = self.searchspace.get_neighbors(tuple(dna), neighbor_method="Hamming")
-                else:
-                    neighbors = self.searchspace.get_neighbors_no_cache(tuple(dna), neighbor_method="Hamming")
-                if len(neighbors) > 0:
-                    return list(random.choice(neighbors))
+                neighbor = self.searchspace.get_random_neighbor(tuple(dna), neighbor_method="Hamming")
+                if neighbor is not None:
+                    return list(neighbor)
             else:
                 # select a tunable parameter at random
                 mutate_index = random.randint(0, len(self.tune_params)-1)
@@ -187,13 +184,11 @@ def repair(self, dna):
             # search for valid configurations neighboring this config
             # start from strictly-adjacent to increasingly allowing more neighbors
             for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]:
-                neighbors = self.searchspace.get_neighbors(tuple(dna), neighbor_method=neighbor_method)
-
+                neighbor = self.searchspace.get_random_neighbor(tuple(dna), neighbor_method=neighbor_method)
                 # if we have found valid neighboring configurations, select one at random
-                if len(neighbors) > 0:
-                    new_dna = list(random.choice(neighbors))
-                    # print(f"GA crossover resulted in invalid config {dna=}, repaired dna to {new_dna=}")
-                    return new_dna
+                if neighbor is not None:
+                    # print(f"GA crossover resulted in invalid config {dna=}, repaired dna to {neighbor=}")
+                    return list(neighbor)
 
         return dna
 
diff --git a/kernel_tuner/strategies/greedy_ils.py b/kernel_tuner/strategies/greedy_ils.py
index 9a78e795a..d9cf67ecc 100644
--- a/kernel_tuner/strategies/greedy_ils.py
+++ b/kernel_tuner/strategies/greedy_ils.py
@@ -60,8 +60,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 tune.__doc__ = common.get_strategy_docstring("Greedy Iterative Local Search (ILS)", _options)
 
 def mutate(indiv, searchspace: Searchspace):
-    neighbors = searchspace.get_neighbors(tuple(indiv), neighbor_method="Hamming")
-    return list(random_choice(neighbors))
+    return list(searchspace.get_random_neighbor(tuple(indiv), neighbor_method="Hamming"))
 
 
 def random_walk(indiv, permutation_size, no_improve, last_improve, searchspace: Searchspace):

From 83c40436170d9accbe68d30ec5cbaa5306434a1c Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 11 Jul 2025 18:52:55 +0200
Subject: [PATCH 227/253] Adjusted costfunc_kwargs defaults

---
 kernel_tuner/strategies/wrapper.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/strategies/wrapper.py b/kernel_tuner/strategies/wrapper.py
index 1a928ab17..d6d91f2dd 100644
--- a/kernel_tuner/strategies/wrapper.py
+++ b/kernel_tuner/strategies/wrapper.py
@@ -11,7 +11,7 @@ class OptAlg(ABC):
     """Base class for user-defined optimization algorithms."""
 
     def __init__(self):
-        self.costfunc_kwargs = {"scaling": True, "snap": True}
+        self.costfunc_kwargs = {"scaling": False, "snap": False}
 
     @abstractmethod
     def __call__(self, func: CostFunc, searchspace: Searchspace) -> tuple[tuple, float]:
@@ -36,7 +36,7 @@ def __init__(self, optimizer: OptAlg):
     def tune(self, searchspace: Searchspace, runner, tuning_options):
         cost_func = CostFunc(searchspace, tuning_options, runner, **self.optimizer.costfunc_kwargs)
 
-        if self.optimizer.costfunc_kwargs.get('scaling', True):
+        if self.optimizer.costfunc_kwargs.get('scaling', False):
             # Initialize costfunc for scaling
             cost_func.get_bounds_x0_eps()
 

From 623eca466b96af11d1aacc6d394abd1253d0ec9c Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Sat, 12 Jul 2025 01:46:53 +0200
Subject: [PATCH 228/253] Implemented partial neighbor caching, adding more
 information to the overall neighbor knowledge with every lookup

---
 kernel_tuner/searchspace.py                   | 101 ++++++++++++++----
 .../strategies/simulated_annealing.py         |   4 -
 test/test_custom_optimizer.py                 |   1 +
 3 files changed, 83 insertions(+), 23 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index daf1b821d..3dc382ec1 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -6,6 +6,7 @@
 from typing import List, Union
 from warnings import warn
 from copy import deepcopy
+from collections import defaultdict
 
 import numpy as np
 from constraint import (
@@ -104,6 +105,7 @@ def __init__(
         self.build_neighbors_index = build_neighbors_index
         self.solver_method = solver_method
         self.__neighbor_cache = { method: dict() for method in supported_neighbor_methods }
+        self.__neighbor_partial_cache = { method: defaultdict(list) for method in supported_neighbor_methods }
         self.neighbors_index = dict()
         self.neighbor_method = neighbor_method
         if (neighbor_method is not None or build_neighbors_index) and neighbor_method not in supported_neighbor_methods:
@@ -837,6 +839,7 @@ def __get_random_neighbor_hamming(self, param_config: tuple) -> tuple:
         for i in random_order_indices:
             # assert arr[i].shape == target.shape, f"Row {i} shape {arr[i].shape} does not match target shape {target.shape}"
             if np.count_nonzero(arr[i] != target) == 1:
+                self.__add_to_neighbor_partial_cache(param_config, [i], full_neighbors=False)
                 return self.get_param_configs_at_indices([i])[0]
         return None
 
@@ -876,7 +879,9 @@ def __get_random_neighbor_adjacent(self, param_config: tuple) -> tuple:
             
             # if there are matching indices, return a random one
             if len(matching_indices) > 0:
-                # get the random index from the matching indices
+                self.__add_to_neighbor_partial_cache(param_config, matching_indices, full_neighbors=allowed_index_difference == max_index_difference)
+                
+                # get a random index from the matching indices
                 random_neighbor_index = choice(matching_indices)
                 return self.get_param_configs_at_indices([random_neighbor_index])[0]
 
@@ -884,6 +889,23 @@ def __get_random_neighbor_adjacent(self, param_config: tuple) -> tuple:
             allowed_index_difference += 1
         return None
 
+    def __add_to_neighbor_partial_cache(self, param_config: tuple, neighbor_indices: List[int], neighbor_method: str, full_neighbors = False):
+        """Add the neighbor indices to the partial cache using the given parameter configuration."""
+        param_config_index = self.get_param_config_index(param_config)
+        if param_config_index is None:
+            return  # we need a valid parameter configuration to add to the cache
+        # add the indices to the partial cache for the parameter configuration
+        if full_neighbors:
+            self.__neighbor_partial_cache[neighbor_method][param_config_index] = neighbor_indices
+        else:
+            for neighbor_index in neighbor_indices:
+                if neighbor_index not in self.__neighbor_partial_cache[neighbor_method][param_config_index]:
+                    self.__neighbor_partial_cache[neighbor_method][param_config_index].append(neighbor_index)
+        # add the parameter configuration index to the partial cache for each neighbor
+        for neighbor_index in neighbor_indices:
+            if param_config_index not in self.__neighbor_partial_cache[neighbor_method][neighbor_index]:
+                self.__neighbor_partial_cache[neighbor_method][neighbor_index].append(param_config_index)
+
     def __get_neighbors_indices_strictlyadjacent(
         self, param_config_index: int = None, param_config: tuple = None
     ) -> List[int]:
@@ -1022,6 +1044,10 @@ def get_neighbors_indices(self, param_config: tuple, neighbor_method=None, build
         if neighbors is None:
             neighbors = self.get_neighbors_indices_no_cache(param_config, neighbor_method, build_full_cache)
             self.__neighbor_cache[neighbor_method][param_config] = neighbors
+            self.__add_to_neighbor_partial_cache(param_config, neighbors, neighbor_method, full_neighbors=True)
+            if neighbor_method == "strictly-adjacent":
+                # any neighbor in strictly-adjacent is also an adjacent neighbor
+                self.__add_to_neighbor_partial_cache(param_config, neighbors, "adjacent", full_neighbors=False)
         return neighbors
 
     def are_neighbors_indices_cached(self, param_config: tuple, neighbor_method=None) -> bool:
@@ -1040,28 +1066,65 @@ def get_neighbors(self, param_config: tuple, neighbor_method=None, build_full_ca
         """Get the neighbors for a parameter configuration."""
         return self.get_param_configs_at_indices(self.get_neighbors_indices(param_config, neighbor_method, build_full_cache))
 
-    def get_random_neighbor(self, param_config: tuple, neighbor_method=None) -> tuple:
-        """Get an approximately random neighbor for a parameter configuration. Much faster than taking a random choice of all neighbors, but does not build cache."""
+    def get_partial_neighbors_indices(self, param_config: tuple, neighbor_method=None) -> List[tuple]:
+        """Get the partial neighbors for a parameter configuration."""
+        if neighbor_method is None:
+            neighbor_method = self.neighbor_method
+            if neighbor_method is None:
+                raise ValueError("Neither the neighbor_method argument nor self.neighbor_method was set")
+        param_config_index = self.get_param_config_index(param_config)
+        if param_config_index is None or param_config_index not in self.__neighbor_partial_cache[neighbor_method]:
+            return []
+        return self.get_param_configs_at_indices(self.__neighbor_partial_cache[neighbor_method][param_config_index])
+
+    def pop_random_partial_neighbor(self, param_config: tuple, neighbor_method=None, threshold=2) -> tuple:
+        """Pop a random partial neighbor for a given a parameter configuration if there are at least `threshold` neighbors."""
+        if neighbor_method is None:
+            neighbor_method = self.neighbor_method
+            if neighbor_method is None:
+                raise ValueError("Neither the neighbor_method argument nor self.neighbor_method was set")
+        param_config_index = self.get_param_config_index(param_config)
+        if param_config_index is None or param_config_index not in self.__neighbor_partial_cache[neighbor_method]:
+            return None
+        partial_neighbors = self.get_param_configs_at_indices(self.__neighbor_partial_cache[neighbor_method][param_config_index])
+        if len(partial_neighbors) < threshold:
+            return None
+        partial_neighbor_index = choice(range(len(partial_neighbors)))
+        random_neighbor = self.__neighbor_partial_cache[neighbor_method][param_config_index].pop(partial_neighbor_index)
+        return self.get_param_configs_at_indices([random_neighbor])[0]
+
+    def get_random_neighbor(self, param_config: tuple, neighbor_method=None, use_partial_cache=True) -> tuple:
+        """Get an approximately random neighbor for a parameter configuration. Much faster than taking a random choice of all neighbors, but does not build full cache."""
         if self.are_neighbors_indices_cached(param_config, neighbor_method):
             neighbors = self.get_neighbors(param_config, neighbor_method)
             return choice(neighbors) if len(neighbors) > 0 else None
-        else:
-            # check if there is a neighbor method to use
+        elif use_partial_cache:
+            # pop the chosen neighbor from the cache to avoid choosing it again until it is re-added
+            random_neighbor = self.pop_random_partial_neighbor(param_config, neighbor_method)
+            if random_neighbor is not None:
+                return random_neighbor
+    
+        # check if there is a neighbor method to use
+        if neighbor_method is None:
+            neighbor_method = self.neighbor_method
             if neighbor_method is None:
-                neighbor_method = self.neighbor_method
-
-            # find the random neighbor based on the method
-            if neighbor_method == "adjacent":
-                return self.__get_random_neighbor_adjacent(param_config)
-            # elif neighbor_method == "Hamming":
-            #   this implementation is not as efficient as just generating all neighbors
-            #     return self.__get_random_neighbor_hamming(param_config)
-            else:
-                # not much performance to be gained for strictly-adjacent neighbors, just generate the neighbors
-                neighbors = self.get_neighbors(param_config, neighbor_method)
-                if len(neighbors) == 0:
-                    return None
-                return choice(neighbors)
+                raise ValueError("Neither the neighbor_method argument nor self.neighbor_method was set")
+
+        # oddly enough, the custom random neighbor methods are not faster than just generating all neighbor + partials
+        # # find the random neighbor based on the method
+        # if neighbor_method == "adjacent":
+        #     return self.__get_random_neighbor_adjacent(param_config)
+        # elif neighbor_method == "Hamming":
+        #   this implementation is not as efficient as just generating all neighbors
+        #     return self.__get_random_neighbor_hamming(param_config)
+        # # else:
+        #    # not much performance to be gained for strictly-adjacent neighbors, just generate the neighbors
+
+        # calculate the full neighbors and return a random one
+        neighbors = self.get_neighbors(param_config, neighbor_method)
+        if len(neighbors) == 0:
+            return None
+        return choice(neighbors)
 
     def get_param_neighbors(self, param_config: tuple, index: int, neighbor_method: str, randomize: bool) -> list:
         """Get the neighboring parameters at an index."""
diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py
index 3f5b20fb4..ee4f1355c 100644
--- a/kernel_tuner/strategies/simulated_annealing.py
+++ b/kernel_tuner/strategies/simulated_annealing.py
@@ -116,10 +116,6 @@ def neighbor(pos, searchspace: Searchspace, constraint_aware=True):
 
     def random_neighbor(pos, method):
         """Helper method to return a random neighbor."""
-        # neighbors = searchspace.get_neighbors(pos, neighbor_method=method)
-        # if not neighbors:
-        #     return pos
-        # return random.choice(neighbors)
         neighbor = searchspace.get_random_neighbor(pos, neighbor_method=method)
         if neighbor is None:
             return pos
diff --git a/test/test_custom_optimizer.py b/test/test_custom_optimizer.py
index 4d9b1c125..6e40e5aaf 100644
--- a/test/test_custom_optimizer.py
+++ b/test/test_custom_optimizer.py
@@ -16,6 +16,7 @@ class HybridDELocalRefinement(OptAlg):
 
     def __init__(self):
         super().__init__()
+        self.costfunc_kwargs = {"scaling": True, "snap": True}
         # You can adjust these hyperparameters based on experimentation/tuning:
         self.F = 0.8        # Differential weight
         self.CR = 0.9       # Crossover probability

From f671c0e5b4a8041973164b6150cc69a517f15664 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Sat, 12 Jul 2025 01:55:03 +0200
Subject: [PATCH 229/253] Fix to add neighbor method as argument

---
 kernel_tuner/searchspace.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 3dc382ec1..0bb193d9e 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -839,7 +839,7 @@ def __get_random_neighbor_hamming(self, param_config: tuple) -> tuple:
         for i in random_order_indices:
             # assert arr[i].shape == target.shape, f"Row {i} shape {arr[i].shape} does not match target shape {target.shape}"
             if np.count_nonzero(arr[i] != target) == 1:
-                self.__add_to_neighbor_partial_cache(param_config, [i], full_neighbors=False)
+                self.__add_to_neighbor_partial_cache(param_config, [i], "Hamming", full_neighbors=False)
                 return self.get_param_configs_at_indices([i])[0]
         return None
 
@@ -879,7 +879,7 @@ def __get_random_neighbor_adjacent(self, param_config: tuple) -> tuple:
             
             # if there are matching indices, return a random one
             if len(matching_indices) > 0:
-                self.__add_to_neighbor_partial_cache(param_config, matching_indices, full_neighbors=allowed_index_difference == max_index_difference)
+                self.__add_to_neighbor_partial_cache(param_config, matching_indices, "adjacent", full_neighbors=allowed_index_difference == max_index_difference)
                 
                 # get a random index from the matching indices
                 random_neighbor_index = choice(matching_indices)

From b45f8808b5b58fa82774d6ac1235f32e520d14b0 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 17 Jul 2025 16:36:04 +0200
Subject: [PATCH 230/253] Extended differential evolution hyperparameters to
 fit new algorithm implementation, minor improvements

---
 kernel_tuner/hyper.py               | 7 ++++---
 kernel_tuner/strategies/diff_evo.py | 4 ++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 97bc01567..335783ed1 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -130,9 +130,10 @@ def put_if_not_present(target_dict, key, value):
         }
     elif strategy_to_tune.lower() == "diff_evo":
         hyperparams = {
-            'method': ["best1bin", "best1exp", "rand1exp", "randtobest1exp", "best2exp", "rand2exp", "randtobest1bin", "best2bin", "rand2bin", "rand1bin"],
-            'popsize': [10, 20, 30],
-            'maxiter': [50, 100, 150],
+            'method': ["best1bin", "rand1bin", "best2bin", "rand2bin", "best1exp", "rand1exp", "best2exp", "rand2exp", "currenttobest1bin", "currenttobest1exp", "randtobest1bin", "randtobest1exp"],   # best1bin
+            'popsize': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],   # 50
+            'F': [0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0],  # 1.3
+            'CR': [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0]  # 0.9
         }
     elif strategy_to_tune.lower() == "basinhopping":
         hyperparams = {
diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index 8cd3af53e..24ac299f0 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -11,7 +11,7 @@
 
 _options = dict(
     popsize=("population size", 50),
-    maxiter=("maximum number of generations", 200),
+    maxiter=("maximum number of generations", 1e12),    # very large to avoid early stopping (stopping is managed by StopCriterionReached)
     F=("mutation factor (differential weight)", 1.3),
     CR=("crossover rate", 0.9),
     method=("method", "best1bin"),
@@ -391,7 +391,7 @@ def repair(trial_vector, searchspace):
         for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]:
             new_trial_vector = searchspace.get_random_neighbor(tuple(trial_vector), neighbor_method=neighbor_method)
             if new_trial_vector is not None:
-                print(f"Differential evolution resulted in invalid config {trial_vector=}, repaired to {new_trial_vector=}")
+                # print(f"Differential evolution resulted in invalid config {trial_vector=}, repaired to {new_trial_vector=}")
                 return list(new_trial_vector)
 
     return trial_vector

From f31f67cdb8f7d6fa1ffd5231592766d5729c9e83 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 17 Jul 2025 22:24:42 +0200
Subject: [PATCH 231/253] Implemented retrieving the true tunable parameters
 (those that are not constant after restrictions) from the search space

---
 kernel_tuner/searchspace.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 0bb193d9e..4edde95c2 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -104,6 +104,7 @@ def __init__(
         self.params_values_indices = None
         self.build_neighbors_index = build_neighbors_index
         self.solver_method = solver_method
+        self.__true_tune_params = None
         self.__neighbor_cache = { method: dict() for method in supported_neighbor_methods }
         self.__neighbor_partial_cache = { method: defaultdict(list) for method in supported_neighbor_methods }
         self.neighbors_index = dict()
@@ -687,6 +688,20 @@ def get_list_numpy(self) -> np.ndarray:
             self.__numpy = np.array(self.list)
         return self.__numpy
 
+    def get_true_tunable_params(self) -> dict:
+        """Get the tunable parameters that are actually tunable, i.e. not constant after restrictions."""
+        if self.__true_tune_params is None:
+            true_tune_params = dict()
+            numpy_list = self.get_list_numpy()
+            for param_index, (param_name, param_values) in enumerate(self.tune_params.items()):
+                if len(param_values) == 1:
+                    continue    # if the parameter is constant, skip it
+                if not np.all(numpy_list[:, param_index] == numpy_list[0, param_index]):
+                    # if after restrictions there are different values, register the parameter
+                    true_tune_params[param_name] = param_values
+            self.__true_tune_params = true_tune_params
+        return self.__true_tune_params
+
     def get_param_indices(self, param_config: tuple) -> tuple:
         """For each parameter value in the param config, find the index in the tunable parameters."""
         try:

From bb152f270757ae65bdd0fabe989e63fa2cfac98d Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 17 Jul 2025 22:26:12 +0200
Subject: [PATCH 232/253] Implemented number of dimensions-dependent population
 size in diff_evo

---
 kernel_tuner/strategies/diff_evo.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index 24ac299f0..a60bcff52 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -11,7 +11,8 @@
 
 _options = dict(
     popsize=("population size", 50),
-    maxiter=("maximum number of generations", 1e12),    # very large to avoid early stopping (stopping is managed by StopCriterionReached)
+    popsize_times_dimensions=("multiply population size with number of dimensions (True/False)", False),
+    maxiter=("maximum number of generations", int(1e15)),    # very large to avoid early stopping (stopping is managed by StopCriterionReached)
     F=("mutation factor (differential weight)", 1.3),
     CR=("crossover rate", 0.9),
     method=("method", "best1bin"),
@@ -39,7 +40,10 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     bounds = cost_func.get_bounds()
 
     options = tuning_options.strategy_options
-    popsize, maxiter, F, CR, method, constraint_aware = common.get_options(options, _options)
+    popsize, popsize_times_dimensions, maxiter, F, CR, method, constraint_aware = common.get_options(options, _options)
+    if popsize_times_dimensions:
+        popsize *= min(len(searchspace.get_true_tunable_params()), searchspace.size)
+    maxiter = min(maxiter, searchspace.size)
 
     if method not in supported_methods:
         raise ValueError(f"Error {method} not supported, {supported_methods=}")

From b7e779e907e7c04bd0e3b37a5259930e328ac5bd Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 17 Jul 2025 22:27:31 +0200
Subject: [PATCH 233/253] If there is a problem loading the cache, the path of
 the cachefile is given

---
 kernel_tuner/util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index f3c830e0b..525cbf341 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -1229,7 +1229,7 @@ def process_cache(cache, kernel_options, tuning_options, runner):
                 )
             raise ValueError(
                 f"Cannot load cache which contains results obtained with different tunable parameters. \
-                Cache has: {cached_data['tune_params_keys']}, tuning_options has: {list(tuning_options.tune_params.keys())}"
+                Cache at '{cache}' has: {cached_data['tune_params_keys']}, tuning_options has: {list(tuning_options.tune_params.keys())}"
             )
 
         tuning_options.cachefile = cache

From a89af661aae7cd0d2b4a26d5a4d7b1bd4966d9aa Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 17 Jul 2025 22:55:19 +0200
Subject: [PATCH 234/253] Implemented passing keyword arguments like meta
 strategy and time limit to hyperparameter tuning CLI

---
 kernel_tuner/hyper.py | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 335783ed1..0bdb513e7 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -5,6 +5,8 @@
 from random import randint
 from argparse import ArgumentParser
 
+import numpy as np
+
 import kernel_tuner
 
 
@@ -94,11 +96,26 @@ def put_if_not_present(target_dict, key, value):
     return list(result_unique.values()), env
 
 if __name__ == "__main__":
+    """Main function to run the hyperparameter tuning. Run with `python hyper.py strategy_to_tune=`."""
+
     parser = ArgumentParser()
-    parser.add_argument("strategy_to_tune")
+    parser.add_argument("strategy_to_tune", type=str, help="The strategy to tune hyperparameters for.")
+    parser.add_argument("--meta_strategy", nargs='?', default="genetic_algorithm", type=str, help="The meta-strategy to use for hyperparameter tuning.")
+    parser.add_argument("--max_time", nargs='?', default=60*60*24, type=int, help="The maximum time in seconds for the hyperparameter tuning.")
     args = parser.parse_args()
     strategy_to_tune = args.strategy_to_tune
 
+    kwargs = dict(
+        verbose=True,
+        quiet=False,
+        simulation_mode=False,
+        strategy=args.meta_strategy,
+        cache=f"hyperparamtuning_t={strategy_to_tune}_m={args.meta_strategy}.json",
+        strategy_options=dict(
+            time_limit=args.max_time,
+        )
+    )
+
     # select the hyperparameter parameters for the selected optimization algorithm
     restrictions = []
     if strategy_to_tune.lower() == "pso":
@@ -131,9 +148,10 @@ def put_if_not_present(target_dict, key, value):
     elif strategy_to_tune.lower() == "diff_evo":
         hyperparams = {
             'method': ["best1bin", "rand1bin", "best2bin", "rand2bin", "best1exp", "rand1exp", "best2exp", "rand2exp", "currenttobest1bin", "currenttobest1exp", "randtobest1bin", "randtobest1exp"],   # best1bin
-            'popsize': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],   # 50
-            'F': [0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0],  # 1.3
-            'CR': [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0]  # 0.9
+            'popsize': list(range(1, 100+1, 1)),   # 50
+            'popsize_times_dimensions': [True, False],  # False
+            'F': list(np.arange(0.05, 2.0+0.05, 0.05)),  # 1.3
+            'CR': list(np.arange(0.05, 1.0+0.05, 0.05))  # 0.9
         }
     elif strategy_to_tune.lower() == "basinhopping":
         hyperparams = {
@@ -172,6 +190,6 @@ def put_if_not_present(target_dict, key, value):
         raise ValueError(f"Invalid argument {strategy_to_tune=}")
 
     # run the hyperparameter tuning
-    result, env = tune_hyper_params(strategy_to_tune.lower(), hyperparams, restrictions=restrictions)
+    result, env = tune_hyper_params(strategy_to_tune.lower(), hyperparams, restrictions=restrictions, **kwargs)
     print(result)
     print(env['best_config'])

From 8bd5991a462e7e53476a4842dee1449bfa2abbcc Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 18 Jul 2025 15:48:00 +0200
Subject: [PATCH 235/253] Improved matching problem size check

---
 kernel_tuner/util.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index 525cbf341..2d9e3f1b3 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -1147,7 +1147,9 @@ def compile_restrictions(
 
 def check_matching_problem_size(cached_problem_size, problem_size):
     """Check the if requested problem size matches the problem size in the cache."""
-    if not (np.array(cached_problem_size) == np.array(problem_size)).all():
+    cached_problem_size_arr = np.array(cached_problem_size)
+    problem_size_arr = np.array(problem_size)
+    if cached_problem_size_arr.size != problem_size_arr.size or not (cached_problem_size_arr == problem_size_arr).all():
         raise ValueError(f"Cannot load cache which contains results for different problem_size, cache: {cached_problem_size}, requested: {problem_size}")
 
 def process_cache(cache, kernel_options, tuning_options, runner):

From 89891a8337630739619bdda2a14109fefbc38354 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Sat, 19 Jul 2025 02:34:08 +0200
Subject: [PATCH 236/253] Implemented a discrete version of Latin Hypercube
 sampling that is (semi) constraint-aware

---
 kernel_tuner/searchspace.py         | 51 +++++++++++++++++++++++++++++
 kernel_tuner/strategies/diff_evo.py | 13 +++++---
 2 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 4edde95c2..13513bfdd 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -8,6 +8,9 @@
 from copy import deepcopy
 from collections import defaultdict
 
+from scipy.stats.qmc import LatinHypercube
+from sklearn.neighbors import NearestNeighbors
+
 import numpy as np
 from constraint import (
     BacktrackingSolver,
@@ -1015,6 +1018,54 @@ def get_random_sample(self, num_samples: int) -> List[tuple]:
             num_samples = self.size
         return self.get_param_configs_at_indices(self.get_random_sample_indices(num_samples))
 
+    def get_latin_hypercube_sample(self, n_samples: int, seed: int = None) -> List[tuple]:
+        """Perform Latin Hypercube Sampling over a set of valid discrete configurations.
+
+        Parameters:
+        - n_samples: int
+            The number of LHS samples to draw.
+        - seed: int or None
+            Random seed for reproducibility.
+
+        Returns:
+        - sample: np.ndarray of shape (n_samples, n_dims)
+            The sampled configurations using LHS.
+        """
+        if n_samples > self.size:
+            raise ValueError(f"Cannot sample more points ({n_samples}) than available configurations ({self.size})")
+
+        # get the configurations
+        configs = self.get_list_numpy()
+        n_dims = configs.shape[1]
+
+        # encode non-numeric values to their index in the parameter values
+        for i in range(n_dims):
+            if not np.issubdtype(configs[:, i].dtype, np.number):
+                # convert categorical values to indices
+                unique_values, inverse_indices = np.unique(configs[:, i], return_inverse=True)
+                configs[:, i] = inverse_indices
+
+        # Normalize valid configurations to [0, 1]^d
+        scaler_min = configs.min(axis=0)
+        scaler_max = configs.max(axis=0)
+        normalized = (configs - scaler_min) / (scaler_max - scaler_min + 1e-9)
+
+        # Generate LHS samples in [0, 1]^d
+        sampler = LatinHypercube(d=n_dims, seed=seed)
+        lhs_points = sampler.random(n=n_samples)
+
+        # Match LHS points to the closest valid config
+        nn = NearestNeighbors(n_neighbors=1)
+        nn.fit(normalized)
+        _, indices = nn.kneighbors(lhs_points)
+        selected_indices = np.unique(indices.flatten())
+
+        # if duplicates were removed, add random unique configs until we have enough
+        if len(selected_indices) < n_samples:
+            selected_indices = np.concatenate([selected_indices, self.get_random_sample_indices(n_samples - len(selected_indices))])
+
+        return self.get_param_configs_at_indices(selected_indices)
+
     def get_neighbors_indices_no_cache(self, param_config: tuple, neighbor_method=None, build_full_cache=False) -> List[int]:
         """Get the neighbors indices for a parameter configuration (does not check running cache, useful when mixing neighbor methods)."""
         param_config_index = self.get_param_config_index(param_config)
diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index a60bcff52..b7172677d 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -103,11 +103,16 @@ def random_draw(idxs, mutate, best):
 
 
 def generate_population(tune_params, max_idx, popsize, searchspace, constraint_aware):
-    """ Generate new population, returns Numpy array """
+    """Generate new population, returns Numpy array."""
     if constraint_aware:
-        samples = LatinHypercube(len(tune_params)).integers(l_bounds=0, u_bounds=max_idx, n=popsize, endpoint=True)
-        population = [indices_to_values(sample, tune_params) for sample in samples]
-        population = [repair(individual, searchspace) for individual in population]
+        samples = searchspace.get_latin_hypercube_sample(popsize)
+        population = [list(sample) for sample in samples]
+        # for sample in samples:
+        #     if not searchspace.is_param_config_valid(tuple(sample)):
+        #         raise ValueError(f"Invalid sample {sample} generated by Latin Hypercube Sampling.")
+        # samples = LatinHypercube(len(tune_params)).integers(l_bounds=0, u_bounds=max_idx, n=popsize, endpoint=True)
+        # population = [indices_to_values(sample, tune_params) for sample in samples]
+        # population = [repair(individual, searchspace) for individual in population]
     else:
         population = []
         for _ in range(popsize):

From b91a6504276865c312be769b723887efb141cf41 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Sun, 20 Jul 2025 00:24:02 +0200
Subject: [PATCH 237/253] Implemented efficiently representing the full search
 space as the indices of the parameter values

---
 kernel_tuner/searchspace.py         | 94 +++++++++++++----------------
 kernel_tuner/strategies/diff_evo.py | 11 +---
 2 files changed, 45 insertions(+), 60 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 13513bfdd..1f13615d7 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -8,9 +8,6 @@
 from copy import deepcopy
 from collections import defaultdict
 
-from scipy.stats.qmc import LatinHypercube
-from sklearn.neighbors import NearestNeighbors
-
 import numpy as np
 from constraint import (
     BacktrackingSolver,
@@ -107,6 +104,9 @@ def __init__(
         self.params_values_indices = None
         self.build_neighbors_index = build_neighbors_index
         self.solver_method = solver_method
+        self.__tune_params_to_index_lookup = None
+        self.__tune_params_from_index_lookup = None
+        self.__list_param_indices = None
         self.__true_tune_params = None
         self.__neighbor_cache = { method: dict() for method in supported_neighbor_methods }
         self.__neighbor_partial_cache = { method: defaultdict(list) for method in supported_neighbor_methods }
@@ -691,6 +691,34 @@ def get_list_numpy(self) -> np.ndarray:
             self.__numpy = np.array(self.list)
         return self.__numpy
 
+    def get_list_param_indices_numpy(self) -> np.ndarray:
+        """Get the parameter space list as a NumPy array of parameter value indices. 
+        
+        Same as mapping `get_param_indices` over the searchspace, but faster.
+        Assumes that the parameter configs have the same order as `tune_params`.
+
+        Returns:
+            the NumPy array.
+        """
+        if self.__list_param_indices is None:
+            tune_params_to_index_lookup = list()
+            tune_params_from_index_lookup = list()
+            for param_name, param_values in self.tune_params.items():
+                tune_params_to_index_lookup.append({ value: index for index, value in enumerate(param_values) })
+                tune_params_from_index_lookup.append({ index: value for index, value in enumerate(param_values) })
+            
+            # build the list
+            list_param_indices = list()
+            for param_config in self.list:
+                list_param_indices.append([tune_params_to_index_lookup[index][val] for index, val in enumerate(param_config)])
+
+            # register the computed results
+            self.__tune_params_to_index_lookup = tune_params_to_index_lookup
+            self.__tune_params_from_index_lookup = tune_params_from_index_lookup
+            self.__list_param_indices = np.array(list_param_indices)
+            assert self.__list_param_indices.shape == (self.size, self.num_params)
+        return self.__list_param_indices
+
     def get_true_tunable_params(self) -> dict:
         """Get the tunable parameters that are actually tunable, i.e. not constant after restrictions."""
         if self.__true_tune_params is None:
@@ -707,6 +735,9 @@ def get_true_tunable_params(self) -> dict:
 
     def get_param_indices(self, param_config: tuple) -> tuple:
         """For each parameter value in the param config, find the index in the tunable parameters."""
+        if self.__tune_params_to_index_lookup is not None:
+            # if the lookup is already computed, use it
+            return tuple([self.__tune_params_to_index_lookup[index][param_value] for index, param_value in enumerate(param_config)])
         try:
             return tuple(self.params_values[index].index(param_value) for index, param_value in enumerate(param_config))
         except ValueError as e:
@@ -717,6 +748,13 @@ def get_param_indices(self, param_config: tuple) -> tuple:
                         f"Parameter value {param_value} ({type(param_value)}) is not in the list of values {self.params_values[index]}"
                     ) from e
 
+    def get_param_config_from_param_indices(self, param_indices: tuple) -> tuple:
+        """Get the parameter configuration from the given parameter indices."""
+        if self.__tune_params_from_index_lookup is not None:
+            # if the lookup is already computed, use it
+            return tuple([self.__tune_params_from_index_lookup[index][param_index] for index, param_index in enumerate(param_indices)])
+        return tuple(self.params_values[index][param_index] for index, param_index in enumerate(param_indices))
+
     def get_param_configs_at_indices(self, indices: List[int]) -> List[tuple]:
         """Get the param configs at the given indices."""
         # map(get) is ~40% faster than numpy[indices] (average based on six searchspaces with 10000, 100000 and 1000000 configs and 10 or 100 random indices)
@@ -838,7 +876,7 @@ def get_tensorspace_bounds(self):
 
     def __prepare_neighbors_index(self):
         """Prepare by calculating the indices for the individual parameters."""
-        self.params_values_indices = np.array(list(self.get_param_indices(param_config) for param_config in self.list))
+        self.params_values_indices = self.get_list_param_indices_numpy()
 
     def __get_neighbors_indices_hamming(self, param_config: tuple) -> List[int]:
         """Get the neighbors using Hamming distance from the parameter configuration."""
@@ -1018,54 +1056,6 @@ def get_random_sample(self, num_samples: int) -> List[tuple]:
             num_samples = self.size
         return self.get_param_configs_at_indices(self.get_random_sample_indices(num_samples))
 
-    def get_latin_hypercube_sample(self, n_samples: int, seed: int = None) -> List[tuple]:
-        """Perform Latin Hypercube Sampling over a set of valid discrete configurations.
-
-        Parameters:
-        - n_samples: int
-            The number of LHS samples to draw.
-        - seed: int or None
-            Random seed for reproducibility.
-
-        Returns:
-        - sample: np.ndarray of shape (n_samples, n_dims)
-            The sampled configurations using LHS.
-        """
-        if n_samples > self.size:
-            raise ValueError(f"Cannot sample more points ({n_samples}) than available configurations ({self.size})")
-
-        # get the configurations
-        configs = self.get_list_numpy()
-        n_dims = configs.shape[1]
-
-        # encode non-numeric values to their index in the parameter values
-        for i in range(n_dims):
-            if not np.issubdtype(configs[:, i].dtype, np.number):
-                # convert categorical values to indices
-                unique_values, inverse_indices = np.unique(configs[:, i], return_inverse=True)
-                configs[:, i] = inverse_indices
-
-        # Normalize valid configurations to [0, 1]^d
-        scaler_min = configs.min(axis=0)
-        scaler_max = configs.max(axis=0)
-        normalized = (configs - scaler_min) / (scaler_max - scaler_min + 1e-9)
-
-        # Generate LHS samples in [0, 1]^d
-        sampler = LatinHypercube(d=n_dims, seed=seed)
-        lhs_points = sampler.random(n=n_samples)
-
-        # Match LHS points to the closest valid config
-        nn = NearestNeighbors(n_neighbors=1)
-        nn.fit(normalized)
-        _, indices = nn.kneighbors(lhs_points)
-        selected_indices = np.unique(indices.flatten())
-
-        # if duplicates were removed, add random unique configs until we have enough
-        if len(selected_indices) < n_samples:
-            selected_indices = np.concatenate([selected_indices, self.get_random_sample_indices(n_samples - len(selected_indices))])
-
-        return self.get_param_configs_at_indices(selected_indices)
-
     def get_neighbors_indices_no_cache(self, param_config: tuple, neighbor_method=None, build_full_cache=False) -> List[int]:
         """Get the neighbors indices for a parameter configuration (does not check running cache, useful when mixing neighbor methods)."""
         param_config_index = self.get_param_config_index(param_config)
diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index b7172677d..23b67518e 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -105,14 +105,9 @@ def random_draw(idxs, mutate, best):
 def generate_population(tune_params, max_idx, popsize, searchspace, constraint_aware):
     """Generate new population, returns Numpy array."""
     if constraint_aware:
-        samples = searchspace.get_latin_hypercube_sample(popsize)
-        population = [list(sample) for sample in samples]
-        # for sample in samples:
-        #     if not searchspace.is_param_config_valid(tuple(sample)):
-        #         raise ValueError(f"Invalid sample {sample} generated by Latin Hypercube Sampling.")
-        # samples = LatinHypercube(len(tune_params)).integers(l_bounds=0, u_bounds=max_idx, n=popsize, endpoint=True)
-        # population = [indices_to_values(sample, tune_params) for sample in samples]
-        # population = [repair(individual, searchspace) for individual in population]
+        samples = LatinHypercube(len(tune_params)).integers(l_bounds=0, u_bounds=max_idx, n=popsize, endpoint=True)
+        population = [indices_to_values(sample, tune_params) for sample in samples]
+        population = [repair(individual, searchspace) for individual in population]
     else:
         population = []
         for _ in range(popsize):

From fa293cff973e06382f5a9ab503c834dd836b8134 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Mon, 21 Jul 2025 08:55:26 +0200
Subject: [PATCH 238/253] Implemented getting the numpy array as numerical
 values, where non-numerical values are replaced with their index

---
 kernel_tuner/searchspace.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 1f13615d7..c2d96c1f0 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -104,9 +104,12 @@ def __init__(
         self.params_values_indices = None
         self.build_neighbors_index = build_neighbors_index
         self.solver_method = solver_method
+        self.tune_param_is_numeric = { param_name: all(isinstance(val, (int, float)) and not any(isinstance(val, bool)) for param_name, val in tune_params.items()) }
+        self.tune_param_is_numeric_mask = np.array(tune_param_is_numeric.values(), dtype=bool)
         self.__tune_params_to_index_lookup = None
         self.__tune_params_from_index_lookup = None
         self.__list_param_indices = None
+        self.__list_numpy_numeric = None
         self.__true_tune_params = None
         self.__neighbor_cache = { method: dict() for method in supported_neighbor_methods }
         self.__neighbor_partial_cache = { method: defaultdict(list) for method in supported_neighbor_methods }
@@ -718,6 +721,19 @@ def get_list_param_indices_numpy(self) -> np.ndarray:
             self.__list_param_indices = np.array(list_param_indices)
             assert self.__list_param_indices.shape == (self.size, self.num_params)
         return self.__list_param_indices
+    
+    def get_list_numpy_numeric(self) -> np.ndarray:
+        """Get the parameter space list as a NumPy array of numeric values. 
+        
+        This is a view of the NumPy array returned by `get_list_numpy`, but with only numeric values.
+        If the searchspace contains non-numeric values, their index will be used instead.
+
+        Returns:
+            the NumPy array.
+        """
+        if self.__list_numpy_numeric is None:
+            self.__list_numpy_numeric = np.where(self.tune_param_is_numeric_mask, self.get_list_numpy(), self.get_list_param_indices_numpy())
+        return self.__list_numpy_numeric
 
     def get_true_tunable_params(self) -> dict:
         """Get the tunable parameters that are actually tunable, i.e. not constant after restrictions."""

From 0812b2bbd5dff1e8d2ebc5ff11abb9c4535b1470 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Mon, 21 Jul 2025 09:05:07 +0200
Subject: [PATCH 239/253] Implemented retrieving the true parameter
 configuration from a mixed numerical configuration

---
 kernel_tuner/searchspace.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index c2d96c1f0..4dbb5f157 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -771,6 +771,13 @@ def get_param_config_from_param_indices(self, param_indices: tuple) -> tuple:
             return tuple([self.__tune_params_from_index_lookup[index][param_index] for index, param_index in enumerate(param_indices)])
         return tuple(self.params_values[index][param_index] for index, param_index in enumerate(param_indices))
 
+    def get_param_config_from_numeric(self, param_config: tuple) -> tuple:
+        """Get the actual parameter configuration values from a numeric representation of the parameter configuration as in `get_list_numpy_numeric`."""
+        if self.__tune_params_from_index_lookup is None:
+            # if the lookup is not yet computed, compute it
+            self.get_list_param_indices_numpy()
+        return tuple([val if self.tune_param_is_numeric_mask[index] else self.__tune_params_from_index_lookup[index][val] for index, val in enumerate(param_config)])
+
     def get_param_configs_at_indices(self, indices: List[int]) -> List[tuple]:
         """Get the param configs at the given indices."""
         # map(get) is ~40% faster than numpy[indices] (average based on six searchspaces with 10000, 100000 and 1000000 configs and 10 or 100 random indices)

From 21d63bc0dab64eb9578de5b34889dbb9c9283cb0 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Mon, 21 Jul 2025 16:01:01 +0200
Subject: [PATCH 240/253] Implemented parameter index-, numeric- and mixed- 2D
 views of the search space

---
 kernel_tuner/searchspace.py | 44 ++++++++++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 13 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 4dbb5f157..0c0deea11 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -104,8 +104,9 @@ def __init__(
         self.params_values_indices = None
         self.build_neighbors_index = build_neighbors_index
         self.solver_method = solver_method
-        self.tune_param_is_numeric = { param_name: all(isinstance(val, (int, float)) and not any(isinstance(val, bool)) for param_name, val in tune_params.items()) }
-        self.tune_param_is_numeric_mask = np.array(tune_param_is_numeric.values(), dtype=bool)
+        self.tune_param_is_numeric = { param_name: all(isinstance(val, (int, float)) for val in param_values) and not any(isinstance(val, bool) for val in param_values) for (param_name, param_values) in tune_params.items() }
+        self.tune_param_is_numeric_mask = np.array(list(self.tune_param_is_numeric.values()), dtype=bool)
+        self.__numpy_types = [np.array(vals).dtype for vals in self.params_values]
         self.__tune_params_to_index_lookup = None
         self.__tune_params_from_index_lookup = None
         self.__list_param_indices = None
@@ -682,7 +683,10 @@ def get_list_dict(self) -> dict:
         return self.__dict
 
     def get_list_numpy(self) -> np.ndarray:
-        """Get the parameter space list as a NumPy array. Initializes the NumPy array if not yet done.
+        """Get the parameter space list as a NumPy array of tuples with mixed types. 
+        
+        Rarely faster or more convenient than `get_list_param_indices_numpy` or `get_list_numpy_numeric`. 
+        Initializes the NumPy array if not yet done.
 
         Returns:
             the NumPy array.
@@ -691,11 +695,15 @@ def get_list_numpy(self) -> np.ndarray:
             # create a numpy array of the search space
             # in order to have the tuples as tuples in numpy, the types are set with a string, but this will make the type np.void
             # type_string = ",".join(list(type(param).__name__ for param in parameter_space_list[0]))
-            self.__numpy = np.array(self.list)
+            types = np.dtype([(param_name, self.__numpy_types[index]) for index, param_name in enumerate(self.param_names)])
+            self.__numpy = np.array(self.list, dtype=types)
+            assert self.__numpy.shape[0] == self.size, f"Expected shape {(self.size,)}, got {self.__numpy.shape}"
+            assert len(self.__numpy[0]) == self.num_params, f"Expected tuples to be of length {len(self.__numpy[0])}, got {len(self.__numpy[0])}"
+        # return the numpy array
         return self.__numpy
 
     def get_list_param_indices_numpy(self) -> np.ndarray:
-        """Get the parameter space list as a NumPy array of parameter value indices. 
+        """Get the parameter space list as a 2D NumPy array of parameter value indices. 
         
         Same as mapping `get_param_indices` over the searchspace, but faster.
         Assumes that the parameter configs have the same order as `tune_params`.
@@ -718,12 +726,12 @@ def get_list_param_indices_numpy(self) -> np.ndarray:
             # register the computed results
             self.__tune_params_to_index_lookup = tune_params_to_index_lookup
             self.__tune_params_from_index_lookup = tune_params_from_index_lookup
-            self.__list_param_indices = np.array(list_param_indices)
-            assert self.__list_param_indices.shape == (self.size, self.num_params)
+            self.__list_param_indices = np.array(list_param_indices, dtype=int)
+            assert self.__list_param_indices.shape == (self.size, self.num_params), f"Expected shape {(self.size, self.num_params)}, got {self.__list_param_indices.shape}"
         return self.__list_param_indices
     
     def get_list_numpy_numeric(self) -> np.ndarray:
-        """Get the parameter space list as a NumPy array of numeric values. 
+        """Get the parameter space list as a 2D NumPy array of numeric values. 
         
         This is a view of the NumPy array returned by `get_list_numpy`, but with only numeric values.
         If the searchspace contains non-numeric values, their index will be used instead.
@@ -732,14 +740,19 @@ def get_list_numpy_numeric(self) -> np.ndarray:
             the NumPy array.
         """
         if self.__list_numpy_numeric is None:
-            self.__list_numpy_numeric = np.where(self.tune_param_is_numeric_mask, self.get_list_numpy(), self.get_list_param_indices_numpy())
+            # self.__list_numpy_numeric = np.where(self.tune_param_is_numeric_mask, self.get_list_numpy(), self.get_list_param_indices_numpy())
+            list_numpy_numeric = list()
+            for index, (param_name, is_numeric) in enumerate(self.tune_param_is_numeric.items()):
+                list_numpy_numeric.append(self.get_list_numpy()[param_name] if is_numeric else self.get_list_param_indices_numpy()[:, index])
+            self.__list_numpy_numeric = np.array(list_numpy_numeric).transpose()
+            assert self.__list_numpy_numeric.shape == (self.size, self.num_params), f"Expected shape {(self.size, self.num_params)}, got {self.__list_numpy_numeric.shape}"
         return self.__list_numpy_numeric
 
     def get_true_tunable_params(self) -> dict:
         """Get the tunable parameters that are actually tunable, i.e. not constant after restrictions."""
         if self.__true_tune_params is None:
             true_tune_params = dict()
-            numpy_list = self.get_list_numpy()
+            numpy_list = self.get_list_param_indices_numpy()
             for param_index, (param_name, param_values) in enumerate(self.tune_params.items()):
                 if len(param_values) == 1:
                     continue    # if the parameter is constant, skip it
@@ -773,9 +786,13 @@ def get_param_config_from_param_indices(self, param_indices: tuple) -> tuple:
 
     def get_param_config_from_numeric(self, param_config: tuple) -> tuple:
         """Get the actual parameter configuration values from a numeric representation of the parameter configuration as in `get_list_numpy_numeric`."""
+        if np.all(self.tune_param_is_numeric_mask):
+            return param_config  # if all parameters are numeric, return the input as is
         if self.__tune_params_from_index_lookup is None:
             # if the lookup is not yet computed, compute it
             self.get_list_param_indices_numpy()
+        if isinstance(param_config, np.ndarray):
+            param_config = tuple(param_config.tolist())     # if the input is a numpy array, convert it to a tuple
         return tuple([val if self.tune_param_is_numeric_mask[index] else self.__tune_params_from_index_lookup[index][val] for index, val in enumerate(param_config)])
 
     def get_param_configs_at_indices(self, indices: List[int]) -> List[tuple]:
@@ -903,14 +920,15 @@ def __prepare_neighbors_index(self):
 
     def __get_neighbors_indices_hamming(self, param_config: tuple) -> List[int]:
         """Get the neighbors using Hamming distance from the parameter configuration."""
-        num_matching_params = np.count_nonzero(self.get_list_numpy() == param_config, -1)
+        param_indices = self.get_param_indices(param_config)
+        num_matching_params = np.count_nonzero(self.get_list_param_indices_numpy() == param_indices, -1)
         matching_indices = (num_matching_params == self.num_params - 1).nonzero()[0]
         return matching_indices
 
     def __get_random_neighbor_hamming(self, param_config: tuple) -> tuple:
         """Get a random neighbor at 1 Hamming distance from the parameter configuration."""
-        arr = self.get_list_numpy()
-        target = np.array(param_config)
+        arr = self.get_list_param_indices_numpy()
+        target = np.array(self.get_param_indices(param_config))
         assert arr[0].shape == target.shape
 
         # find the first row that differs from the target in exactly one column, return as soon as one is found

From 66f3f69e2cfd5a19bec2643451485822a2bf23e7 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Mon, 21 Jul 2025 16:02:40 +0200
Subject: [PATCH 241/253] Created additional tests for new and improved numpy
 views of the search space

---
 test/test_searchspace.py | 53 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/test/test_searchspace.py b/test/test_searchspace.py
index 56d8256e1..620854eb7 100644
--- a/test/test_searchspace.py
+++ b/test/test_searchspace.py
@@ -442,6 +442,59 @@ def test_order_param_configs():
         assert expected_param_config in ordered_neighbors
     assert len(ordered_neighbors) == len(expected_order)
 
+def test_true_tunable_params():
+    """Test whether the true tunable parameters are correctly identified."""
+    # create a searchspace with mixed parameter types
+    mixed_tune_params = dict()
+    mixed_tune_params["int_param"] = [1, 2, 3]
+    mixed_tune_params["float_param"] = [3.0, 4.0, 5.0]
+    mixed_restrict = ["int_param >= 3"]
+
+    # create the searchspace object
+    searchspace = Searchspace(mixed_tune_params, mixed_restrict, max_threads)
+
+    # check the size
+    assert searchspace.size == 3
+
+    # check that the true tunable parameters are correctly identified
+    true_tunable_params = searchspace.get_true_tunable_params()
+    assert len(true_tunable_params) == 1
+    assert "float_param" in true_tunable_params
+    assert true_tunable_params["float_param"] == mixed_tune_params["float_param"]
+
+
+def test_mixed_param_types():
+    """Test whether the searchspace can handle mixed parameter types."""
+    # create a searchspace with mixed parameter types
+    mixed_tune_params = dict()
+    mixed_tune_params["int_param"] = [1, 2, 3]
+    mixed_tune_params["float_param"] = [1.0, 2.0, 3.0]
+    mixed_tune_params["str_param"] = ["Alpha", "Bravo", "Charlie"]
+    mixed_tune_params["bool_param"] = [True, False]
+    mixed_restrict = ["int_param + float_param > 3", "bool_param == False"]
+
+    # create the searchspace object
+    searchspace = Searchspace(mixed_tune_params, mixed_restrict, max_threads)
+
+    # check the size
+    assert searchspace.size == 18 == len(searchspace.list) == len(searchspace.get_list_dict().keys())
+
+    # check whether param indices are correctly identified
+    assert searchspace.get_param_indices(tuple([1, 1.0, "Alpha", True])) == (0, 0, 0, 0)
+    assert searchspace.get_param_indices(tuple([2, 2.0, "Bravo", False])) == (1, 1, 1, 1)
+
+    # check whether the mapping of params to param indices and back works
+    for param_config in searchspace.list:
+        param_indices = searchspace.get_param_indices(param_config)
+        assert searchspace.get_param_config_from_param_indices(param_indices) == param_config
+
+    # check the parameter types
+    assert all(v1 == v2 for v1, v2 in zip(searchspace.tune_param_is_numeric_mask, [True, True, False, False]))
+
+    # check whether numeric params work as expected
+    for param_config_numeric, param_config in zip(searchspace.get_list_numpy_numeric(), searchspace.list):
+        assert searchspace.get_param_config_from_numeric(param_config_numeric) == param_config
+
 
 def test_small_searchspace():
     """Test a small real-world searchspace and the usage of the `max_threads` parameter."""

From 3b1aeed9283efbad35c16b3fbc16e655d5653fbb Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Tue, 22 Jul 2025 19:14:39 +0200
Subject: [PATCH 242/253] Implemented a fast distributed random sample
 technique suitable for the discrete constraint search spaces

---
 kernel_tuner/searchspace.py | 68 +++++++++++++++++++++++++++++++++++--
 1 file changed, 66 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 0c0deea11..f9abe1a8c 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -2,11 +2,11 @@
 import numbers
 import re
 from pathlib import Path
-from random import choice, shuffle
+from random import choice, shuffle, randint
 from typing import List, Union
 from warnings import warn
 from copy import deepcopy
-from collections import defaultdict
+from collections import defaultdict, deque
 
 import numpy as np
 from constraint import (
@@ -1097,6 +1097,70 @@ def get_random_sample(self, num_samples: int) -> List[tuple]:
             num_samples = self.size
         return self.get_param_configs_at_indices(self.get_random_sample_indices(num_samples))
 
+    def get_distributed_random_sample(self, num_samples: int, neighbor_method=None, sampling_factor=10) -> List[tuple]:
+        """Get a distributed random sample of parameter configurations, similar to LHS but much faster in a constrained setting."""
+        if self.size < num_samples:
+            warn(
+                f"Too many samples requested ({num_samples}), reducing the number of samples to the searchspace size ({self.size})"
+            )
+            num_samples = self.size
+        
+        # adjust the number of random samples if necessary
+        sampling_factor = max(1, sampling_factor)
+        num_random_samples = min(sampling_factor * num_samples, self.size)
+        if num_random_samples == self.size or num_random_samples <= 1:
+            return self.get_random_sample(num_random_samples)
+        random_samples_indices = self.get_random_sample_indices(num_random_samples)
+
+        # calculate the desired parameter configuration indices, starting at the edges of the parameter indices and halving each time
+        def get_next_sample(lower: tuple, upper: tuple) -> tuple:
+            """Get the next sample indices by halving the range between upper and lower bounds."""
+            half = tuple(round((lower[i] + upper[i]) / 2) for i in range(upper))
+            if half == lower or half == upper:
+                # if the range is too small to make a difference, pick one of the bounds and replace one random index with an index of the other
+                random_sample = choice([lower, upper])
+                random_index = randint(0, self.num_params-1))
+                random_sample[random_index] = lower[random_index] if random_sample[random_index] == upper[random_index] else upper[random_index]
+                return tuple(random_sample)
+            return half
+
+        # seed the queue with the lower and upper bounds of the parameter indices
+        target_samples_param_indices = []
+        target_samples_param_indices.append(tuple(0 for _ in range(self.num_params)))
+        target_samples_param_indices.append(tuple(len(self.params_values[i]) - 1 for i in range(self.num_params)))
+        queue = deque([(target_samples_param_indices[0], target_samples_param_indices[1])])
+
+        # do a binary search for the target sample indices, until we have enough samples
+        while len(target_samples_param_indices) < num_samples:
+            lower, upper = queue.popleft()
+            next_sample = get_next_sample(lower, upper)
+            target_samples_param_indices.append(next_sample)
+            queue.append((lower, next_sample))
+            queue.append((next_sample, upper))
+
+        # filter out duplicate samples
+        target_samples_param_indices = list(set(target_samples_param_indices))
+
+        # for each of the target sample indices, calculate which parameter configuration is closest
+        target_sample_indices = list()
+        for target_sample_param_config_indices in target_samples_param_indices:
+            # calculate the absolute difference between the parameter value indices
+            abs_index_difference = np.abs(self.params_values_indices - target_sample_param_config_indices)
+            # find the param config index where the difference is the smallest
+            min_index_difference_index = np.argmin(abs_index_difference, axis=1)
+            target_sample_indices.append(min_index_difference_index)
+
+        # filter out duplicate samples and replace with random ones
+        target_sample_indices = list(set(target_sample_indices))
+        if len(target_sample_indices) < num_samples:
+            # if there are not enough unique samples, fill up with random samples
+            random_sample_indices = self.get_random_sample_indices(num_samples - len(target_sample_indices))
+            target_sample_indices.extend(random_sample_indices)
+
+        # TODO this same approach can be done with LHS on the parameter index values!
+
+        return self.get_param_configs_at_indices(target_sample_indices)
+
     def get_neighbors_indices_no_cache(self, param_config: tuple, neighbor_method=None, build_full_cache=False) -> List[int]:
         """Get the neighbors indices for a parameter configuration (does not check running cache, useful when mixing neighbor methods)."""
         param_config_index = self.get_param_config_index(param_config)

From ce55392789d2e2503a9cf23573b200aa82baf668 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 23 Jul 2025 21:41:20 +0200
Subject: [PATCH 243/253] Various improvements to  and other searchspace
 functions

---
 kernel_tuner/searchspace.py | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index f9abe1a8c..a700098a4 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -1007,6 +1007,8 @@ def __get_neighbors_indices_strictlyadjacent(
         self, param_config_index: int = None, param_config: tuple = None
     ) -> List[int]:
         """Get the neighbors using strictly adjacent distance from the parameter configuration (parameter index absolute difference == 1)."""
+        if self.params_values_indices is None:
+            self.__prepare_neighbors_index()
         param_config_value_indices = (
             self.get_param_indices(param_config)
             if param_config_index is None
@@ -1023,6 +1025,8 @@ def __get_neighbors_indices_strictlyadjacent(
 
     def __get_neighbors_indices_adjacent(self, param_config_index: int = None, param_config: tuple = None) -> List[int]:
         """Get the neighbors using adjacent distance from the parameter configuration (parameter index absolute difference >= 1)."""
+        if self.params_values_indices is None:
+            self.__prepare_neighbors_index()
         param_config_value_indices = (
             self.get_param_indices(param_config)
             if param_config_index is None
@@ -1097,8 +1101,8 @@ def get_random_sample(self, num_samples: int) -> List[tuple]:
             num_samples = self.size
         return self.get_param_configs_at_indices(self.get_random_sample_indices(num_samples))
 
-    def get_distributed_random_sample(self, num_samples: int, neighbor_method=None, sampling_factor=10) -> List[tuple]:
-        """Get a distributed random sample of parameter configurations, similar to LHS but much faster in a constrained setting."""
+    def get_distributed_random_sample_indices(self, num_samples: int, sampling_factor=10) -> List[int]:
+        """Get a distributed random sample of parameter configuration indices."""
         if self.size < num_samples:
             warn(
                 f"Too many samples requested ({num_samples}), reducing the number of samples to the searchspace size ({self.size})"
@@ -1115,11 +1119,11 @@ def get_distributed_random_sample(self, num_samples: int, neighbor_method=None,
         # calculate the desired parameter configuration indices, starting at the edges of the parameter indices and halving each time
         def get_next_sample(lower: tuple, upper: tuple) -> tuple:
             """Get the next sample indices by halving the range between upper and lower bounds."""
-            half = tuple(round((lower[i] + upper[i]) / 2) for i in range(upper))
+            half = tuple(round((l + u) / 2) for l, u in zip(lower, upper))
             if half == lower or half == upper:
                 # if the range is too small to make a difference, pick one of the bounds and replace one random index with an index of the other
-                random_sample = choice([lower, upper])
-                random_index = randint(0, self.num_params-1))
+                random_sample = list(choice([lower, upper]))
+                random_index = randint(0, self.num_params-1)
                 random_sample[random_index] = lower[random_index] if random_sample[random_index] == upper[random_index] else upper[random_index]
                 return tuple(random_sample)
             return half
@@ -1142,24 +1146,31 @@ def get_next_sample(lower: tuple, upper: tuple) -> tuple:
         target_samples_param_indices = list(set(target_samples_param_indices))
 
         # for each of the target sample indices, calculate which parameter configuration is closest
+        if self.params_values_indices is None:
+            self.__prepare_neighbors_index()
         target_sample_indices = list()
         for target_sample_param_config_indices in target_samples_param_indices:
             # calculate the absolute difference between the parameter value indices
             abs_index_difference = np.abs(self.params_values_indices - target_sample_param_config_indices)
             # find the param config index where the difference is the smallest
-            min_index_difference_index = np.argmin(abs_index_difference, axis=1)
-            target_sample_indices.append(min_index_difference_index)
+            min_index_difference_index = np.argmin(np.sum(abs_index_difference, axis=1))
+            target_sample_indices.append(min_index_difference_index.item())
 
         # filter out duplicate samples and replace with random ones
         target_sample_indices = list(set(target_sample_indices))
-        if len(target_sample_indices) < num_samples:
+        while len(target_sample_indices) < num_samples:
             # if there are not enough unique samples, fill up with random samples
             random_sample_indices = self.get_random_sample_indices(num_samples - len(target_sample_indices))
-            target_sample_indices.extend(random_sample_indices)
+            target_sample_indices.extend(random_sample_indices.tolist())
+            target_sample_indices = list(set(target_sample_indices))
 
         # TODO this same approach can be done with LHS on the parameter index values!
 
-        return self.get_param_configs_at_indices(target_sample_indices)
+        return target_sample_indices
+
+    def get_distributed_random_sample(self, num_samples: int, sampling_factor=10) -> List[tuple]:
+        """Get a distributed random sample of parameter configurations."""
+        return self.get_param_configs_at_indices(self.get_distributed_random_sample_indices(num_samples, sampling_factor))
 
     def get_neighbors_indices_no_cache(self, param_config: tuple, neighbor_method=None, build_full_cache=False) -> List[int]:
         """Get the neighbors indices for a parameter configuration (does not check running cache, useful when mixing neighbor methods)."""

From 5ab83f8c7107bff30741bfdac37c690eb298fe47 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 23 Jul 2025 21:44:52 +0200
Subject: [PATCH 244/253] Performance improvement to
 __get_random_neighbor_adjacent

---
 kernel_tuner/searchspace.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index a700098a4..0680720d6 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -958,11 +958,6 @@ def __get_random_neighbor_adjacent(self, param_config: tuple) -> tuple:
         # calculate the absolute difference between the parameter value indices
         abs_index_difference = np.abs(self.params_values_indices - param_config_value_indices)
 
-        # calculate the difference between the parameter value indices
-        index_difference = np.abs(self.params_values_indices - param_config_value_indices)
-        # transpose to get the param indices difference per parameter instead of per param config
-        index_difference_transposed = index_difference.transpose()
-
         # start at an index difference of 1, progressively increase - potentially expensive if there are no neighbors until very late
         max_index_difference = max(max_index_difference_per_param)
         allowed_index_difference = 1

From 3a388659be8349f49f038ec51870f6d06ec37956 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 23 Jul 2025 21:45:12 +0200
Subject: [PATCH 245/253] Implemented test for
 get_distributed_random_sample_indices

---
 test/test_searchspace.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/test/test_searchspace.py b/test/test_searchspace.py
index 620854eb7..32d5b1385 100644
--- a/test/test_searchspace.py
+++ b/test/test_searchspace.py
@@ -495,6 +495,31 @@ def test_mixed_param_types():
     for param_config_numeric, param_config in zip(searchspace.get_list_numpy_numeric(), searchspace.list):
         assert searchspace.get_param_config_from_numeric(param_config_numeric) == param_config
 
+def test_get_distributed_random_sample():
+    """Test whether the distributed random sample indices are as expected."""
+    # create a searchspace with mixed parameter types
+    mixed_tune_params = dict()
+    mixed_tune_params["int_param"] = [1, 2, 3]
+    mixed_tune_params["float_param"] = [1.0, 2.0, 3.0]
+    mixed_tune_params["str_param"] = ["Alpha", "Bravo", "Charlie"]
+    mixed_tune_params["bool_param"] = [True, False]
+    mixed_restrict = ["int_param + float_param > 2", "bool_param == False"]
+
+    # create the searchspace object
+    searchspace = Searchspace(mixed_tune_params, mixed_restrict, max_threads)
+
+    # check the size
+    assert searchspace.size == 24
+
+    # get the distributed random sample indices
+    num_samples = 10
+    distributed_random_sample_indices = searchspace.get_distributed_random_sample_indices(num_samples=num_samples, sampling_factor=2)
+
+    # check that the indices are unique and within bounds
+    assert len(distributed_random_sample_indices) == num_samples
+    assert len(set(distributed_random_sample_indices)) == num_samples
+    for index in distributed_random_sample_indices:
+        assert 0 <= index < searchspace.size
 
 def test_small_searchspace():
     """Test a small real-world searchspace and the usage of the `max_threads` parameter."""

From c43634b8699ab5738eff3d13ef17638285b1b47c Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 23 Jul 2025 23:06:56 +0200
Subject: [PATCH 246/253] Implemented a new neighbor method for getting the
 neighbors clostest to the parameter indices of the given configuration

---
 kernel_tuner/searchspace.py | 46 +++++++++++++++++++++++++++++++++----
 1 file changed, 41 insertions(+), 5 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 0680720d6..a819bb66e 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -38,7 +38,7 @@
     get_interval,
 )
 
-supported_neighbor_methods = ["strictly-adjacent", "adjacent", "Hamming"]
+supported_neighbor_methods = ["strictly-adjacent", "adjacent", "Hamming", "closest-param-indices"]
 
 
 class Searchspace:
@@ -916,10 +916,35 @@ def get_tensorspace_bounds(self):
 
     def __prepare_neighbors_index(self):
         """Prepare by calculating the indices for the individual parameters."""
-        self.params_values_indices = self.get_list_param_indices_numpy()
+        if self.params_values_indices is None:
+            self.params_values_indices = self.get_list_param_indices_numpy()
+
+    def __get_neighbor_indices_closest_param_indices(self, param_config: tuple, param_index: int = None, return_one=False) -> List[int]:
+        """Get the neighbors closest in parameter indices difference from the parameter configuration. Always returns at least 1 neighbor."""
+        param_indices = self.get_param_indices(param_config)
+
+        # get the indices of the parameter values
+        if self.params_values_indices is None:
+            self.__prepare_neighbors_index()
+
+        # calculate the absolute difference between the parameter value indices
+        abs_index_difference = np.abs(self.params_values_indices - param_indices)
+        # calculate the sum of the absolute differences for each parameter configuration
+        sum_of_index_differences = np.sum(abs_index_difference, axis=1)
+        if param_index is not None:
+            # set the sum of index differences to infinity for the parameter index to avoid returning the same parameter configuration
+            sum_of_index_differences[param_index] = np.iinfo(sum_of_index_differences.dtype).max    # can't use np.inf as it is not an integer type
+        if return_one:
+            # if return_one is True, return the index of the closest parameter configuration (faster than finding all)
+            get_partial_neighbors_indices = [np.argmin(sum_of_index_differences)]
+        else:
+            # find the param config indices where the difference is the smallest
+            min_difference = np.min(sum_of_index_differences)
+            matching_indices = (sum_of_index_differences == min_difference).nonzero()[0]
+        return matching_indices
 
     def __get_neighbors_indices_hamming(self, param_config: tuple) -> List[int]:
-        """Get the neighbors using Hamming distance from the parameter configuration."""
+        """Get the neighbors at 1 Hamming distance from the parameter configuration."""
         param_indices = self.get_param_indices(param_config)
         num_matching_params = np.count_nonzero(self.get_list_param_indices_numpy() == param_indices, -1)
         matching_indices = (num_matching_params == self.num_params - 1).nonzero()[0]
@@ -1070,12 +1095,16 @@ def __build_neighbors_index(self, neighbor_method) -> List[List[int]]:
                 self.__get_neighbors_indices_strictlyadjacent(param_config_index, param_config)
                 for param_config_index, param_config in enumerate(self.list)
             )
-
         if neighbor_method == "adjacent":
             return list(
                 self.__get_neighbors_indices_adjacent(param_config_index, param_config)
                 for param_config_index, param_config in enumerate(self.list)
             )
+        if neighbor_method == "closest-param-indices":
+            return list(
+                self.__get_neighbor_indices_closest_param_indices(param_config, param_config_index)
+                for param_config_index, param_config in enumerate(self.list)
+            )
 
         raise NotImplementedError(f"The neighbor method {neighbor_method} is not implemented")
 
@@ -1148,7 +1177,12 @@ def get_next_sample(lower: tuple, upper: tuple) -> tuple:
             # calculate the absolute difference between the parameter value indices
             abs_index_difference = np.abs(self.params_values_indices - target_sample_param_config_indices)
             # find the param config index where the difference is the smallest
-            min_index_difference_index = np.argmin(np.sum(abs_index_difference, axis=1))
+            sum_of_index_differences = np.sum(abs_index_difference, axis=1)
+            param_index = self.get_param_config_index(self.get_param_config_from_param_indices(target_sample_param_config_indices))
+            if param_index is not None:
+                # set the sum of index differences to infinity for the parameter index to avoid returning the same parameter configuration
+                sum_of_index_differences[param_index] = np.iinfo(sum_of_index_differences.dtype).max    # can't use np.inf as it is not an integer type
+            min_index_difference_index = np.argmin(sum_of_index_differences)
             target_sample_indices.append(min_index_difference_index.item())
 
         # filter out duplicate samples and replace with random ones
@@ -1198,6 +1232,8 @@ def get_neighbors_indices_no_cache(self, param_config: tuple, neighbor_method=No
             return self.__get_neighbors_indices_strictlyadjacent(param_config_index, param_config)
         if neighbor_method == "adjacent":
             return self.__get_neighbors_indices_adjacent(param_config_index, param_config)
+        if neighbor_method == "closest-param-indices":
+            return self.__get_neighbor_indices_closest_param_indices(param_config, param_config_index)
         raise ValueError(f"The neighbor method {neighbor_method} is not in {supported_neighbor_methods}")
 
     def get_neighbors_indices(self, param_config: tuple, neighbor_method=None, build_full_cache=False) -> List[int]:

From 0fe2ea5bcdec146c1438dcaf1cf84c5569299e6b Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 24 Jul 2025 09:12:10 +0200
Subject: [PATCH 247/253] Wrote test for closest param indices neighbor method

---
 test/test_searchspace.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/test/test_searchspace.py b/test/test_searchspace.py
index 32d5b1385..cb9147993 100644
--- a/test/test_searchspace.py
+++ b/test/test_searchspace.py
@@ -298,6 +298,24 @@ def test_neighbors_adjacent():
         assert random_neighbor in neighbors
         assert random_neighbor != test_config
 
+def test_neighbors_closest_param_indices():
+    """Test whether the closest parameter indices neighbors are as expected."""
+    test_config = tuple([1.5, 4, "string_1"])
+    expected_neighbors = [
+        (1.5, 5.5, 'string_1'), 
+        (1.5, 4, 'string_2')
+    ]
+
+    # test the neighbors
+    __test_neighbors(test_config, expected_neighbors, "closest-param-indices")
+
+    # test the random neighbor function
+    neighbors = simple_searchspace.get_neighbors(test_config, "closest-param-indices")
+    for i in range(10):
+        random_neighbor = simple_searchspace.get_random_neighbor(test_config, "closest-param-indices")
+        assert random_neighbor in neighbors
+        assert random_neighbor != test_config
+
 
 def test_neighbors_fictious():
     """Test whether the neighbors are as expected for a fictious parameter configuration (i.e. not existing in the search space due to restrictions)."""

From d0a5ba62570f2c7fb4ef97cd50a83d2c38a8c1ab Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 24 Jul 2025 17:28:27 +0200
Subject: [PATCH 248/253] Implemented a LHS sampler, automatic adjustement to
 smaller integer sizes where possible for numpy arrays

---
 kernel_tuner/searchspace.py | 125 +++++++++++++++++++++++++++++++-----
 1 file changed, 108 insertions(+), 17 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index a819bb66e..d3d00052f 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -9,6 +9,7 @@
 from collections import defaultdict, deque
 
 import numpy as np
+from scipy.stats.qmc import LatinHypercube
 from constraint import (
     BacktrackingSolver,
     Constraint,
@@ -110,6 +111,8 @@ def __init__(
         self.__tune_params_to_index_lookup = None
         self.__tune_params_from_index_lookup = None
         self.__list_param_indices = None
+        self.__list_param_indices_lower_bounds = None
+        self.__list_param_indices_upper_bounds = None
         self.__list_numpy_numeric = None
         self.__true_tune_params = None
         self.__neighbor_cache = { method: dict() for method in supported_neighbor_methods }
@@ -726,10 +729,51 @@ def get_list_param_indices_numpy(self) -> np.ndarray:
             # register the computed results
             self.__tune_params_to_index_lookup = tune_params_to_index_lookup
             self.__tune_params_from_index_lookup = tune_params_from_index_lookup
-            self.__list_param_indices = np.array(list_param_indices, dtype=int)
+            self.__list_param_indices = np.array(list_param_indices)
             assert self.__list_param_indices.shape == (self.size, self.num_params), f"Expected shape {(self.size, self.num_params)}, got {self.__list_param_indices.shape}"
+
+            # calculate the actual minimum and maximum index for each parameter after restrictions
+            self.__list_param_indices_lower_bounds = np.min(self.__list_param_indices, axis=0)
+            self.__list_param_indices_upper_bounds = np.max(self.__list_param_indices, axis=0)
+
+            largest_index = np.max(self.__list_param_indices) * 2 # multiplied by two to account for worst-case absolute difference operations later
+            if largest_index >= 2**31:
+                # if the largest index is larger than 2**31, use int64 to avoid overflow
+                self.__list_param_indices = self.__list_param_indices.astype(np.int64)
+            # else:
+                # self.__list_param_indices = self.__list_param_indices.astype(np.int32)
+            # 
+            # the below types do not have a sizable performance benifit currently
+            elif largest_index >= 2**15:
+                # if the largest index is larger than 2**15, use int32 to avoid overflow
+                self.__list_param_indices = self.__list_param_indices.astype(np.int32)
+            elif largest_index >= 2**7:
+                # if the largest index is larger than 2**7, use int16 to avoid overflow
+                self.__list_param_indices = self.__list_param_indices.astype(np.int16)
+            else:
+                self.__list_param_indices = self.__list_param_indices.astype(np.int8)
         return self.__list_param_indices
-    
+
+    def get_param_indices_lower_bounds(self) -> np.ndarray:
+        """Get the lower bounds of the parameter indices after restrictions."""
+        if self.__list_param_indices_lower_bounds is None:
+            self.get_list_param_indices_numpy()
+        return self.__list_param_indices_lower_bounds
+
+    def get_param_indices_upper_bounds(self) -> np.ndarray:
+        """Get the upper bounds of the parameter indices after restrictions."""
+        if self.__list_param_indices_upper_bounds is None:
+            self.get_list_param_indices_numpy()
+        return self.__list_param_indices_upper_bounds
+
+    def get_list_param_indices_numpy_min(self):
+        """Get the minimum possible value in the numpy list of parameter indices."""
+        return np.iinfo(self.get_list_param_indices_numpy().dtype).min
+
+    def get_list_param_indices_numpy_max(self):
+        """Get the maximum possible value in the numpy list of parameter indices."""
+        return np.iinfo(self.get_list_param_indices_numpy().dtype).max
+
     def get_list_numpy_numeric(self) -> np.ndarray:
         """Get the parameter space list as a 2D NumPy array of numeric values. 
         
@@ -928,12 +972,12 @@ def __get_neighbor_indices_closest_param_indices(self, param_config: tuple, para
             self.__prepare_neighbors_index()
 
         # calculate the absolute difference between the parameter value indices
-        abs_index_difference = np.abs(self.params_values_indices - param_indices)
+        abs_index_difference = np.abs(self.params_values_indices - np.array(param_indices), dtype=self.params_values_indices.dtype)
         # calculate the sum of the absolute differences for each parameter configuration
         sum_of_index_differences = np.sum(abs_index_difference, axis=1)
         if param_index is not None:
             # set the sum of index differences to infinity for the parameter index to avoid returning the same parameter configuration
-            sum_of_index_differences[param_index] = np.iinfo(sum_of_index_differences.dtype).max    # can't use np.inf as it is not an integer type
+            sum_of_index_differences[param_index] = self.get_list_param_indices_numpy_max()
         if return_one:
             # if return_one is True, return the index of the closest parameter configuration (faster than finding all)
             get_partial_neighbors_indices = [np.argmin(sum_of_index_differences)]
@@ -981,7 +1025,7 @@ def __get_random_neighbor_adjacent(self, param_config: tuple) -> tuple:
         max_index_difference_per_param = [max(len(self.params_values[p]) - 1 - i, i) for p, i in enumerate(param_config_value_indices)]
 
         # calculate the absolute difference between the parameter value indices
-        abs_index_difference = np.abs(self.params_values_indices - param_config_value_indices)
+        abs_index_difference = np.abs(self.params_values_indices - np.array(param_config_value_indices), dtype=self.params_values_indices.dtype)
 
         # start at an index difference of 1, progressively increase - potentially expensive if there are no neighbors until very late
         max_index_difference = max(max_index_difference_per_param)
@@ -1035,7 +1079,7 @@ def __get_neighbors_indices_strictlyadjacent(
             else self.params_values_indices[param_config_index]
         )
         # calculate the absolute difference between the parameter value indices
-        abs_index_difference = np.abs(self.params_values_indices - param_config_value_indices)
+        abs_index_difference = np.abs(self.params_values_indices - param_config_value_indices, dtype=self.params_values_indices.dtype)
         # get the param config indices where the difference is one or less for each position
         matching_indices = (np.max(abs_index_difference, axis=1) <= 1).nonzero()[0]
         # as the selected param config does not differ anywhere, remove it from the matches
@@ -1057,18 +1101,17 @@ def __get_neighbors_indices_adjacent(self, param_config_index: int = None, param
         # transpose to get the param indices difference per parameter instead of per param config
         index_difference_transposed = index_difference.transpose()
         # for each parameter get the closest upper and lower parameter (absolute index difference >= 1)
-        # np.PINF has been replaced by 1e12 here, as on some systems np.PINF becomes np.NINF
         upper_bound = tuple(
             np.min(
                 index_difference_transposed[p][(index_difference_transposed[p] > 0).nonzero()],
-                initial=1e12,
+                initial=self.get_list_param_indices_numpy_max(),
             )
             for p in range(self.num_params)
         )
         lower_bound = tuple(
             np.max(
                 index_difference_transposed[p][(index_difference_transposed[p] < 0).nonzero()],
-                initial=-1e12,
+                initial=self.get_list_param_indices_numpy_min(),
             )
             for p in range(self.num_params)
         )
@@ -1126,12 +1169,14 @@ def get_random_sample(self, num_samples: int) -> List[tuple]:
         return self.get_param_configs_at_indices(self.get_random_sample_indices(num_samples))
 
     def get_distributed_random_sample_indices(self, num_samples: int, sampling_factor=10) -> List[int]:
-        """Get a distributed random sample of parameter configuration indices."""
-        if self.size < num_samples:
+        """Get a distributed random sample of parameter configuration indices. Note: `get_LHS_random_sample_indices` is likely faster and better distributed."""
+        if num_samples > self.size:
             warn(
-                f"Too many samples requested ({num_samples}), reducing the number of samples to the searchspace size ({self.size})"
+                f"Too many samples requested ({num_samples}), reducing the number of samples to half of the searchspace size ({self.size})"
             )
-            num_samples = self.size
+            num_samples = round(self.size / 2)
+        if num_samples == self.size:
+            return np.shuffle([range(self.size)])
         
         # adjust the number of random samples if necessary
         sampling_factor = max(1, sampling_factor)
@@ -1175,13 +1220,13 @@ def get_next_sample(lower: tuple, upper: tuple) -> tuple:
         target_sample_indices = list()
         for target_sample_param_config_indices in target_samples_param_indices:
             # calculate the absolute difference between the parameter value indices
-            abs_index_difference = np.abs(self.params_values_indices - target_sample_param_config_indices)
+            abs_index_difference = np.abs(self.params_values_indices - target_sample_param_config_indices, dtype=self.params_values_indices.dtype)
             # find the param config index where the difference is the smallest
             sum_of_index_differences = np.sum(abs_index_difference, axis=1)
             param_index = self.get_param_config_index(self.get_param_config_from_param_indices(target_sample_param_config_indices))
             if param_index is not None:
                 # set the sum of index differences to infinity for the parameter index to avoid returning the same parameter configuration
-                sum_of_index_differences[param_index] = np.iinfo(sum_of_index_differences.dtype).max    # can't use np.inf as it is not an integer type
+                sum_of_index_differences[param_index] = self.get_list_param_indices_numpy_max()
             min_index_difference_index = np.argmin(sum_of_index_differences)
             target_sample_indices.append(min_index_difference_index.item())
 
@@ -1193,14 +1238,60 @@ def get_next_sample(lower: tuple, upper: tuple) -> tuple:
             target_sample_indices.extend(random_sample_indices.tolist())
             target_sample_indices = list(set(target_sample_indices))
 
-        # TODO this same approach can be done with LHS on the parameter index values!
-
         return target_sample_indices
 
     def get_distributed_random_sample(self, num_samples: int, sampling_factor=10) -> List[tuple]:
         """Get a distributed random sample of parameter configurations."""
         return self.get_param_configs_at_indices(self.get_distributed_random_sample_indices(num_samples, sampling_factor))
 
+    def get_LHS_sample_indices(self, num_samples: int) -> List[int]:
+        """Get a Latin Hypercube sample of parameter configuration indices."""
+        if num_samples > self.size:
+            warn(
+                f"Too many samples requested ({num_samples}), reducing the number of samples to half of the searchspace size ({self.size})"
+            )
+            num_samples = round(self.size / 2)
+        if num_samples == self.size:
+            return np.shuffle([range(self.size)])
+        if self.params_values_indices is None:
+            self.__prepare_neighbors_index()
+
+        # get the Latin Hypercube of samples
+        target_samples_param_indices = LatinHypercube(len(self.params_values)).integers(
+            l_bounds=self.get_param_indices_lower_bounds(), 
+            u_bounds=self.get_param_indices_upper_bounds(), 
+            n=num_samples, 
+            endpoint=True)
+        target_samples_param_indices = np.array(target_samples_param_indices, dtype=self.params_values_indices.dtype)
+
+        # for each of the target sample indices, calculate which parameter configuration is closest
+        target_sample_indices = list()
+        for target_sample_param_config_indices in target_samples_param_indices:
+            # calculate the absolute difference between the parameter value indices
+            abs_index_difference = np.abs(self.params_values_indices - target_sample_param_config_indices, dtype=self.params_values_indices.dtype)
+            # find the param config index where the difference is the smallest
+            sum_of_index_differences = np.sum(abs_index_difference, axis=1)
+            param_index = self.get_param_config_index(self.get_param_config_from_param_indices(target_sample_param_config_indices))
+            if param_index is not None:
+                # set the sum of index differences to infinity for the parameter index to avoid returning the same parameter configuration
+                sum_of_index_differences[param_index] = self.get_list_param_indices_numpy_max()
+            min_index_difference_index = np.argmin(sum_of_index_differences)
+            target_sample_indices.append(min_index_difference_index.item())
+
+        # filter out duplicate samples and replace with random ones
+        target_sample_indices = list(set(target_sample_indices))
+        while len(target_sample_indices) < num_samples:
+            # if there are not enough unique samples, fill up with random samples
+            random_sample_indices = self.get_random_sample_indices(num_samples - len(target_sample_indices))
+            target_sample_indices.extend(random_sample_indices.tolist())
+            target_sample_indices = list(set(target_sample_indices))
+
+        return target_sample_indices
+
+    def get_LHS_sample(self, num_samples: int) -> List[tuple]:
+        """Get a distributed random sample of parameter configurations."""
+        return self.get_param_configs_at_indices(self.get_LHS_sample_indices(num_samples))
+
     def get_neighbors_indices_no_cache(self, param_config: tuple, neighbor_method=None, build_full_cache=False) -> List[int]:
         """Get the neighbors indices for a parameter configuration (does not check running cache, useful when mixing neighbor methods)."""
         param_config_index = self.get_param_config_index(param_config)

From ce297406e656f38953a186c266d591f60a30225b Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 24 Jul 2025 17:29:15 +0200
Subject: [PATCH 249/253] Implemented tests for the LHS sampler and the true
 index bounds

---
 test/test_searchspace.py | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/test/test_searchspace.py b/test/test_searchspace.py
index cb9147993..f742a4b79 100644
--- a/test/test_searchspace.py
+++ b/test/test_searchspace.py
@@ -486,7 +486,7 @@ def test_mixed_param_types():
     # create a searchspace with mixed parameter types
     mixed_tune_params = dict()
     mixed_tune_params["int_param"] = [1, 2, 3]
-    mixed_tune_params["float_param"] = [1.0, 2.0, 3.0]
+    mixed_tune_params["float_param"] = [1.0, 2.0, 3.0, -4.4]
     mixed_tune_params["str_param"] = ["Alpha", "Bravo", "Charlie"]
     mixed_tune_params["bool_param"] = [True, False]
     mixed_restrict = ["int_param + float_param > 3", "bool_param == False"]
@@ -513,6 +513,10 @@ def test_mixed_param_types():
     for param_config_numeric, param_config in zip(searchspace.get_list_numpy_numeric(), searchspace.list):
         assert searchspace.get_param_config_from_numeric(param_config_numeric) == param_config
 
+    # check whether the true index bounds are as expected
+    assert all(v1 == v2 for v1, v2 in zip(searchspace.get_param_indices_lower_bounds(), (0, 0, 0, 1)))
+    assert all(v1 == v2 for v1, v2 in zip(searchspace.get_param_indices_upper_bounds(), (2, 2, 2, 1)))
+
 def test_get_distributed_random_sample():
     """Test whether the distributed random sample indices are as expected."""
     # create a searchspace with mixed parameter types
@@ -539,6 +543,32 @@ def test_get_distributed_random_sample():
     for index in distributed_random_sample_indices:
         assert 0 <= index < searchspace.size
 
+def test_get_LHS_sample_indices():
+    """Test whether the distributed random sample indices are as expected."""
+    # create a searchspace with mixed parameter types
+    mixed_tune_params = dict()
+    mixed_tune_params["int_param"] = [1, 2, 3]
+    mixed_tune_params["float_param"] = [1.0, 2.0, 3.0]
+    mixed_tune_params["str_param"] = ["Alpha", "Bravo", "Charlie"]
+    mixed_tune_params["bool_param"] = [True, False]
+    mixed_restrict = ["int_param + float_param > 2", "bool_param == False"]
+
+    # create the searchspace object
+    searchspace = Searchspace(mixed_tune_params, mixed_restrict, max_threads)
+
+    # check the size
+    assert searchspace.size == 24
+
+    # get the distributed random sample indices
+    num_samples = 10
+    distributed_random_sample_indices = searchspace.get_LHS_sample_indices(num_samples=num_samples)
+
+    # check that the indices are unique and within bounds
+    assert len(distributed_random_sample_indices) == num_samples
+    assert len(set(distributed_random_sample_indices)) == num_samples
+    for index in distributed_random_sample_indices:
+        assert 0 <= index < searchspace.size
+
 def test_small_searchspace():
     """Test a small real-world searchspace and the usage of the `max_threads` parameter."""
     max_threads = 1024

From 16c8245593c4ec26cebc29d4be37b3c28d6d22dd Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 24 Jul 2025 17:38:17 +0200
Subject: [PATCH 250/253] Using LHS and closest-param-indices in new diff_evo
 for much better performance

---
 kernel_tuner/strategies/diff_evo.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index 23b67518e..888672d76 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -105,9 +105,7 @@ def random_draw(idxs, mutate, best):
 def generate_population(tune_params, max_idx, popsize, searchspace, constraint_aware):
     """Generate new population, returns Numpy array."""
     if constraint_aware:
-        samples = LatinHypercube(len(tune_params)).integers(l_bounds=0, u_bounds=max_idx, n=popsize, endpoint=True)
-        population = [indices_to_values(sample, tune_params) for sample in samples]
-        population = [repair(individual, searchspace) for individual in population]
+        population = [list(c) for c in searchspace.get_LHS_sample(popsize)]
     else:
         population = []
         for _ in range(popsize):
@@ -391,8 +389,9 @@ def repair(trial_vector, searchspace):
     """
     if not searchspace.is_param_config_valid(tuple(trial_vector)):
         # search for valid configurations neighboring trial_vector
+        for neighbor_method in ["closest-param-indices"]:
         # start from strictly-adjacent to increasingly allowing more neighbors
-        for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]:
+        # for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]:
             new_trial_vector = searchspace.get_random_neighbor(tuple(trial_vector), neighbor_method=neighbor_method)
             if new_trial_vector is not None:
                 # print(f"Differential evolution resulted in invalid config {trial_vector=}, repaired to {new_trial_vector=}")

From c044ef8074a55b895843af7d03cf4c579432204a Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 25 Jul 2025 17:39:19 +0200
Subject: [PATCH 251/253] Disabled full validate on load for hyperparameter
 tuning

---
 kernel_tuner/backends/hypertuner.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index 50971f5aa..83090b6a0 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -150,11 +150,23 @@ def synchronize(self):
         return super().synchronize()
     
     def run_kernel(self, func, gpu_args=None, threads=None, grid=None, stream=None):
+        # from cProfile import Profile
+    
+        # # generate the experiments file
+        # experiments_filepath = Path(func)
+
+        # # run the methodology to get a fitness score for this configuration
+        # with Profile() as pr:
+        #     scores = get_strategy_scores(str(experiments_filepath), full_validate_on_load=False)
+        #     pr.dump_stats('diff_evo_hypertune_hotspot.prof')
+        # self.last_score = scores[list(scores.keys())[0]]['score']
+        # raise ValueError(scores)
+    
         # generate the experiments file
         experiments_filepath = Path(func)
 
         # run the methodology to get a fitness score for this configuration
-        scores = get_strategy_scores(str(experiments_filepath))
+        scores = get_strategy_scores(str(experiments_filepath), full_validate_on_load=False)
         self.last_score = scores[list(scores.keys())[0]]['score']
 
         # remove the experiments file

From cde78231989be8d2d8f8b34bfe2847cc9d4cf2f8 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 25 Jul 2025 17:39:51 +0200
Subject: [PATCH 252/253] Extended hyperparameters for optimization algorithms
 in paper

---
 kernel_tuner/hyper.py | 48 ++++++++++++++++++++++++++++++-------------
 1 file changed, 34 insertions(+), 14 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 0bdb513e7..90b61a9b9 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -100,7 +100,7 @@ def put_if_not_present(target_dict, key, value):
 
     parser = ArgumentParser()
     parser.add_argument("strategy_to_tune", type=str, help="The strategy to tune hyperparameters for.")
-    parser.add_argument("--meta_strategy", nargs='?', default="genetic_algorithm", type=str, help="The meta-strategy to use for hyperparameter tuning.")
+    parser.add_argument("--meta_strategy", nargs='?', default="dual_annealing", type=str, help="The meta-strategy to use for hyperparameter tuning.")
     parser.add_argument("--max_time", nargs='?', default=60*60*24, type=int, help="The maximum time in seconds for the hyperparameter tuning.")
     args = parser.parse_args()
     strategy_to_tune = args.strategy_to_tune
@@ -119,12 +119,20 @@ def put_if_not_present(target_dict, key, value):
     # select the hyperparameter parameters for the selected optimization algorithm
     restrictions = []
     if strategy_to_tune.lower() == "pso":
+        # exhaustive search for PSO hyperparameters
+        # hyperparams = {
+        #     'popsize': [10, 20, 30],
+        #     'maxiter': [50, 100, 150],
+        #     # 'w': [0.25, 0.5, 0.75],   # disabled due to low influence according to KW-test (H=0.0215) and mutual information
+        #     'c1': [1.0, 2.0, 3.0],
+        #     'c2': [0.5, 1.0, 1.5]
+        # }
         hyperparams = {
-            'popsize': [10, 20, 30],
-            'maxiter': [50, 100, 150],
+            'popsize': list(range(2, 50+1, 2)),
+            'maxiter': list(range(10, 200, 10)),
             # 'w': [0.25, 0.5, 0.75],   # disabled due to low influence according to KW-test (H=0.0215) and mutual information
-            'c1': [1.0, 2.0, 3.0],
-            'c2': [0.5, 1.0, 1.5]
+            'c1': [round(n, 2) for n in np.arange(1.0, 3.5+0.25, 0.25).tolist()],
+            'c2': [round(n, 2) for n in np.arange(0.5, 2.0+0.25, 0.25).tolist()]
         }
     elif strategy_to_tune.lower() == "firefly_algorithm":
         hyperparams = {
@@ -148,10 +156,10 @@ def put_if_not_present(target_dict, key, value):
     elif strategy_to_tune.lower() == "diff_evo":
         hyperparams = {
             'method': ["best1bin", "rand1bin", "best2bin", "rand2bin", "best1exp", "rand1exp", "best2exp", "rand2exp", "currenttobest1bin", "currenttobest1exp", "randtobest1bin", "randtobest1exp"],   # best1bin
-            'popsize': list(range(1, 100+1, 1)),   # 50
+            'popsize': list(range(2, 50+1, 2)),   # 50
             'popsize_times_dimensions': [True, False],  # False
-            'F': list(np.arange(0.05, 2.0+0.05, 0.05)),  # 1.3
-            'CR': list(np.arange(0.05, 1.0+0.05, 0.05))  # 0.9
+            'F': [round(n, 2) for n in np.arange(0.1, 2.0+0.1, 0.1).tolist()],  # 1.3
+            'CR': [round(n, 2) for n in np.arange(0.05, 1.0+0.05, 0.05).tolist()]  # 0.9
         }
     elif strategy_to_tune.lower() == "basinhopping":
         hyperparams = {
@@ -159,11 +167,17 @@ def put_if_not_present(target_dict, key, value):
             'T': [0.1, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5],
         }
     elif strategy_to_tune.lower() == "genetic_algorithm":
+        # hyperparams = {
+        #     'method': ["single_point", "two_point", "uniform", "disruptive_uniform"],
+        #     'popsize': [10, 20, 30],
+        #     'maxiter': [50, 100, 150],
+        #     'mutation_chance': [5, 10, 20]
+        # }
         hyperparams = {
             'method': ["single_point", "two_point", "uniform", "disruptive_uniform"],
-            'popsize': [10, 20, 30],
-            'maxiter': [50, 100, 150],
-            'mutation_chance': [5, 10, 20]
+            'popsize': list(range(2, 50+1, 2)),
+            'maxiter': list(range(10, 200, 10)),
+            'mutation_chance': list(range(5, 100, 5))
         }
     elif strategy_to_tune.lower() == "greedy_mls":
         hyperparams = {
@@ -172,11 +186,17 @@ def put_if_not_present(target_dict, key, value):
             'randomize': [True, False]
         }
     elif strategy_to_tune.lower() == "simulated_annealing":
+        # hyperparams = {
+        #     'T': [0.5, 1.0, 1.5],
+        #     'T_min': [0.0001, 0.001, 0.01],
+        #     'alpha': [0.9925, 0.995, 0.9975],
+        #     'maxiter': [1, 2, 3]
+        # }
         hyperparams = {
-            'T': [0.5, 1.0, 1.5],
-            'T_min': [0.0001, 0.001, 0.01],
+            'T': [round(n, 2) for n in np.arange(0.1, 2.0+0.1, 0.1).tolist()],
+            'T_min': [round(n, 4) for n in np.arange(0.0001, 0.1, 0.001).tolist()],
             'alpha': [0.9925, 0.995, 0.9975],
-            'maxiter': [1, 2, 3]
+            'maxiter': list(range(1, 10, 1))
         }
     elif strategy_to_tune.lower() == "bayes_opt":
         hyperparams = {

From f56c1fd298e4d38974b5abac91894619d309eab6 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 25 Jul 2025 18:12:35 +0200
Subject: [PATCH 253/253] Updated required python-constraint version

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 0565f571e..ffc0583be 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -54,7 +54,7 @@ dependencies = [
     "scipy>=1.14.1",    # Python >=3.13 needs scipy >=1.14
     "packaging",        # required by file_utils
     "jsonschema",
-    "python-constraint2>=2.3.1",
+    "python-constraint2>=2.4.0",
     "xmltodict",
     "pandas>=2.0.0",
     "scikit-learn>=1.0.2",