From 544230390ec02c1a2674b52c627129c0fb3deeb3 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 1 Dec 2021 08:51:30 +0100
Subject: [PATCH 001/168] New Bayesian Optimization methods

---
 kernel_tuner/interface.py                     |   5 +-
 kernel_tuner/strategies/bayes_opt.py          |  62 +-
 kernel_tuner/strategies/bayes_opt_GPyTorch.py | 918 ++++++++++++++++++
 .../strategies/bayes_opt_alt_BOTorch.py       |  83 ++
 kernel_tuner/strategies/bayes_opt_old.py      | 837 ++++++++++++++++
 5 files changed, 1868 insertions(+), 37 deletions(-)
 create mode 100644 kernel_tuner/strategies/bayes_opt_GPyTorch.py
 create mode 100644 kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
 create mode 100644 kernel_tuner/strategies/bayes_opt_old.py

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 1c0448214..e2da180c5 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -45,7 +45,7 @@
 except ImportError:
     torch = util.TorchPlaceHolder()
 
-from kernel_tuner.strategies import brute_force, random_sample, diff_evo, minimize, basinhopping, genetic_algorithm, mls, pso, simulated_annealing, firefly_algorithm, bayes_opt
+from kernel_tuner.strategies import brute_force, random_sample, diff_evo, minimize, basinhopping, genetic_algorithm, mls, pso, simulated_annealing, firefly_algorithm, bayes_opt, bayes_opt_old, bayes_opt_GPyTorch, bayes_opt_alt_BOTorch
 
 strategy_map = {
     "brute_force": brute_force,
@@ -59,6 +59,9 @@
     "simulated_annealing": simulated_annealing,
     "firefly_algorithm": firefly_algorithm,
     "bayes_opt": bayes_opt,
+    "bayes_opt_old": bayes_opt_old,
+    "bayes_opt_GPyTorch": bayes_opt_GPyTorch,
+    "bayes_opt_BOTorch": bayes_opt_alt_BOTorch,
 }
 
 
diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
index ccdd2638b..56ec6e720 100644
--- a/kernel_tuner/strategies/bayes_opt.py
+++ b/kernel_tuner/strategies/bayes_opt.py
@@ -376,12 +376,12 @@ def predict(self, x) -> Tuple[float, float]:
         """ Returns a mean and standard deviation predicted by the surrogate model for the parameter configuration """
         return self.__model.predict([x], return_std=True)
 
-    def predict_list(self, lst: list) -> Tuple[list, list, list]:
+    def predict_list(self, lst: list) -> Tuple[np.ndarray, np.ndarray]:
         """ Returns a list of means and standard deviations predicted by the surrogate model for the parameter configurations, and separate lists of means and standard deviations """
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             mu, std = self.__model.predict(lst, return_std=True)
-            return list(zip(mu, std)), mu, std
+            return mu, std
 
     def fit_observations_to_model(self):
         """ Update the model based on the current list of observations """
@@ -459,7 +459,7 @@ def initial_sample(self):
             if self.is_valid(observation):
                 collected_samples += 1
         self.fit_observations_to_model()
-        _, _, std = self.predict_list(self.unvisited_cache)
+        _, std = self.predict_list(self.unvisited_cache)
         self.initial_sample_mean = np.mean(self.__valid_observations)
         # Alternatively:
         # self.initial_sample_std = np.std(self.__valid_observations)
@@ -490,8 +490,8 @@ def __optimize(self, max_fevals):
         while self.fevals < max_fevals:
             if self.__visited_num >= self.searchspace_size:
                 raise ValueError(self.error_message_searchspace_fully_observed)
-            predictions, _, std = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(std)
+            predictions = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(predictions[1])
             list_of_acquisition_values = self.__af(predictions, hyperparam)
             # afterwards select the best AF value
             best_af = self.argopt(list_of_acquisition_values)
@@ -522,8 +522,8 @@ def __optimize_multi(self, max_fevals):
             time_start = time.perf_counter_ns()
             # the first acquisition function is never skipped, so that should be the best for the endgame (EI)
             aqfs = self.multi_afs
-            predictions, _, std = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(std)
+            predictions = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(predictions[1])
             if self.__visited_num >= self.searchspace_size:
                 raise ValueError(self.error_message_searchspace_fully_observed)
             time_predictions = time.perf_counter_ns()
@@ -635,8 +635,8 @@ def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
                 raise ValueError(self.error_message_searchspace_fully_observed)
             observations_median = np.median(self.__valid_observations)
             if increase_precision is False:
-                predictions, _, std = self.predict_list(self.unvisited_cache)
-                hyperparam = self.contextual_variance(std)
+                predictions = self.predict_list(self.unvisited_cache)
+                hyperparam = self.contextual_variance(predictions[1])
             for af_index, af in enumerate(aqfs):
                 if af_index in skip_af_index:
                     continue
@@ -647,7 +647,8 @@ def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
                     hyperparam = self.contextual_variance(std)
                 list_of_acquisition_values = af(predictions, hyperparam)
                 best_af = self.argopt(list_of_acquisition_values)
-                del predictions[best_af]    # to avoid going out of bounds
+                # np.delete returns a copy, so rebind the tuple to keep predictions aligned with unvisited_cache (avoids going out of bounds)
+                predictions = (np.delete(predictions[0], best_af), np.delete(predictions[1], best_af))
                 candidate_params = self.unvisited_cache[best_af]
                 candidate_index = self.find_param_config_index(candidate_params)
                 observation = self.evaluate_objective_function(candidate_params)
@@ -719,8 +720,8 @@ def __optimize_multi_fast(self, max_fevals):
         while self.fevals < max_fevals:
             aqfs = self.multi_afs
             # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
-            predictions, _, std = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(std)
+            predictions = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(predictions[1])
             if self.__visited_num >= self.searchspace_size:
                 raise ValueError(self.error_message_searchspace_fully_observed)
             for af in aqfs:
@@ -728,7 +729,8 @@ def __optimize_multi_fast(self, max_fevals):
                     break
                 list_of_acquisition_values = af(predictions, hyperparam)
                 best_af = self.argopt(list_of_acquisition_values)
-                del predictions[best_af]    # to avoid going out of bounds
+                # numpy arrays do not support `del arr[i]`; rebuild the tuple without the selected entry to avoid going out of bounds
+                predictions = (np.delete(predictions[0], best_af), np.delete(predictions[1], best_af))
                 candidate_params = self.unvisited_cache[best_af]
                 candidate_index = self.find_param_config_index(candidate_params)
                 observation = self.evaluate_objective_function(candidate_params)
@@ -746,65 +748,53 @@ def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> li
         """ Acquisition function Probability of Improvement (PI) """
 
         # prefetch required data
-        if predictions is None:
-            predictions, _, _ = self.predict_list(self.unvisited_cache)
+        x_mu, x_std = self.predict_list(self.unvisited_cache) if predictions is None else predictions
         if hyperparam is None:
             hyperparam = self.af_params['explorationfactor']
         fplus = self.current_optimum - hyperparam
 
         # precompute difference of improvement
-        list_diff_improvement = list(-((fplus - x_mu) / (x_std + 1E-9)) for (x_mu, x_std) in predictions)
+        list_diff_improvement = -((fplus - x_mu) / (x_std + 1E-9))
 
         # compute probability of improvement with CDF in bulk
         list_prob_improvement = norm.cdf(list_diff_improvement)
-
         return list_prob_improvement
 
     def af_expected_improvement(self, predictions=None, hyperparam=None) -> list:
         """ Acquisition function Expected Improvement (EI) """
 
         # prefetch required data
-        if predictions is None:
-            predictions, _, _ = self.predict_list(self.unvisited_cache)
+        x_mu, x_std = self.predict_list(self.unvisited_cache) if predictions is None else predictions
         if hyperparam is None:
             hyperparam = self.af_params['explorationfactor']
         fplus = self.current_optimum - hyperparam
 
         # precompute difference of improvement, CDF and PDF in bulk
-        list_diff_improvement = list((fplus - x_mu) / (x_std + 1E-9) for (x_mu, x_std) in predictions)
+        list_diff_improvement = (fplus - x_mu) / (x_std + 1E-9)
         list_cdf = norm.cdf(list_diff_improvement)
         list_pdf = norm.pdf(list_diff_improvement)
 
-        # specify AF calculation
-        def exp_improvement(index) -> float:
-            x_mu, x_std = predictions[index]
-            ei = (fplus - x_mu) * list_cdf[index] + x_std * list_pdf[index]
-            return -ei
-
-        # calculate AF
-        list_exp_improvement = list(map(exp_improvement, range(len(predictions))))
+        # compute expected improvement in bulk
+        list_exp_improvement = -((fplus - x_mu) * list_cdf + x_std * list_pdf)
         return list_exp_improvement
 
     def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
         """ Acquisition function Lower Confidence Bound (LCB) """
 
-        # prefetch required data
-        if predictions is None:
-            predictions, _, _ = self.predict_list(self.unvisited_cache)
+        x_mu, x_std = self.predict_list(self.unvisited_cache) if predictions is None else predictions
         if hyperparam is None:
             hyperparam = self.af_params['explorationfactor']
         beta = hyperparam
 
         # compute LCB in bulk
-        list_lower_confidence_bound = list(x_mu - beta * x_std for (x_mu, x_std) in predictions)
+        list_lower_confidence_bound = (x_mu - beta * x_std)
         return list_lower_confidence_bound
 
     def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None) -> list:
         """ Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010 """
 
         # prefetch required data
-        if predictions is None:
-            predictions, _, _ = self.predict_list(self.unvisited_cache)
+        x_mu, x_std = self.predict_list(self.unvisited_cache) if predictions is None else predictions
         if hyperparam is None:
             hyperparam = self.af_params['explorationfactor']
 
@@ -816,7 +806,7 @@ def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None)
         beta = np.sqrt(zeta * (2 * np.log((t**(d / 2. + 2)) * (np.pi**2) / (3. * delta))))
 
         # compute UCB in bulk
-        list_lower_confidence_bound = list(x_mu - beta * x_std for (x_mu, x_std) in predictions)
+        list_lower_confidence_bound = (x_mu - beta * x_std)
         return list_lower_confidence_bound
 
     def visualize_after_opt(self):
@@ -824,7 +814,7 @@ def visualize_after_opt(self):
         print(self.__model.kernel_.get_params())
         print(self.__model.log_marginal_likelihood())
         import matplotlib.pyplot as plt
-        _, mu, std = self.predict_list(self.searchspace)
+        mu, std = self.predict_list(self.searchspace)
         brute_force_observations = list()
         for param_config in self.searchspace:
             obs = minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.results)
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch.py b/kernel_tuner/strategies/bayes_opt_GPyTorch.py
new file mode 100644
index 000000000..31b987ca6
--- /dev/null
+++ b/kernel_tuner/strategies/bayes_opt_GPyTorch.py
@@ -0,0 +1,918 @@
+""" Bayesian Optimization implementation from the thesis by Willemsen """
+from copy import deepcopy
+from random import randint, shuffle
+import itertools
+import warnings
+import time
+from typing import Tuple
+
+import numpy as np
+from scipy.stats import norm
+
+# BO imports
+try:
+    import torch
+    import gpytorch
+    from sklearn.gaussian_process.kernels import ConstantKernel, RBF, Matern
+    from sklearn.exceptions import ConvergenceWarning
+    from skopt.sampler import Lhs
+    bayes_opt_present = True
+except ImportError:
+    bayes_opt_present = False
+
+from kernel_tuner.strategies import minimize
+from kernel_tuner import util
+
+supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast"]
+
+
+def generate_normalized_param_dicts(tune_params: dict, eps: float) -> Tuple[dict, dict]:
+    """ Builds, per parameter name, a value -> normalized value mapping and its inverse; the value at index i maps to eps * i + 0.5 * eps """
+    original_to_normalized = {}
+    normalized_to_original = {}
+    for param_name, values in tune_params.items():
+        forward = {}
+        backward = {}
+        # every value is placed at the centre of its own eps-wide interval
+        for position, original_value in enumerate(values):
+            scaled = eps * position + 0.5 * eps
+            backward[scaled] = original_value
+            forward[original_value] = scaled
+        original_to_normalized[param_name], normalized_to_original[param_name] = forward, backward
+    return original_to_normalized, normalized_to_original
+
+
+def normalize_parameter_space(param_space: list, tune_params: dict, normalized: dict) -> list:
+    """ Maps every configuration in param_space to its normalized values using the given normalization dictionary """
+    names = tuple(tune_params)
+    # the position of a value inside a configuration follows the key order of tune_params
+    return [tuple(normalized[names[pos]][val] for pos, val in enumerate(config)) for config in param_space]
+
+
+def prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict):
+    """ Drops every dimension whose parameter has a single value; returns the pruned space and, per parameter, the removed normalized value or None """
+    keep_mask = []
+    removed_tune_params = []
+    for name, values in tune_params.items():
+        is_tunable = len(values) > 1
+        keep_mask.append(is_tunable)
+        # kept dimensions are marked with None, constant ones remember their normalized value
+        removed_tune_params.append(None if is_tunable else normalize_dict[name][values[0]])
+    num_params = len(tune_params.keys())
+    num_kept = sum(keep_mask)
+    # report the reduction only when verbose and something was actually pruned
+    if 'verbose' in tuning_options and tuning_options.verbose is True and num_params != num_kept:
+        print(f"Number of parameters (dimensions): {num_params}, after pruning: {num_kept}")
+    # apply the mask to every configuration
+    pruned_space = [tuple(itertools.compress(config, keep_mask)) for config in parameter_space]
+    return pruned_space, removed_tune_params
+
+
+def tune(runner, kernel_options, device_options, tuning_options):
+    """ Find the best performing kernel configuration in the parameter space
+
+    :param runner: A runner from kernel_tuner.runners
+    :type runner: kernel_tuner.runner
+
+    :param kernel_options: A dictionary with all options for the kernel.
+    :type kernel_options: kernel_tuner.interface.Options
+
+    :param device_options: A dictionary with all options for the device
+        on which the kernel should be tuned.
+    :type device_options: kernel_tuner.interface.Options
+
+    :param tuning_options: A dictionary with all options regarding the tuning
+        process. Allows setting hyperparameters via the strategy_options key.
+    :type tuning_options: kernel_tuner.interface.Options
+
+    :returns: A list of dictionaries for executed kernel configurations and their
+        execution times. And a dictionary that contains information
+        about the hardware/software environment on which the tuning took place.
+    :rtype: list(dict()), dict()
+
+    """
+
+    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
+    prune_parameterspace = tuning_options.strategy_options.get("pruneparameterspace", True)
+    if not bayes_opt_present:
+        raise ImportError("Error: optional dependencies for Bayesian Optimization not installed, please install torch, gpytorch, scikit-learn and scikit-optimize")
+
+    # epsilon for scaling should be the evenly spaced distance between the largest set of parameter options in an interval [0,1]
+    tune_params = tuning_options.tune_params
+    tuning_options["scaling"] = True
+    _, _, eps = minimize.get_bounds_x0_eps(tuning_options)
+
+    # compute cartesian product of all tunable parameters
+    parameter_space = itertools.product(*tune_params.values())
+
+    # check for search space restrictions
+    if tuning_options.restrictions is not None:
+        tuning_options.verbose = False
+    parameter_space = filter(lambda p: util.config_valid(p, tuning_options, runner.dev.max_threads), parameter_space)
+    parameter_space = list(parameter_space)
+    if len(parameter_space) < 1:
+        raise ValueError("Empty parameterspace after restrictionscheck. Restrictionscheck is possibly too strict.")
+    if len(parameter_space) == 1:
+        raise ValueError(f"Only one configuration after restrictionscheck. Restrictionscheck is possibly too strict. Configuration: {parameter_space[0]}")
+
+    # normalize search space to [0,1]
+    normalize_dict, denormalize_dict = generate_normalized_param_dicts(tune_params, eps)
+    parameter_space = normalize_parameter_space(parameter_space, tune_params, normalize_dict)
+
+    # prune the parameter space to remove dimensions that have a constant parameter
+    if prune_parameterspace:
+        parameter_space, removed_tune_params = prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict)
+    else:
+        parameter_space = list(parameter_space)
+        removed_tune_params = [None] * len(tune_params.keys())
+
+    # initialize and optimize
+    bo = BayesianOptimization(parameter_space, removed_tune_params, kernel_options, tuning_options, normalize_dict, denormalize_dict, runner)
+    results = bo.optimize(max_fevals)
+
+    return results, runner.dev.get_environment()
+
+
+class ExactGPModel(gpytorch.models.ExactGP if bayes_opt_present else object):
+    """ Very simple exact Gaussian Process model; the base falls back to `object` so this module stays importable when the optional dependencies are missing """
+
+    def __init__(self, train_x, train_y, likelihood):
+        super().__init__(train_x, train_y, likelihood)
+        self.mean_module = gpytorch.means.ZeroMean()    # TODO maybe try ConstantMean or LinearMean
+        self.covar_module = gpytorch.kernels.MaternKernel(nu=1.5)    # TODO maybe try ScaleKernel(MaternKernel)
+
+    def forward(self, x):
+        mean_x = self.mean_module(x)
+        covar_x = self.covar_module(x)
+        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
+
+
+class BayesianOptimization():
+
+    def __init__(self, searchspace: list, removed_tune_params: list, kernel_options: dict, tuning_options: dict, normalize_dict: dict, denormalize_dict: dict,
+                 runner, opt_direction='min'):
+        """ Reads the hyperparameters from tuning_options.strategy_options, sets up the bookkeeping of visited / valid configurations and takes the initial sample """
+        time_start = time.perf_counter_ns()
+
+        # supported hyperparameter values
+        self.supported_cov_kernels = ["constantrbf", "rbf", "matern32", "matern52"]
+        self.supported_methods = supported_methods
+        self.supported_sampling_methods = ["random", "lhs"]
+        self.supported_sampling_criterion = ["correlation", "ratio", "maximin", None]
+
+        def get_hyperparam(name: str, default, supported_values=()):
+            value = tuning_options.strategy_options.get(name, default)
+            if len(supported_values) > 0 and value not in supported_values:
+                raise ValueError(f"'{name}' is set to {value}, but must be one of {supported_values}")
+            return value
+
+        # get hyperparameters
+        cov_kernel_name = get_hyperparam("covariancekernel", "matern32", self.supported_cov_kernels)
+        cov_kernel_lengthscale = get_hyperparam("covariancelengthscale", 1.5)
+        acquisition_function = get_hyperparam("method", "multi-advanced", self.supported_methods)
+        acq = acquisition_function
+        acq_params = get_hyperparam("methodparams", {})
+        multi_af_names = get_hyperparam("multi_af_names", ['ei', 'poi', 'lcb'])
+        self.multi_afs_discount_factor = get_hyperparam("multi_af_discount_factor", 0.65 if acq == 'multi' else 0.95)
+        self.multi_afs_required_improvement_factor = get_hyperparam("multi_afs_required_improvement_factor", 0.15 if acq == 'multi-advanced-precise' else 0.1)
+        self.training_iter = get_hyperparam("training_iter", 10)
+        self.num_initial_samples = get_hyperparam("popsize", 20)
+        self.sampling_method = get_hyperparam("samplingmethod", "lhs", self.supported_sampling_methods)
+        self.sampling_crit = get_hyperparam("samplingcriterion", 'maximin', self.supported_sampling_criterion)
+        self.sampling_iter = get_hyperparam("samplingiterations", 1000)
+
+        # set acquisition function hyperparameter defaults where missing
+        # NOTE: when 'methodparams' is supplied in strategy_options, the defaults are filled into that same dict
+        acq_params.setdefault('explorationfactor', 'CV')
+        acq_params.setdefault('zeta', 1)
+        acq_params.setdefault('skip_duplicate_after', 5)
+
+        # set arguments
+        self.kernel_options = kernel_options
+        self.tuning_options = tuning_options
+        self.tune_params = tuning_options.tune_params
+        self.param_names = list(self.tune_params.keys())
+        self.normalized_dict = normalize_dict
+        self.denormalized_dict = denormalize_dict
+        self.runner = runner
+        self.max_threads = runner.dev.max_threads
+        self.log_timings = False
+
+        # set optimization constants
+        self.invalid_value = 1e20
+        self.opt_direction = opt_direction
+        # np.inf / np.nan are used instead of the np.PINF / np.NINF / np.NaN aliases, which were removed in NumPy 2.0
+        if opt_direction == 'min':
+            self.worst_value = np.inf
+            self.argopt = np.argmin
+        elif opt_direction == 'max':
+            self.worst_value = -np.inf
+            self.argopt = np.argmax
+        else:
+            raise ValueError("Invalid optimization direction '{}'".format(opt_direction))
+
+        # set the acquisition function and surrogate model
+        self.optimize = self.__optimize
+        self.af_name = acquisition_function
+        self.af_params = acq_params
+        self.multi_afs = list(self.get_af_by_name(af_name) for af_name in multi_af_names)
+        self.set_acquisition_function(acquisition_function)
+        # self.set_surrogate_model(cov_kernel_name, cov_kernel_lengthscale)
+
+        # set remaining values
+        self.results = []
+        self.__searchspace = searchspace
+        self.removed_tune_params = removed_tune_params
+        self.searchspace_size = len(self.searchspace)
+        self.hyperparams = {
+            'loss': np.nan,
+            'lengthscale': np.nan,
+            'noise': np.nan,
+        }
+        self.num_dimensions = len(self.dimensions())
+        self.__current_optimum = self.worst_value
+        self.cv_norm_maximum = None
+        self.fevals = 0
+        self.__visited_num = 0
+        self.__visited_valid_num = 0
+        self.__visited_searchspace_indices = [False] * self.searchspace_size
+        self.__observations = [np.nan] * self.searchspace_size
+        self.__valid_observation_indices = [False] * self.searchspace_size
+        self.__valid_params = list()
+        self.__valid_observations = list()
+        self.unvisited_cache = self.unvisited()
+        time_setup = time.perf_counter_ns()
+        self.error_message_searchspace_fully_observed = "The search space has been fully observed"
+
+        # take initial sample
+        self.initial_sample()
+        time_initial_sample = time.perf_counter_ns()
+
+        # print the timings
+        if self.log_timings:
+            time_taken_setup = round(time_setup - time_start, 3) / 1000
+            time_taken_initial_sample = round(time_initial_sample - time_setup, 3) / 1000
+            time_taken_total = round(time_initial_sample - time_start, 3) / 1000
+            print(f"Initialization | total time: {time_taken_total} | Setup: {time_taken_setup} | Initial sample: {time_taken_initial_sample}", flush=True)
+
+    @property
+    def searchspace(self):
+        return self.__searchspace    # the search space that was handed to __init__
+
+    @property
+    def observations(self):
+        return self.__observations    # one slot per searchspace index, filled in by update_after_evaluation
+
+    @property
+    def current_optimum(self):
+        return self.__current_optimum    # starts at worst_value, replaced whenever a better valid observation is recorded
+
+    @current_optimum.setter
+    def current_optimum(self, value: float):
+        self.__current_optimum = value    # no validation is done here; callers decide whether the value is better
+
+    def is_better_than(self, a: float, b: float) -> bool:
+        """ Returns True when a is strictly better than b: smaller for direction 'min', larger for any other direction """
+        return a > b if self.opt_direction != 'min' else a < b
+
+    def is_not_visited(self, index: int) -> bool:
+        """ Returns True if the configuration at this searchspace index has not been marked as visited yet """
+        return not self.__visited_searchspace_indices[index]
+
+    def is_valid(self, observation: float) -> bool:
+        """ Returns whether an observation is valid, i.e. it is not None, not the invalid-value marker and not NaN (NaN never compares equal, so np.isnan is required) """
+        return not (observation is None or observation == self.invalid_value or np.isnan(observation))
+
+    def get_af_by_name(self, name: str):
+        """ Looks up one of the basic acquisition functions ('ei', 'poi' or 'lcb') by name; raises ValueError for any other name """
+        method_names = {
+            'ei': 'af_expected_improvement',
+            'poi': 'af_probability_of_improvement',
+            'lcb': 'af_lower_confidence_bound',
+        }
+        if name not in method_names:
+            raise ValueError(f"{name} not in {list(method_names)}")
+        return getattr(self, method_names[name])
+
+    def set_acquisition_function(self, acquisition_function: str):
+        """ Selects the acquisition function for the single-AF methods, or swaps in the matching optimize routine for the 'multi*' methods; raises ValueError for unknown names """
+        if acquisition_function == 'poi':
+            self.__af = self.af_probability_of_improvement
+        elif acquisition_function == 'ei':
+            self.__af = self.af_expected_improvement
+        elif acquisition_function == 'lcb':
+            self.__af = self.af_lower_confidence_bound
+        elif acquisition_function == 'lcb-srinivas':
+            self.__af = self.af_lower_confidence_bound_srinivas
+        elif acquisition_function == 'random':    # NOTE(review): 'random' is not listed in the module-level supported_methods
+            self.__af = self.af_random
+        elif acquisition_function == 'multi':    # the multi variants replace self.optimize and do not set a single acquisition function
+            self.optimize = self.__optimize_multi
+        elif acquisition_function == 'multi-advanced':
+            self.optimize = self.__optimize_multi_advanced
+        elif acquisition_function == 'multi-fast':
+            self.optimize = self.__optimize_multi_fast
+        else:
+            raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
+
+    def set_surrogate_model(self, cov_kernel_name: str, cov_kernel_lengthscale: float):
+        """ Set the surrogate model with a covariance function and lengthscale; raises ValueError for an unsupported covariance kernel name """
+        # TODO remove or adapt this: `kernel` is never used and train_x / train_y are not defined in this scope, so the model construction below raises NameError
+        if cov_kernel_name == "constantrbf":
+            kernel = ConstantKernel(1.0, constant_value_bounds="fixed") * RBF(cov_kernel_lengthscale, length_scale_bounds="fixed")
+        elif cov_kernel_name == "rbf":
+            kernel = RBF(length_scale=cov_kernel_lengthscale, length_scale_bounds="fixed")
+        elif cov_kernel_name == "matern32":
+            kernel = Matern(length_scale=cov_kernel_lengthscale, nu=1.5, length_scale_bounds="fixed")
+        elif cov_kernel_name == "matern52":
+            kernel = Matern(length_scale=cov_kernel_lengthscale, nu=2.5, length_scale_bounds="fixed")
+        else:
+            raise ValueError(f"Covariance kernel must be one of {self.supported_cov_kernels}, is {cov_kernel_name}")
+        likelihood = gpytorch.likelihoods.GaussianLikelihood()
+        self.__model = ExactGPModel(train_x, train_y, likelihood)
+        # self.__model = GaussianProcessRegressor(kernel=kernel, alpha=1e-10, normalize_y=True)    # maybe change alpha to a higher value such as 1e-5?
+
+    def valid_params_observations(self) -> Tuple[list, list]:
+        """ Collects the parameter configurations that produced a valid observation, together with those observations """
+        # if you do this every iteration, better keep it as cache and update in update_after_evaluation
+        valid_indices = [
+            index for index, valid in enumerate(self.__valid_observation_indices)
+            if valid is True
+        ]
+        params = [self.searchspace[index] for index in valid_indices]
+        observations = [self.observations[index] for index in valid_indices]
+        return params, observations
+
+    def unvisited(self) -> list:
+        """ Builds a fresh list of the parameter configurations that have not been visited - attention: cached version exists! """
+        visited_flags = self.__visited_searchspace_indices
+        return [self.searchspace[index] for index, visited in enumerate(visited_flags) if visited is False]
+
+    def find_param_config_index(self, param_config: tuple) -> int:
+        """ Returns the index of a parameter config in the search space; assuming the search space is a list, a missing config raises ValueError """
+        return self.searchspace.index(param_config)
+
+    def find_param_config_unvisited_index(self, param_config: tuple) -> int:
+        """ Returns the index of a parameter config in the unvisited cache; assuming the cache is a list, a missing config raises ValueError """
+        return self.unvisited_cache.index(param_config)
+
+    def normalize_param_config(self, param_config: tuple) -> tuple:
+        """ Translates each original value of a configuration to its normalized counterpart, matching parameters by position """
+        lookup, names = self.normalized_dict, self.param_names
+        return tuple(lookup[names[position]][value] for position, value in enumerate(param_config))
+
+    def denormalize_param_config(self, param_config: tuple) -> tuple:
+        """ Translates each normalized value of a configuration back to its original value, matching parameters by position """
+        lookup, names = self.denormalized_dict, self.param_names
+        return tuple(lookup[names[position]][value] for position, value in enumerate(param_config))
+
+    def unprune_param_config(self, param_config: tuple) -> tuple:
+        """ Rebuilds the full configuration by re-inserting the values of dimensions that were removed by pruning """
+        restored = []
+        next_kept = 0
+        for removed_value in self.removed_tune_params:
+            if removed_value is None:
+                restored.append(param_config[next_kept])
+                next_kept += 1
+                continue
+            restored.append(removed_value)
+        return tuple(restored)
+
+    def update_after_evaluation(self, observation: float, index: int, param_config: tuple):
+        """ Records an evaluated configuration: marks the index as visited, stores the observation, removes the config from the unvisited cache and, for valid observations, updates the valid records and the current optimum """
+        validity = self.is_valid(observation)
+        self.__visited_num += 1
+        self.__observations[index] = observation
+        self.__visited_searchspace_indices[index] = True
+        del self.unvisited_cache[self.find_param_config_unvisited_index(param_config)]    # keeps the cache in sync with the visited flags
+        self.__valid_observation_indices[index] = validity
+        if validity is True:
+            self.__visited_valid_num += 1
+            self.__valid_params.append(param_config)
+            self.__valid_observations.append(observation)
+            if self.is_better_than(observation, self.current_optimum):
+                self.current_optimum = observation
+
+    def predict(self, x) -> Tuple[float, float]:
+        """ Returns the mean and standard deviation (each as a length-1 array) predicted by the surrogate model for one parameter configuration; delegates to predict_list because the GPyTorch model has no scikit-learn style predict() """
+        return self.predict_list([x])
+
+    def predict_list(self, lst: list) -> Tuple[np.ndarray, np.ndarray]:
+        """ Returns an array of means and an array of standard deviations predicted by the surrogate model for the parameter configurations """
+        with torch.no_grad(), gpytorch.settings.fast_pred_var():
+            test_x = torch.Tensor(lst)
+            observed_pred = self.__likelihood(self.__model(test_x))
+            mu = observed_pred.mean
+            std = observed_pred.stddev    # the standard deviation, not the variance: the second array is documented and named as a std
+            return mu.numpy(), std.numpy()
+
+    def evaluate_objective_function(self, param_config: tuple) -> float:
+        """ Evaluates the objective function for a (pruned, normalized) parameter configuration. Returns self.invalid_value without evaluating if the denormalized configuration does not pass util.config_valid, otherwise the value returned by the cost function """
+        param_config = self.unprune_param_config(param_config)
+        denormalized_param_config = self.denormalize_param_config(param_config)
+        if not util.config_valid(denormalized_param_config, self.tuning_options, self.max_threads):
+            return self.invalid_value
+        val = minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.results)    # receives the unpruned, still normalized configuration
+        self.fevals += 1    # only configurations passed on to the cost function count as function evaluations
+        self.add_model_hyperparams_to_result(denormalized_param_config)
+        return val
+
+    def add_model_hyperparams_to_result(self, param_config: tuple):
+        """ Copies every entry of self.hyperparams into the most recent entry of the results """
+        # sanity check: the configuration must be the one that was added to the results last
+        last_results_index = len(self.results) - 1
+        assert self.find_config_index_in_results(param_config) == last_results_index
+
+        latest_result = self.results[-1]
+        latest_result.update(self.hyperparams)
+
+    def find_config_index_in_results(self, param_config: tuple):
+        """ Returns the position in the results of the single entry whose leading values equal the parameter configuration. Scans all results, so beware that this can be very slow! """
+        matches = []
+        for position, entry in enumerate(self.results):
+            entry_keys = list(entry)
+            # compare the configuration against the first len(param_config) values of the entry, in key order
+            mismatches = [entry[entry_keys[slot]] != expected for slot, expected in enumerate(param_config)]
+            if any(mismatches):
+                continue
+            matches.append(position)
+        # exactly one entry is expected to match
+        assert len(matches) == 1
+        return matches[0]
+
+    def dimensions(self) -> list:
+        """ List of parameter values per parameter, in the order of tune_params (an actual list, as annotated, rather than a dict view) """
+        return list(self.tune_params.values())
+
+    def draw_random_sample(self) -> Tuple[list, int]:
+        """ Picks a random configuration from the unvisited cache and returns it together with its index as given by find_param_config_index """
+        num_unvisited = len(self.unvisited_cache)
+        if num_unvisited < 1:
+            raise ValueError("Searchspace exhausted during random sample draw as no valid configurations were found")
+        # randint is inclusive on both ends, hence the - 1
+        param_config = self.unvisited_cache[randint(0, num_unvisited - 1)]    # NOSONAR
+        return param_config, self.find_param_config_index(param_config)
+
+    def draw_latin_hypercube_samples(self, num_samples: int) -> list:
+        """ Draws an LHS-distributed sample from the search space. Returns a list of (normalized param config, search space index) tuples, which can hold fewer than num_samples entries because samples outside of the restricted search space are skipped """
+        if self.searchspace_size < num_samples:
+            raise ValueError("Can't sample more than the size of the search space")
+        if self.sampling_crit is None:
+            lhs = Lhs(lhs_type="centered", criterion=None)
+        else:
+            lhs = Lhs(lhs_type="classic", criterion=self.sampling_crit, iterations=self.sampling_iter)
+        param_configs = lhs.generate(self.dimensions(), num_samples)
+        indices = list()
+        normalized_param_configs = list()
+        for lhs_config in param_configs:
+            try:
+                param_config = self.normalize_param_config(lhs_config)
+                index = self.find_param_config_index(param_config)
+                indices.append(index)
+                normalized_param_configs.append(param_config)
+            except ValueError:
+                # Due to search space restrictions, the search space may not be an exact cartesian product of the tunable parameter values.
+                # It is thus possible for LHS to generate a parameter combination that is not in the actual searchspace, which must be skipped.
+                continue
+        return list(zip(normalized_param_configs, indices))
+
+    def train_model_hyperparams(self):
+        """ Train the model and likelihood hyperparameters for self.training_iter iterations of Adam on the negative marginal log likelihood, then switch back to prediction mode and store the resulting loss, lengthscale and noise in self.hyperparams """
+        # set to training modes
+        self.__model.train()
+        self.__likelihood.train()
+
+        # Use the adam optimizer
+        optimizer = torch.optim.Adam(self.__model.parameters(), lr=0.1)    # Includes GaussianLikelihood parameters
+
+        # "Loss" for GPs - the marginal log likelihood
+        mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.__likelihood, self.__model)
+
+        # the loss stays a plain 0 when no training iterations are requested
+        loss = 0
+
+        for _ in range(self.training_iter):
+            # Zero gradients from previous iteration
+            optimizer.zero_grad()
+            # Output from model
+            output = self.__model(self.__tparams)
+            # Calc loss and backprop gradients
+            loss = -mll(output, self.__tobservations)
+            loss.backward()
+            # print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' %
+            #       (i + 1, self.training_iter, loss.item(), self.__model.covar_module.base_kernel.lengthscale.item(), self.__model.likelihood.noise.item()))
+            optimizer.step()
+
+        # set to prediction mode
+        self.__model.eval()
+        self.__likelihood.eval()
+
+        # set the hyperparameters globally for reference
+        self.hyperparams = {
+            'loss': float(loss),    # float() handles both the loss tensor and the initial plain 0
+            'lengthscale': self.__model.covar_module.lengthscale.item(),
+            'noise': self.__model.likelihood.noise.item(),
+        }
+        # print(f"Loss: {self.hyperparams['loss']}, lengthscale: {self.hyperparams['lengthscale']}, noise: {self.hyperparams['noise']}")
+
+    def initial_sample(self):
+        """ Draws and evaluates the initial sample (LHS or none up front, depending on self.sampling_method), tops it up with random samples until num_initial_samples valid observations are collected, then instantiates and trains the surrogate model on the valid observations """
+        if self.num_initial_samples <= 0:
+            raise ValueError("At least one initial sample is required")
+        if self.sampling_method == 'lhs':
+            samples = self.draw_latin_hypercube_samples(self.num_initial_samples)
+        elif self.sampling_method == 'random':
+            samples = list()    # nothing up front, all samples are drawn randomly in the while loop below
+        else:
+            raise ValueError("Sampling method must be one of {}, is {}".format(self.supported_sampling_methods, self.sampling_method))
+        # collect the samples
+        collected_samples = 0
+        for params, index in samples:
+            observation = self.evaluate_objective_function(params)
+            self.update_after_evaluation(observation, index, params)
+            if self.is_valid(observation):
+                collected_samples += 1
+        # collect the remainder of the samples
+        while collected_samples < self.num_initial_samples:
+            params, index = self.draw_random_sample()
+            observation = self.evaluate_objective_function(params)
+            self.update_after_evaluation(observation, index, params)
+            # check for validity to avoid having no actual initial samples
+            if self.is_valid(observation):
+                collected_samples += 1
+        # instantiate the model with the initial sample
+        self.__likelihood = gpytorch.likelihoods.GaussianLikelihood()
+        self.__tparams = torch.Tensor(self.__valid_params)
+        self.__tobservations = torch.Tensor(self.__valid_observations)
+        self.__model = ExactGPModel(self.__tparams, self.__tobservations, self.__likelihood)
+        self.train_model_hyperparams()
+
+        # extract the predictions
+        _, std = self.predict_list(self.unvisited_cache)
+        self.initial_sample_mean = np.mean(self.__valid_observations)
+        # Alternatively:
+        # self.initial_sample_std = np.std(self.__valid_observations)
+        # self.initial_sample_mean = np.mean(predictions)
+        self.initial_std = np.mean(std)
+        self.cv_norm_maximum = self.initial_std    # read by contextual_variance to normalize the contextual variance
+
+    def contextual_variance(self, std: list):
+        """ Contextual variance used to decide explore / exploit, based on the contextual improvement proposed by (Jasrasaria, 2018). Returns None unless the exploration factor is set to 'CV' """
+        if self.af_params['explorationfactor'] != 'CV':
+            return None
+        if self.opt_direction != 'min':
+            return np.mean(std) / self.current_optimum
+        if self.current_optimum == self.worst_value:
+            return 0.01
+        if self.current_optimum <= 0:
+            # doesn't work well for minimization beyond 0, should that even be a thing?
+            return abs(np.mean(std) / self.current_optimum)
+        gain_over_initial_sample = self.initial_sample_mean / self.current_optimum
+        cv = np.mean(std) / gain_over_initial_sample
+        # normalize if available
+        if self.cv_norm_maximum:
+            cv /= self.cv_norm_maximum
+        return cv
+
+    def __optimize(self, max_fevals):
+        """ Until max_fevals is reached: predict on the unvisited configurations, pick the candidate with the best value of the acquisition function self.__af, evaluate it and retrain the model. Raises a ValueError if the search space is fully visited before that. Returns the results """
+        while self.fevals < max_fevals:
+            if self.__visited_num >= self.searchspace_size:
+                raise ValueError(self.error_message_searchspace_fully_observed)
+            predictions = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(predictions[1])
+            list_of_acquisition_values = self.__af(predictions, hyperparam)
+            # afterwards select the best AF value
+            best_af = self.argopt(list_of_acquisition_values)    # position in the unvisited cache, not in the search space
+            candidate_params = self.unvisited_cache[best_af]
+            candidate_index = self.find_param_config_index(candidate_params)
+            observation = self.evaluate_objective_function(candidate_params)
+            self.update_after_evaluation(observation, candidate_index, candidate_params)
+            self.train_model_hyperparams()
+        return self.results
+
+    def __optimize_multi(self, max_fevals):
+        """ Optimize with a portfolio of multiple acquisition functions. Predictions are taken once per round. Skips AFs that suggested a duplicate candidate in each of the last 'skip_duplicate_after' rounds; if multiple AFs qualify for skipping, the one with the lowest discounted sum of observations is kept. Only supports minimization. """
+        if self.opt_direction != 'min':
+            raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
+        # calculate how many times an AF can suggest a duplicate candidate before the AF is skipped
+        # skip_duplicates_fraction = self.af_params['skip_duplicates_fraction']
+        # skip_if_duplicate_n_times = int(min(max(round(skip_duplicates_fraction * max_fevals), 3), max_fevals))
+        skip_if_duplicate_n_times = self.af_params['skip_duplicate_after']
+        discount_factor = self.multi_afs_discount_factor
+        # setup the registration of duplicates and runtimes
+        duplicate_count_template = [0 for _ in range(skip_if_duplicate_n_times)]
+        duplicate_candidate_af_count = list(deepcopy(duplicate_count_template) for _ in range(3))
+        skip_af_index = list()
+        af_runtimes = [0, 0, 0]
+        af_observations = [list(), list(), list()]
+        initial_sample_mean = np.mean(self.__valid_observations)
+        while self.fevals < max_fevals:
+            time_start = time.perf_counter_ns()
+            # the first acquisition function is never skipped, so that should be the best for the endgame (EI)
+            aqfs = self.multi_afs
+            predictions = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(predictions[1])
+            if self.__visited_num >= self.searchspace_size:
+                raise ValueError(self.error_message_searchspace_fully_observed)
+            time_predictions = time.perf_counter_ns()
+            actual_candidate_params = list()
+            actual_candidate_indices = list()
+            actual_candidate_af_indices = list()
+            duplicate_candidate_af_indices = list()
+            duplicate_candidate_original_af_indices = list()
+            for af_index, af in enumerate(aqfs):
+                if af_index in skip_af_index:
+                    continue
+                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
+                    break
+                timer_start = time.perf_counter()
+                list_of_acquisition_values = af(predictions, hyperparam)
+                best_af = self.argopt(list_of_acquisition_values)
+                time_taken = time.perf_counter() - timer_start
+                af_runtimes[af_index] += time_taken
+                is_duplicate = best_af in actual_candidate_indices
+                if not is_duplicate:
+                    candidate_params = self.unvisited_cache[best_af]
+                    actual_candidate_params.append(candidate_params)
+                    actual_candidate_indices.append(best_af)
+                    actual_candidate_af_indices.append(af_index)
+                # register whether the AF suggested a duplicate candidate
+                duplicate_candidate_af_count[af_index].pop(0)
+                duplicate_candidate_af_count[af_index].append(1 if is_duplicate else 0)
+                if is_duplicate:
+                    # find the index of the AF that first registered the duplicate
+                    original_duplicate_af_index = actual_candidate_af_indices[actual_candidate_indices.index(best_af)]
+                    # register that AF as duplicate as well
+                    duplicate_candidate_af_count[original_duplicate_af_index][-1] = 1
+                    duplicate_candidate_af_indices.append(af_index)
+                    duplicate_candidate_original_af_indices.append(original_duplicate_af_index)
+            time_afs = time.perf_counter_ns()
+            # evaluate the non-duplicate candidates
+            for index, af_index in enumerate(actual_candidate_af_indices):
+                candidate_params = actual_candidate_params[index]
+                candidate_index = self.find_param_config_index(candidate_params)
+                observation = self.evaluate_objective_function(candidate_params)
+                self.update_after_evaluation(observation, candidate_index, candidate_params)
+                if observation != self.invalid_value:
+                    # we use the registered observations for maximization of the discounted reward
+                    reg_observation = observation if self.opt_direction == 'min' else -1 * observation
+                    af_observations[actual_candidate_af_indices[index]].append(reg_observation)
+                else:
+                    reg_invalid_observation = initial_sample_mean if self.opt_direction == 'min' else -1 * initial_sample_mean
+                    af_observations[actual_candidate_af_indices[index]].append(reg_invalid_observation)
+            for index, af_index in enumerate(duplicate_candidate_af_indices):
+                original_observation = af_observations[duplicate_candidate_original_af_indices[index]][-1]
+                af_observations[af_index].append(original_observation)
+            self.train_model_hyperparams()
+            time_eval = time.perf_counter_ns()
+            # assert that all observation lists of non-skipped acquisition functions are of the same length
+            non_skipped_af_indices = list(af_index for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index)
+            assert all(len(af_observations[non_skipped_af_indices[0]]) == len(af_observations[af_index]) for af_index in non_skipped_af_indices)
+            # find the AFs eligible for being skipped
+            candidates_for_skip = list()
+            for af_index, count in enumerate(duplicate_candidate_af_count):
+                if sum(count) >= skip_if_duplicate_n_times and af_index not in skip_af_index:
+                    candidates_for_skip.append(af_index)
+            # do not skip the AF with the lowest discounted sum of observations (AFs are only skipped if more than one is eligible)
+            if len(candidates_for_skip) > 1:
+                candidates_for_skip_discounted = list(
+                    sum(list(obs * discount_factor**(len(observations) - 1 - i) for i, obs in enumerate(observations)))
+                    for af_index, observations in enumerate(af_observations) if af_index in candidates_for_skip)
+                af_not_to_skip = candidates_for_skip[np.argmin(candidates_for_skip_discounted)]
+                for af_index in candidates_for_skip:
+                    if af_index == af_not_to_skip:
+                        # do not skip the AF with the lowest discounted sum of observations and give it a clean slate
+                        duplicate_candidate_af_count[af_index] = deepcopy(duplicate_count_template)
+                        continue
+                    skip_af_index.append(af_index)
+                    if len(skip_af_index) >= len(aqfs):
+                        raise ValueError("There are no acquisition functions left! This should not happen...")
+            time_af_selection = time.perf_counter_ns()
+
+            # printing timings
+            if self.log_timings:
+                time_taken_predictions = round(time_predictions - time_start, 3) / 1000
+                time_taken_afs = round(time_afs - time_predictions, 3) / 1000
+                time_taken_eval = round(time_eval - time_afs, 3) / 1000
+                time_taken_af_selection = round(time_af_selection - time_eval, 3) / 1000
+                time_taken_total = round(time_af_selection - time_start, 3) / 1000
+                print(
+                    f"({self.fevals}/{max_fevals}) Total time: {time_taken_total} | Predictions: {time_taken_predictions} | AFs: {time_taken_afs} | Eval: {time_taken_eval} | AF selection: {time_taken_af_selection}",
+                    flush=True)
+        return self.results
+
+    def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
+        """ Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once per round, unless increase_precision is true. Skips AFs if they are consistently worse than the mean of discounted observations, promotes AFs if they are consistently better than this mean. Continues with the single-AF optimization once only one AF remains or one is promoted. Only supports minimization. """
+        if self.opt_direction != 'min':
+            raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
+        aqfs = self.multi_afs
+        discount_factor = self.multi_afs_discount_factor
+        required_improvement_factor = self.multi_afs_required_improvement_factor
+        required_improvement_worse = 1 + required_improvement_factor
+        required_improvement_better = 1 - required_improvement_factor
+        min_required_count = self.af_params['skip_duplicate_after']
+        skip_af_index = list()
+        single_af = len(aqfs) <= len(skip_af_index) + 1
+        af_observations = [list(), list(), list()]
+        af_performs_worse_count = [0, 0, 0]
+        af_performs_better_count = [0, 0, 0]
+        while self.fevals < max_fevals:
+            if single_af:
+                return self.__optimize(max_fevals)
+            if self.__visited_num >= self.searchspace_size:
+                raise ValueError(self.error_message_searchspace_fully_observed)
+            observations_median = np.median(self.__valid_observations)
+            if increase_precision is False:
+                predictions = self.predict_list(self.unvisited_cache)
+                hyperparam = self.contextual_variance(predictions[1])
+            for af_index, af in enumerate(aqfs):
+                if af_index in skip_af_index:
+                    continue
+                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
+                    break
+                if increase_precision is True:
+                    predictions = self.predict_list(self.unvisited_cache)
+                    hyperparam = self.contextual_variance(predictions[1])
+                list_of_acquisition_values = af(predictions, hyperparam)
+                best_af = self.argopt(list_of_acquisition_values)
+                # to avoid going out of bounds on the next iteration, remove the best_af
+                predictions = (np.delete(predictions[0], best_af), np.delete(predictions[1], best_af))
+                candidate_params = self.unvisited_cache[best_af]
+                candidate_index = self.find_param_config_index(candidate_params)
+                observation = self.evaluate_objective_function(candidate_params)
+                self.update_after_evaluation(observation, candidate_index, candidate_params)
+                if increase_precision is True:
+                    self.train_model_hyperparams()
+                # we use the registered observations for maximization of the discounted reward
+                if observation != self.invalid_value:
+                    reg_observation = observation if self.opt_direction == 'min' else -1 * observation
+                    af_observations[af_index].append(reg_observation)
+                else:
+                    # if the observation is invalid, use the median of all valid observations to avoid skewing the discounted observations
+                    reg_invalid_observation = observations_median if self.opt_direction == 'min' else -1 * observations_median
+                    af_observations[af_index].append(reg_invalid_observation)
+            if increase_precision is False:
+                self.train_model_hyperparams()
+
+            # calculate the mean of discounted observations over the remaining acquisition functions
+            discounted_obs = list(
+                sum(list(obs * discount_factor**(len(observations) - 1 - i) for i, obs in enumerate(observations))) for observations in af_observations)
+            disc_obs_mean = np.mean(list(discounted_obs[af_index] for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index))
+
+            # register which AFs perform more than the required improvement factor better than average and which more than that factor worse than average
+            for af_index, discounted_observation in enumerate(discounted_obs):
+                if discounted_observation > disc_obs_mean * required_improvement_worse:
+                    af_performs_worse_count[af_index] += 1
+                elif discounted_observation < disc_obs_mean * required_improvement_better:
+                    af_performs_better_count[af_index] += 1
+
+            # find the worst AF, discounted observations is leading for a draw
+            worst_count = max(list(count for af_index, count in enumerate(af_performs_worse_count) if af_index not in skip_af_index))
+            af_index_worst = -1
+            if worst_count >= min_required_count:
+                for af_index, count in enumerate(af_performs_worse_count):
+                    if af_index not in skip_af_index and count == worst_count and (af_index_worst == -1
+                                                                                   or discounted_obs[af_index] > discounted_obs[af_index_worst]):
+                        af_index_worst = af_index
+
+            # skip the worst AF
+            if af_index_worst > -1:
+                skip_af_index.append(af_index_worst)
+                # reset the counts to even the playing field for the remaining AFs
+                af_performs_worse_count = [0, 0, 0]
+                af_performs_better_count = [0, 0, 0]
+                # if there is only one AF left, register as single AF
+                if len(aqfs) <= len(skip_af_index) + 1:
+                    single_af = True
+                    af_indices_left = list(af_index for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index)
+                    assert len(af_indices_left) == 1
+                    self.__af = aqfs[af_indices_left[0]]
+            else:
+                # find the best AF, discounted observations is leading for a draw
+                best_count = max(list(count for af_index, count in enumerate(af_performs_better_count) if af_index not in skip_af_index))
+                af_index_best = -1
+                if best_count >= min_required_count:
+                    for af_index, count in enumerate(af_performs_better_count):
+                        if af_index not in skip_af_index and count == best_count and (af_index_best == -1
+                                                                                      or discounted_obs[af_index] < discounted_obs[af_index_best]):
+                            af_index_best = af_index
+                # make the best AF single
+                if af_index_best > -1:
+                    single_af = True
+                    self.__af = aqfs[af_index_best]
+
+        return self.results
+
+    def __optimize_multi_fast(self, max_fevals):
+        """ Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once per round of acquisition functions, the model is retrained at the end of each round. Raises a ValueError if the search space is fully visited at the start of a round. Returns the results. """
+        while self.fevals < max_fevals:
+            aqfs = self.multi_afs
+            if self.__visited_num >= self.searchspace_size:
+                raise ValueError(self.error_message_searchspace_fully_observed)
+            # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
+            predictions = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(predictions[1])
+            for af in aqfs:
+                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
+                    break
+                list_of_acquisition_values = af(predictions, hyperparam)
+                best_af = self.argopt(list_of_acquisition_values)
+                candidate_params = self.unvisited_cache[best_af]
+                # predict_list returns numpy arrays, which do not support del: drop the chosen candidate with np.delete to avoid going out of bounds
+                predictions = (np.delete(predictions[0], best_af), np.delete(predictions[1], best_af))
+                candidate_index = self.find_param_config_index(candidate_params)
+                observation = self.evaluate_objective_function(candidate_params)
+                self.update_after_evaluation(observation, candidate_index, candidate_params)
+            self.train_model_hyperparams()
+        return self.results
+
+    def af_random(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function returning a randomly shuffled list of the unvisited cache positions, for comparison. Both arguments are ignored """
+        list_random = list(range(len(self.unvisited_cache)))    # shuffle works in place, so it needs a mutable list rather than a range
+        shuffle(list_random)
+        return list_random
+
+    def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function Probability of Improvement (PI), evaluated as the normal CDF of the negated standardized improvement """
+
+        # unpack the predicted means and standard deviations
+        x_mu, x_std = predictions
+        exploration = self.af_params['explorationfactor'] if hyperparam is None else hyperparam
+        target = self.current_optimum - exploration
+
+        # standardized distance between the target and the predicted means, negated
+        # (the small constant avoids a division by zero)
+        negated_standardized = -((target - x_mu) / (x_std + 1E-9))
+
+        # evaluate the CDF for all candidates at once
+        probabilities = norm.cdf(negated_standardized)
+        return probabilities
+
+    def af_expected_improvement(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function Expected Improvement (EI), returned with a negative sign """
+
+        # unpack the predicted means and standard deviations
+        x_mu, x_std = predictions
+        exploration = self.af_params['explorationfactor'] if hyperparam is None else hyperparam
+        target = self.current_optimum - exploration
+        improvement = target - x_mu
+
+        # standardize the improvement (the small constant avoids a division by zero) and evaluate CDF and PDF for all candidates at once
+        standardized = improvement / (x_std + 1E-9)
+        cdf_values = norm.cdf(standardized)
+        pdf_values = norm.pdf(standardized)
+
+        # negated expected improvement for all candidates at once
+        negated_expected_improvement = -(improvement * cdf_values + x_std * pdf_values)
+        return negated_expected_improvement
+
+    def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function Lower Confidence Bound (LCB): the predicted mean minus the exploration weight times the predicted standard deviation """
+
+        # unpack the predicted means and standard deviations
+        means, deviations = predictions
+        # the exploration weight defaults to the configured exploration factor
+        weight = self.af_params['explorationfactor'] if hyperparam is None else hyperparam
+
+        # evaluate all candidates at once
+        bounds = means - weight * deviations
+        return bounds
+
+    def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010 """
+
+        # unpack the predicted means and standard deviations
+        means, deviations = predictions
+        delta = self.af_params['explorationfactor'] if hyperparam is None else hyperparam
+
+        # beta is derived from zeta, the number of function evaluations so far,
+        # the number of dimensions and delta
+        zeta = self.af_params['zeta']
+        num_evaluations = self.fevals
+        num_dims = self.num_dimensions
+        log_term = np.log((num_evaluations**(num_dims / 2. + 2)) * (np.pi**2) / (3. * delta))
+        beta = np.sqrt(zeta * (2 * log_term))
+
+        # evaluate all candidates at once
+        bounds = means - beta * deviations
+        return bounds
+
+    def visualize_after_opt(self):
+        """ Visualize the model after the optimization: prints the trained hyperparameters and plots the predictions against brute-forced observations. Beware that this runs the cost function on every configuration in the search space! """
+        # the GPyTorch model has no scikit-learn style kernel_ or log_marginal_likelihood(), so report the trained hyperparameters instead
+        print(self.hyperparams)
+        import matplotlib.pyplot as plt
+        mu, std = self.predict_list(self.searchspace)
+        brute_force_observations = list()
+        for param_config in self.searchspace:
+            obs = minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.results)
+            if obs == self.invalid_value:
+                obs = None
+            brute_force_observations.append(obs)
+        x_axis = range(len(mu))
+        plt.fill_between(x_axis, mu - std, mu + std, alpha=0.2, antialiased=True)
+        plt.plot(x_axis, mu, label="predictions", linestyle=' ', marker='.')
+        plt.plot(x_axis, brute_force_observations, label="actual", linestyle=' ', marker='.')
+        plt.legend()
+        plt.show()
diff --git a/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
new file mode 100644
index 000000000..891db5236
--- /dev/null
+++ b/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
@@ -0,0 +1,83 @@
+""" BOTorch package from https://github.com/pytorch/botorch """
+from __future__ import print_function
+
+from collections import OrderedDict
+import numpy as np
+
+try:
+    import torch
+    from botorch.models import SingleTaskGP
+    from botorch.fit import fit_gpytorch_model
+    from botorch.utils import standardize
+    from gpytorch.mlls import ExactMarginalLogLikelihood
+    from botorch.acquisition import UpperConfidenceBound
+    from botorch.optim import optimize_acqf
+except Exception:
+    BayesianOptimization = None
+    bayes_opt_present = False
+
+from kernel_tuner.strategies import minimize
+
+supported_methods = ["poi", "ei", "ucb"]
+
+
+def tune(runner, kernel_options, device_options, tuning_options):
+    """ Find the best performing kernel configuration in the parameter space
+
+    :params runner: A runner from kernel_tuner.runners
+    :type runner: kernel_tuner.runner
+
+    :param kernel_options: A dictionary with all options for the kernel.
+    :type kernel_options: kernel_tuner.interface.Options
+
+    :param device_options: A dictionary with all options for the device
+        on which the kernel should be tuned.
+    :type device_options: kernel_tuner.interface.Options
+
+    :param tuning_options: A dictionary with all options regarding the tuning
+        process.
+    :type tuning_options: kernel_tuner.interface.Options
+
+    :returns: A list of dictionaries for executed kernel configurations and their
+        execution times. And a dictionary that contains a information
+        about the hardware/software environment on which the tuning took place.
+    :rtype: list(dict()), dict()
+
+    """
+
+    if not bayes_opt_present:
+        raise ImportError("Error: optional dependency Bayesian Optimization not installed")
+    init_points = tuning_options.strategy_options.get("popsize", 20)
+    n_iter = tuning_options.strategy_options.get("max_fevals", 100)
+
+    # defaults as used by Bayesian Optimization Python package
+    acq = tuning_options.strategy_options.get("method", "ucb")
+    kappa = tuning_options.strategy_options.get("kappa", 2.576)
+    xi = tuning_options.strategy_options.get("xi", 0.0)
+
+    tuning_options["scaling"] = True
+
+    results = []
+
+    # function to pass to the optimizer
+    def func(**kwargs):
+        args = [kwargs[key] for key in tuning_options.tune_params.keys()]
+        return -1.0 * minimize._cost_func(args, kernel_options, tuning_options, runner, results)
+
+    bounds, _, _ = minimize.get_bounds_x0_eps(tuning_options)
+    pbounds = OrderedDict(zip(tuning_options.tune_params.keys(), bounds))
+
+    verbose = 0
+    if tuning_options.verbose:
+        verbose = 2
+
+    # print(np.isnan(init_points).any())
+
+    optimizer = BayesianOptimization(f=func, pbounds=pbounds, verbose=verbose)
+
+    optimizer.maximize(init_points=init_points, n_iter=n_iter, acq=acq, kappa=kappa, xi=xi)
+
+    if tuning_options.verbose:
+        print(optimizer.max)
+
+    return results, runner.dev.get_environment()
diff --git a/kernel_tuner/strategies/bayes_opt_old.py b/kernel_tuner/strategies/bayes_opt_old.py
new file mode 100644
index 000000000..6107fad0b
--- /dev/null
+++ b/kernel_tuner/strategies/bayes_opt_old.py
@@ -0,0 +1,837 @@
+""" Bayesian Optimization implementation from the thesis by Willemsen """
+from copy import deepcopy
+from random import randint, shuffle
+import itertools
+import warnings
+import time
+
+import numpy as np
+
+# BO imports
+try:
+    from typing import Tuple
+    from scipy.stats import norm
+    from sklearn.gaussian_process import GaussianProcessRegressor
+    from sklearn.gaussian_process.kernels import ConstantKernel, RBF, Matern
+    from sklearn.exceptions import ConvergenceWarning
+    from skopt.sampler import Lhs
+    bayes_opt_present = True
+except ImportError:
+    bayes_opt_present = False
+
+from kernel_tuner.strategies import minimize
+from kernel_tuner import util
+
+supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast"]
+
+
+def generate_normalized_param_dicts(tune_params: dict, eps: float) -> Tuple[dict, dict]:
+    """ Generates normalization and denormalization dictionaries """
+    original_to_normalized = dict()
+    normalized_to_original = dict()
+    for param_name in tune_params.keys():
+        original_to_normalized_dict = dict()
+        normalized_to_original_dict = dict()
+        for value_index, value in enumerate(tune_params[param_name]):
+            normalized_value = eps * value_index + 0.5 * eps
+            normalized_to_original_dict[normalized_value] = value
+            original_to_normalized_dict[value] = normalized_value
+        original_to_normalized[param_name] = original_to_normalized_dict
+        normalized_to_original[param_name] = normalized_to_original_dict
+    return original_to_normalized, normalized_to_original
+
+
+def normalize_parameter_space(param_space: list, tune_params: dict, normalized: dict) -> list:
+    """ Normalize the parameter space given a normalization dictionary """
+    keys = list(tune_params.keys())
+    param_space_normalized = list(tuple(normalized[keys[i]][v] for i, v in enumerate(params)) for params in param_space)
+    return param_space_normalized
+
+
+def prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict):
+    """ Pruning of the parameter space to remove dimensions that have a constant parameter """
+    pruned_tune_params_mask = list()
+    removed_tune_params = list()
+    param_names = list(tune_params.keys())
+    for index, key in enumerate(tune_params.keys()):
+        pruned_tune_params_mask.append(len(tune_params[key]) > 1)
+        if len(tune_params[key]) > 1:
+            removed_tune_params.append(None)
+        else:
+            value = tune_params[key][0]
+            normalized = normalize_dict[param_names[index]][value]
+            removed_tune_params.append(normalized)
+    if 'verbose' in tuning_options and tuning_options.verbose is True and len(tune_params.keys()) != sum(pruned_tune_params_mask):
+        print(f"Number of parameters (dimensions): {len(tune_params.keys())}, after pruning: {sum(pruned_tune_params_mask)}")
+    parameter_space = list(tuple(itertools.compress(param_config, pruned_tune_params_mask)) for param_config in parameter_space)
+    return parameter_space, removed_tune_params
+
+
+def tune(runner, kernel_options, device_options, tuning_options):
+    """ Find the best performing kernel configuration in the parameter space
+
+    :params runner: A runner from kernel_tuner.runners
+    :type runner: kernel_tuner.runner
+
+    :param kernel_options: A dictionary with all options for the kernel.
+    :type kernel_options: kernel_tuner.interface.Options
+
+    :param device_options: A dictionary with all options for the device
+        on which the kernel should be tuned.
+    :type device_options: kernel_tuner.interface.Options
+
+    :param tuning_options: A dictionary with all options regarding the tuning
+        process. Allows setting hyperparameters via the strategy_options key.
+    :type tuning_options: kernel_tuner.interface.Options
+
+    :returns: A list of dictionaries for executed kernel configurations and their
+        execution times. And a dictionary that contains a information
+        about the hardware/software environment on which the tuning took place.
+    :rtype: list(dict()), dict()
+
+    """
+
+    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
+    prune_parameterspace = tuning_options.strategy_options.get("pruneparameterspace", True)
+    if not bayes_opt_present:
+        raise ImportError("Error: optional dependencies for Bayesian Optimization not installed")
+
+    # epsilon for scaling should be the evenly spaced distance between the largest set of parameter options in an interval [0,1]
+    tune_params = tuning_options.tune_params
+    tuning_options["scaling"] = True
+    _, _, eps = minimize.get_bounds_x0_eps(tuning_options)
+
+    # compute cartesian product of all tunable parameters
+    parameter_space = itertools.product(*tune_params.values())
+
+    # check for search space restrictions
+    if tuning_options.restrictions is not None:
+        tuning_options.verbose = False
+    parameter_space = filter(lambda p: util.config_valid(p, tuning_options, runner.dev.max_threads), parameter_space)
+    parameter_space = list(parameter_space)
+    if len(parameter_space) < 1:
+        raise ValueError("Empty parameterspace after restrictionscheck. Restrictionscheck is possibly too strict.")
+    if len(parameter_space) == 1:
+        raise ValueError(f"Only one configuration after restrictionscheck. Restrictionscheck is possibly too strict. Configuration: {parameter_space[0]}")
+
+    # normalize search space to [0,1]
+    normalize_dict, denormalize_dict = generate_normalized_param_dicts(tune_params, eps)
+    parameter_space = normalize_parameter_space(parameter_space, tune_params, normalize_dict)
+
+    # prune the parameter space to remove dimensions that have a constant parameter
+    if prune_parameterspace:
+        parameter_space, removed_tune_params = prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict)
+    else:
+        parameter_space = list(parameter_space)
+        removed_tune_params = [None] * len(tune_params.keys())
+
+    # initialize and optimize
+    bo = BayesianOptimization(parameter_space, removed_tune_params, kernel_options, tuning_options, normalize_dict, denormalize_dict, runner)
+    results = bo.optimize(max_fevals)
+
+    return results, runner.dev.get_environment()
+
+
+class BayesianOptimization():
+
+    def __init__(self, searchspace: list, removed_tune_params: list, kernel_options: dict, tuning_options: dict, normalize_dict: dict, denormalize_dict: dict,
+                 runner, opt_direction='min'):
+        """ Read the strategy hyperparameters, set up the surrogate model and bookkeeping, and take the initial sample """
+        time_start = time.perf_counter_ns()
+        # supported hyperparameter values
+        self.supported_cov_kernels = ["constantrbf", "rbf", "matern32", "matern52"]
+        self.supported_methods = supported_methods
+        self.supported_sampling_methods = ["random", "lhs"]
+        self.supported_sampling_criterion = ["correlation", "ratio", "maximin", None]
+
+        def get_hyperparam(name: str, default, supported_values=()):
+            value = tuning_options.strategy_options.get(name, default)
+            if len(supported_values) > 0 and value not in supported_values:
+                raise ValueError(f"'{name}' is set to {value}, but must be one of {supported_values}")
+            return value
+
+        # get hyperparameters
+        cov_kernel_name = get_hyperparam("covariancekernel", "matern32", self.supported_cov_kernels)
+        cov_kernel_lengthscale = get_hyperparam("covariancelengthscale", 1.5)
+        acquisition_function = get_hyperparam("method", "multi-advanced", self.supported_methods)
+        acq = acquisition_function
+        acq_params = get_hyperparam("methodparams", {})
+        multi_af_names = get_hyperparam("multi_af_names", ['ei', 'poi', 'lcb'])
+        self.multi_afs_discount_factor = get_hyperparam("multi_af_discount_factor", 0.65 if acq == 'multi' else 0.95)
+        self.multi_afs_required_improvement_factor = get_hyperparam("multi_afs_required_improvement_factor", 0.15 if acq == 'multi-advanced-precise' else 0.1)
+        self.num_initial_samples = get_hyperparam("popsize", 20)
+        self.sampling_method = get_hyperparam("samplingmethod", "lhs", self.supported_sampling_methods)
+        self.sampling_crit = get_hyperparam("samplingcriterion", 'maximin', self.supported_sampling_criterion)
+        self.sampling_iter = get_hyperparam("samplingiterations", 1000)
+
+        # set acquisition function hyperparameter defaults where missing
+        if 'explorationfactor' not in acq_params:
+            acq_params['explorationfactor'] = 'CV'
+        if 'zeta' not in acq_params:
+            acq_params['zeta'] = 1
+        if 'skip_duplicate_after' not in acq_params:
+            acq_params['skip_duplicate_after'] = 5
+
+        # set arguments
+        self.kernel_options = kernel_options
+        self.tuning_options = tuning_options
+        self.tune_params = tuning_options.tune_params
+        self.param_names = list(self.tune_params.keys())
+        self.normalized_dict = normalize_dict
+        self.denormalized_dict = denormalize_dict
+        self.runner = runner
+        self.max_threads = runner.dev.max_threads
+        self.log_timings = False
+
+        # set optimization constants (np.inf / np.nan are used because the np.PINF / np.NINF / np.NaN aliases were removed in NumPy 2.0)
+        self.invalid_value = 1e20
+        self.opt_direction = opt_direction
+        if opt_direction == 'min':
+            self.worst_value = np.inf
+            self.argopt = np.argmin
+        elif opt_direction == 'max':
+            self.worst_value = -np.inf
+            self.argopt = np.argmax
+        else:
+            raise ValueError("Invalid optimization direction '{}'".format(opt_direction))
+
+        # set the acquisition function and surrogate model
+        self.optimize = self.__optimize
+        self.af_name = acquisition_function
+        self.af_params = acq_params
+        self.multi_afs = list(self.get_af_by_name(af_name) for af_name in multi_af_names)
+        self.set_acquisition_function(acquisition_function)
+        self.set_surrogate_model(cov_kernel_name, cov_kernel_lengthscale)
+
+        # set remaining values
+        self.results = []
+        self.__searchspace = searchspace
+        self.removed_tune_params = removed_tune_params
+        self.searchspace_size = len(self.searchspace)
+        self.num_dimensions = len(self.dimensions())
+        self.__current_optimum = self.worst_value
+        self.cv_norm_maximum = None
+        self.fevals = 0
+        self.__visited_num = 0
+        self.__visited_valid_num = 0
+        self.__visited_searchspace_indices = [False] * self.searchspace_size
+        self.__observations = [np.nan] * self.searchspace_size
+        self.__valid_observation_indices = [False] * self.searchspace_size
+        self.__valid_params = list()
+        self.__valid_observations = list()
+        self.unvisited_cache = self.unvisited()
+        time_setup = time.perf_counter_ns()
+        self.error_message_searchspace_fully_observed = "The search space has been fully observed"
+
+        # take initial sample; the timestamp is taken unconditionally so the timing log below never reads an unbound name
+        if self.num_initial_samples > 0:
+            self.initial_sample()
+        time_initial_sample = time.perf_counter_ns()
+
+        # print the timings
+        if self.log_timings:
+            time_taken_setup = round(time_setup - time_start, 3) / 1000
+            time_taken_initial_sample = round(time_initial_sample - time_setup, 3) / 1000
+            time_taken_total = round(time_initial_sample - time_start, 3) / 1000
+            print(f"Initialization | total time: {time_taken_total} | Setup: {time_taken_setup} | Initial sample: {time_taken_initial_sample}", flush=True)
+
+    @property
+    def searchspace(self):
+        return self.__searchspace
+
+    @property
+    def observations(self):
+        return self.__observations
+
+    @property
+    def current_optimum(self):
+        return self.__current_optimum
+
+    @current_optimum.setter
+    def current_optimum(self, value: float):
+        self.__current_optimum = value
+
+    def is_better_than(self, a: float, b: float) -> bool:
+        """ Determines which one is better depending on optimization direction """
+        return a < b if self.opt_direction == 'min' else a > b
+
+    def is_not_visited(self, index: int) -> bool:
+        """ Returns whether a searchspace index has not been visited """
+        return not self.__visited_searchspace_indices[index]
+
+    def is_valid(self, observation: float) -> bool:
+        """ Returns whether an observation is valid, i.e. not None, not the invalid-value sentinel and not NaN (NaN never compares equal, so np.isnan is required) """
+        return not (observation is None or observation == self.invalid_value or np.isnan(observation))
+
+    def get_af_by_name(self, name: str):
+        """ Get the basic acquisition functions by their name """
+        basic_af_names = ['ei', 'poi', 'lcb']
+        if name == 'ei':
+            return self.af_expected_improvement
+        elif name == 'poi':
+            return self.af_probability_of_improvement
+        elif name == 'lcb':
+            return self.af_lower_confidence_bound
+        raise ValueError(f"{name} not in {basic_af_names}")
+
+    def set_acquisition_function(self, acquisition_function: str):
+        """ Set the acquisition function """
+        if acquisition_function == 'poi':
+            self.__af = self.af_probability_of_improvement
+        elif acquisition_function == 'ei':
+            self.__af = self.af_expected_improvement
+        elif acquisition_function == 'lcb':
+            self.__af = self.af_lower_confidence_bound
+        elif acquisition_function == 'lcb-srinivas':
+            self.__af = self.af_lower_confidence_bound_srinivas
+        elif acquisition_function == 'random':
+            self.__af = self.af_random
+        elif acquisition_function == 'multi':
+            self.optimize = self.__optimize_multi
+        elif acquisition_function == 'multi-advanced':
+            self.optimize = self.__optimize_multi_advanced
+        elif acquisition_function == 'multi-fast':
+            self.optimize = self.__optimize_multi_fast
+        else:
+            raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
+
+    def set_surrogate_model(self, cov_kernel_name: str, cov_kernel_lengthscale: float):
+        """ Set the surrogate model to a Gaussian Process with the named covariance kernel and a fixed lengthscale; raises ValueError for an unknown kernel name """
+        if cov_kernel_name == "constantrbf":
+            kernel = ConstantKernel(1.0, constant_value_bounds="fixed") * RBF(cov_kernel_lengthscale, length_scale_bounds="fixed")
+        elif cov_kernel_name == "rbf":
+            kernel = RBF(length_scale=cov_kernel_lengthscale, length_scale_bounds="fixed")
+        elif cov_kernel_name == "matern32":
+            kernel = Matern(length_scale=cov_kernel_lengthscale, nu=1.5, length_scale_bounds="fixed")
+        elif cov_kernel_name == "matern52":
+            kernel = Matern(length_scale=cov_kernel_lengthscale, nu=2.5, length_scale_bounds="fixed")
+        else:
+            raise ValueError("Covariance kernel must be one of {}, is {}".format(self.supported_cov_kernels, cov_kernel_name))
+        self.__model = GaussianProcessRegressor(kernel=kernel, alpha=1e-10, normalize_y=True)    # maybe change alpha to a higher value such as 1e-5?
+
+    def valid_params_observations(self) -> Tuple[list, list]:
+        """ Returns a list of valid observations and their parameter configurations """
+        # if you do this every iteration, better keep it as cache and update in update_after_evaluation
+        params = list()
+        observations = list()
+        for index, valid in enumerate(self.__valid_observation_indices):
+            if valid is True:
+                params.append(self.searchspace[index])
+                observations.append(self.observations[index])
+        return params, observations
+
+    def unvisited(self) -> list:
+        """ Returns a list of unvisited parameter configurations - attention: cached version exists! """
+        params = list(self.searchspace[index] for index, visited in enumerate(self.__visited_searchspace_indices) if visited is False)
+        return params
+
+    def find_param_config_index(self, param_config: tuple) -> int:
+        """ Find a parameter config index in the search space if it exists """
+        return self.searchspace.index(param_config)
+
+    def find_param_config_unvisited_index(self, param_config: tuple) -> int:
+        """ Find a parameter config index in the unvisited cache if it exists """
+        return self.unvisited_cache.index(param_config)
+
+    def normalize_param_config(self, param_config: tuple) -> tuple:
+        """ Normalizes a parameter configuration """
+        normalized = tuple(self.normalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
+        return normalized
+
+    def denormalize_param_config(self, param_config: tuple) -> tuple:
+        """ Denormalizes a parameter configuration """
+        denormalized = tuple(self.denormalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
+        return denormalized
+
+    def unprune_param_config(self, param_config: tuple) -> tuple:
+        """ In case of pruned dimensions, adds the removed dimensions back in the param config """
+        unpruned = list()
+        pruned_count = 0
+        for removed in self.removed_tune_params:
+            if removed is not None:
+                unpruned.append(removed)
+            else:
+                unpruned.append(param_config[pruned_count])
+                pruned_count += 1
+        return tuple(unpruned)
+
+    def update_after_evaluation(self, observation: float, index: int, param_config: tuple):
+        """ Adjust the visited and valid index records accordingly """
+        validity = self.is_valid(observation)
+        self.__visited_num += 1
+        self.__observations[index] = observation
+        self.__visited_searchspace_indices[index] = True
+        del self.unvisited_cache[self.find_param_config_unvisited_index(param_config)]
+        self.__valid_observation_indices[index] = validity
+        if validity is True:
+            self.__visited_valid_num += 1
+            self.__valid_params.append(param_config)
+            self.__valid_observations.append(observation)
+            if self.is_better_than(observation, self.current_optimum):
+                self.current_optimum = observation
+
+    def predict(self, x) -> Tuple[float, float]:
+        """ Returns a mean and standard deviation predicted by the surrogate model for the parameter configuration """
+        return self.__model.predict([x], return_std=True)
+
+    def predict_list(self, lst: list) -> Tuple[list, list, list]:
+        """ Returns a list of means and standard deviations predicted by the surrogate model for the parameter configurations, and separate lists of means and standard deviations """
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            mu, std = self.__model.predict(lst, return_std=True)
+            return list(zip(mu, std)), mu, std
+
+    def fit_observations_to_model(self):
+        """ Update the model based on the current list of observations """
+        self.__model.fit(self.__valid_params, self.__valid_observations)
+
+    def evaluate_objective_function(self, param_config: tuple) -> float:
+        """ Evaluates the objective function """
+        param_config = self.unprune_param_config(param_config)
+        denormalized_param_config = self.denormalize_param_config(param_config)
+        if not util.config_valid(denormalized_param_config, self.tuning_options, self.max_threads):
+            return self.invalid_value
+        val = minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.results)
+        self.fevals += 1
+        return val
+
+    def dimensions(self) -> list:
+        """ List of parameter values per parameter, materialized as a real list to match the annotation rather than a dict view """
+        return list(self.tune_params.values())
+
+    def draw_random_sample(self) -> Tuple[list, int]:
+        """ Draw a random sample from the unvisited parameter configurations """
+        if len(self.unvisited_cache) < 1:
+            raise ValueError("Searchspace exhausted during random sample draw as no valid configurations were found")
+        index = randint(0, len(self.unvisited_cache) - 1)    # NOSONAR
+        param_config = self.unvisited_cache[index]
+        actual_index = self.find_param_config_index(param_config)
+        return param_config, actual_index
+
+    def draw_latin_hypercube_samples(self, num_samples: int) -> list:
+        """ Draws an LHS-distributed sample, returned as a list of (normalized param config, searchspace index) tuples """
+        if self.searchspace_size < num_samples:
+            raise ValueError("Can't sample more than the size of the search space")
+        if self.sampling_crit is None:
+            lhs = Lhs(lhs_type="centered", criterion=None)
+        else:
+            lhs = Lhs(lhs_type="classic", criterion=self.sampling_crit, iterations=self.sampling_iter)
+        param_configs = lhs.generate(self.dimensions(), num_samples)
+        # keyed by searchspace index so that duplicate LHS draws collapse into one sample,
+        # as evaluating the same configuration twice fails in update_after_evaluation
+        samples = dict()
+        for lhs_config in param_configs:
+            try:
+                param_config = self.normalize_param_config(lhs_config)
+                index = self.find_param_config_index(param_config)
+                samples[index] = param_config
+            except ValueError:
+                # Due to search space restrictions, the search space may not be an exact cartesian product of the tunable parameter values.
+                # It is thus possible for LHS to generate a parameter combination that is not in the actual searchspace, which must be skipped.
+                continue
+        return list((param_config, index) for index, param_config in samples.items())
+
+    def initial_sample(self):
+        """ Takes the initial sample: LHS draws first when 'lhs' is selected, then random draws until enough valid observations are collected, and fits the model """
+        if self.num_initial_samples <= 0:
+            raise ValueError("At least one initial sample is required")
+        if self.sampling_method == 'lhs':
+            samples = self.draw_latin_hypercube_samples(self.num_initial_samples)
+        elif self.sampling_method == 'random':
+            samples = list()    # nothing pre-drawn; the loop further below draws every sample at random
+        else:
+            raise ValueError("Sampling method must be one of {}, is {}".format(self.supported_sampling_methods, self.sampling_method))
+        # evaluate the pre-drawn samples; only valid observations count towards the requested number
+        collected_samples = 0
+        for params, index in samples:
+            observation = self.evaluate_objective_function(params)
+            self.update_after_evaluation(observation, index, params)
+            if self.is_valid(observation):
+                collected_samples += 1
+        # collect the remainder of the samples with random draws from the unvisited configurations
+        while collected_samples < self.num_initial_samples:
+            params, index = self.draw_random_sample()
+            observation = self.evaluate_objective_function(params)
+            self.update_after_evaluation(observation, index, params)
+            # check for validity to avoid having no actual initial samples
+            if self.is_valid(observation):
+                collected_samples += 1
+        self.fit_observations_to_model()
+        _, _, std = self.predict_list(self.unvisited_cache)
+        self.initial_sample_mean = np.mean(self.__valid_observations)
+        # Alternatively:
+        # self.initial_sample_std = np.std(self.__valid_observations)
+        # self.initial_sample_mean = np.mean(predictions)
+        self.initial_std = np.mean(std)
+        self.cv_norm_maximum = self.initial_std    # read by contextual_variance to normalize the exploration factor
+    def contextual_variance(self, std: list):
+        """ Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018) """
+        if not self.af_params['explorationfactor'] == 'CV':
+            return None
+        if self.opt_direction == 'min':
+            if self.current_optimum == self.worst_value:
+                return 0.01
+            if self.current_optimum <= 0:
+                # doesn't work well for minimization beyond 0, should that even be a thing?
+                return abs(np.mean(std) / self.current_optimum)
+            improvement_over_initial_sample = self.initial_sample_mean / self.current_optimum
+            cv = np.mean(std) / improvement_over_initial_sample
+            # normalize if available
+            if self.cv_norm_maximum:
+                cv = cv / self.cv_norm_maximum
+            return cv
+        return np.mean(std) / self.current_optimum
+
+    def __optimize(self, max_fevals):
+        """ Find the next best candidate configuration(s), evaluate those and update the model accordingly """
+        while self.fevals < max_fevals:
+            if self.__visited_num >= self.searchspace_size:
+                raise ValueError(self.error_message_searchspace_fully_observed)
+            predictions, _, std = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(std)
+            list_of_acquisition_values = self.__af(predictions, hyperparam)
+            # afterwards select the best AF value
+            best_af = self.argopt(list_of_acquisition_values)
+            candidate_params = self.unvisited_cache[best_af]
+            candidate_index = self.find_param_config_index(candidate_params)
+            observation = self.evaluate_objective_function(candidate_params)
+            self.update_after_evaluation(observation, candidate_index, candidate_params)
+            self.fit_observations_to_model()
+        return self.results
+
+    def __optimize_multi(self, max_fevals):
+        """ Optimize with a portfolio of multiple acquisition functions. Predictions are always only taken once. Skips AFs if they suggest X/max_evals duplicates in a row, prefers AF with best discounted average. """
+        if self.opt_direction != 'min':
+            raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
+        # calculate how many times an AF can suggest a duplicate candidate before the AF is skipped
+        # skip_duplicates_fraction = self.af_params['skip_duplicates_fraction']
+        # skip_if_duplicate_n_times = int(min(max(round(skip_duplicates_fraction * max_fevals), 3), max_fevals))
+        skip_if_duplicate_n_times = self.af_params['skip_duplicate_after']
+        discount_factor = self.multi_afs_discount_factor
+        # setup the registration of duplicates and runtimes
+        duplicate_count_template = [0 for _ in range(skip_if_duplicate_n_times)]
+        duplicate_candidate_af_count = list(deepcopy(duplicate_count_template) for _ in range(3))
+        skip_af_index = list()
+        af_runtimes = [0, 0, 0]
+        af_observations = [list(), list(), list()]
+        initial_sample_mean = np.mean(self.__valid_observations)
+        while self.fevals < max_fevals:
+            time_start = time.perf_counter_ns()
+            # the first acquisition function is never skipped, so that should be the best for the endgame (EI)
+            aqfs = self.multi_afs
+            predictions, _, std = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(std)
+            if self.__visited_num >= self.searchspace_size:
+                raise ValueError(self.error_message_searchspace_fully_observed)
+            time_predictions = time.perf_counter_ns()
+            actual_candidate_params = list()
+            actual_candidate_indices = list()
+            actual_candidate_af_indices = list()
+            duplicate_candidate_af_indices = list()
+            duplicate_candidate_original_af_indices = list()
+            for af_index, af in enumerate(aqfs):
+                if af_index in skip_af_index:
+                    continue
+                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
+                    break
+                timer_start = time.perf_counter()
+                list_of_acquisition_values = af(predictions, hyperparam)
+                best_af = self.argopt(list_of_acquisition_values)
+                time_taken = time.perf_counter() - timer_start
+                af_runtimes[af_index] += time_taken
+                is_duplicate = best_af in actual_candidate_indices
+                if not is_duplicate:
+                    candidate_params = self.unvisited_cache[best_af]
+                    actual_candidate_params.append(candidate_params)
+                    actual_candidate_indices.append(best_af)
+                    actual_candidate_af_indices.append(af_index)
+                # register whether the AF suggested a duplicate candidate
+                duplicate_candidate_af_count[af_index].pop(0)
+                duplicate_candidate_af_count[af_index].append(1 if is_duplicate else 0)
+                if is_duplicate:
+                    # find the index of the AF that first registered the duplicate
+                    original_duplicate_af_index = actual_candidate_af_indices[actual_candidate_indices.index(best_af)]
+                    # register that AF as duplicate as well
+                    duplicate_candidate_af_count[original_duplicate_af_index][-1] = 1
+                    duplicate_candidate_af_indices.append(af_index)
+                    duplicate_candidate_original_af_indices.append(original_duplicate_af_index)
+            time_afs = time.perf_counter_ns()
+            # evaluate the non-duplicate candidates
+            for index, af_index in enumerate(actual_candidate_af_indices):
+                candidate_params = actual_candidate_params[index]
+                candidate_index = self.find_param_config_index(candidate_params)
+                observation = self.evaluate_objective_function(candidate_params)
+                self.update_after_evaluation(observation, candidate_index, candidate_params)
+                if observation != self.invalid_value:
+                    # we use the registered observations for maximization of the discounted reward
+                    reg_observation = observation if self.opt_direction == 'min' else -1 * observation
+                    af_observations[actual_candidate_af_indices[index]].append(reg_observation)
+                else:
+                    reg_invalid_observation = initial_sample_mean if self.opt_direction == 'min' else -1 * initial_sample_mean
+                    af_observations[actual_candidate_af_indices[index]].append(reg_invalid_observation)
+            for index, af_index in enumerate(duplicate_candidate_af_indices):
+                original_observation = af_observations[duplicate_candidate_original_af_indices[index]][-1]
+                af_observations[af_index].append(original_observation)
+            self.fit_observations_to_model()
+            time_eval = time.perf_counter_ns()
+            # assert that all observation lists of non-skipped acquisition functions are of the same length
+            non_skipped_af_indices = list(af_index for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index)
+            assert all(len(af_observations[non_skipped_af_indices[0]]) == len(af_observations[af_index]) for af_index in non_skipped_af_indices)
+            # find the AFs eligible for being skipped
+            candidates_for_skip = list()
+            for af_index, count in enumerate(duplicate_candidate_af_count):
+                if sum(count) >= skip_if_duplicate_n_times and af_index not in skip_af_index:
+                    candidates_for_skip.append(af_index)
+            # do not skip the AF with the lowest runtime
+            if len(candidates_for_skip) > 1:
+                candidates_for_skip_discounted = list(
+                    sum(list(obs * discount_factor**(len(observations) - 1 - i) for i, obs in enumerate(observations)))
+                    for af_index, observations in enumerate(af_observations) if af_index in candidates_for_skip)
+                af_not_to_skip = candidates_for_skip[np.argmin(candidates_for_skip_discounted)]
+                for af_index in candidates_for_skip:
+                    if af_index == af_not_to_skip:
+                        # do not skip the AF with the lowest runtime and give it a clean slate
+                        duplicate_candidate_af_count[af_index] = deepcopy(duplicate_count_template)
+                        continue
+                    skip_af_index.append(af_index)
+                    if len(skip_af_index) >= len(aqfs):
+                        raise ValueError("There are no acquisition functions left! This should not happen...")
+            time_af_selection = time.perf_counter_ns()
+
+            # printing timings
+            if self.log_timings:
+                time_taken_predictions = round(time_predictions - time_start, 3) / 1000
+                time_taken_afs = round(time_afs - time_predictions, 3) / 1000
+                time_taken_eval = round(time_eval - time_afs, 3) / 1000
+                time_taken_af_selection = round(time_af_selection - time_eval, 3) / 1000
+                time_taken_total = round(time_af_selection - time_start, 3) / 1000
+                print(
+                    f"({self.fevals}/{max_fevals}) Total time: {time_taken_total} | Predictions: {time_taken_predictions} | AFs: {time_taken_afs} | Eval: {time_taken_eval} | AF selection: {time_taken_af_selection}",
+                    flush=True)
+        return self.results
+
+    def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
+        """ Optimize with a portfolio of multiple acquisition functions (AFs), minimization only. Predictions are only taken once per round, unless increase_precision is true, in which case predictions are retaken and the model is re-fitted for every AF. Skips an AF if its discounted observations are consistently worse than the mean over the remaining AFs, and switches to a single AF (via self.__optimize) if it is consistently better than this mean or is the last one left. Returns self.results; raises ValueError if the optimization direction is not 'min' or the search space is fully observed. """
+        if self.opt_direction != 'min':
+            raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
+        aqfs = self.multi_afs
+        discount_factor = self.multi_afs_discount_factor
+        required_improvement_factor = self.multi_afs_required_improvement_factor
+        required_improvement_worse = 1 + required_improvement_factor
+        required_improvement_better = 1 - required_improvement_factor
+        min_required_count = self.af_params['skip_duplicate_after']
+        skip_af_index = list()
+        single_af = len(aqfs) <= len(skip_af_index) + 1
+        af_observations = [list() for _ in aqfs]    # one bookkeeping slot per AF, so portfolios of any size work without phantom or missing entries
+        af_performs_worse_count = [0] * len(aqfs)
+        af_performs_better_count = [0] * len(aqfs)
+        while self.fevals < max_fevals:
+            if single_af:
+                return self.__optimize(max_fevals)
+            if self.__visited_num >= self.searchspace_size:
+                raise ValueError(self.error_message_searchspace_fully_observed)
+            observations_median = np.median(self.__valid_observations)
+            if increase_precision is False:
+                predictions, _, std = self.predict_list(self.unvisited_cache)
+                hyperparam = self.contextual_variance(std)
+            for af_index, af in enumerate(aqfs):
+                if af_index in skip_af_index:
+                    continue
+                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
+                    break
+                if increase_precision is True:
+                    predictions, _, std = self.predict_list(self.unvisited_cache)
+                    hyperparam = self.contextual_variance(std)
+                list_of_acquisition_values = af(predictions, hyperparam)
+                best_af = self.argopt(list_of_acquisition_values)    # index of the selected candidate in the unvisited cache
+                del predictions[best_af]    # to avoid going out of bounds
+                candidate_params = self.unvisited_cache[best_af]
+                candidate_index = self.find_param_config_index(candidate_params)
+                observation = self.evaluate_objective_function(candidate_params)
+                self.update_after_evaluation(observation, candidate_index, candidate_params)
+                if increase_precision is True:
+                    self.fit_observations_to_model()
+                # we use the registered observations for maximization of the discounted reward
+                if observation != self.invalid_value:
+                    reg_observation = observation if self.opt_direction == 'min' else -1 * observation
+                    af_observations[af_index].append(reg_observation)
+                else:
+                    # if the observation is invalid, use the median of all valid observations to avoid skewing the discounted observations
+                    reg_invalid_observation = observations_median if self.opt_direction == 'min' else -1 * observations_median
+                    af_observations[af_index].append(reg_invalid_observation)
+            if increase_precision is False:
+                self.fit_observations_to_model()
+
+            # calculate the mean of discounted observations over the remaining acquisition functions
+            discounted_obs = list(
+                sum(list(obs * discount_factor**(len(observations) - 1 - i) for i, obs in enumerate(observations))) for observations in af_observations)
+            disc_obs_mean = np.mean(list(discounted_obs[af_index] for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index))
+
+            # register which AFs perform better or worse than the mean by more than the required improvement factor
+            for af_index, discounted_observation in enumerate(discounted_obs):
+                if discounted_observation > disc_obs_mean * required_improvement_worse:
+                    af_performs_worse_count[af_index] += 1
+                elif discounted_observation < disc_obs_mean * required_improvement_better:
+                    af_performs_better_count[af_index] += 1
+
+            # find the worst AF, discounted observations is leading for a draw
+            worst_count = max(list(count for af_index, count in enumerate(af_performs_worse_count) if af_index not in skip_af_index))
+            af_index_worst = -1
+            if worst_count >= min_required_count:
+                for af_index, count in enumerate(af_performs_worse_count):
+                    if af_index not in skip_af_index and count == worst_count and (af_index_worst == -1
+                                                                                   or discounted_obs[af_index] > discounted_obs[af_index_worst]):
+                        af_index_worst = af_index
+
+            # skip the worst AF
+            if af_index_worst > -1:
+                skip_af_index.append(af_index_worst)
+                # reset the counts to even the playing field for the remaining AFs
+                af_performs_worse_count = [0] * len(aqfs)
+                af_performs_better_count = [0] * len(aqfs)
+                # if there is only one AF left, register as single AF
+                if len(aqfs) <= len(skip_af_index) + 1:
+                    single_af = True
+                    af_indices_left = list(af_index for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index)
+                    assert len(af_indices_left) == 1
+                    self.__af = aqfs[af_indices_left[0]]
+            else:
+                # find the best AF, discounted observations is leading for a draw
+                best_count = max(list(count for af_index, count in enumerate(af_performs_better_count) if af_index not in skip_af_index))
+                af_index_best = -1
+                if best_count >= min_required_count:
+                    for af_index, count in enumerate(af_performs_better_count):
+                        if af_index not in skip_af_index and count == best_count and (af_index_best == -1
+                                                                                      or discounted_obs[af_index] < discounted_obs[af_index_best]):
+                            af_index_best = af_index
+                # make the best AF single
+                if af_index_best > -1:
+                    single_af = True
+                    self.__af = aqfs[af_index_best]
+
+        return self.results
+
+    def __optimize_multi_fast(self, max_fevals):
+        """ Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once per round: every acquisition function selects and evaluates one candidate based on the same predictions, after which the model is re-fitted. Returns self.results; raises ValueError if the search space is fully observed. """
+        while self.fevals < max_fevals:
+            aqfs = self.multi_afs
+            # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
+            predictions, _, std = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(std)
+            if self.__visited_num >= self.searchspace_size:
+                raise ValueError(self.error_message_searchspace_fully_observed)
+            for af in aqfs:
+                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
+                    break
+                list_of_acquisition_values = af(predictions, hyperparam)
+                best_af = self.argopt(list_of_acquisition_values)    # index of the selected candidate in the unvisited cache
+                del predictions[best_af]    # to avoid going out of bounds; presumably keeps predictions aligned with unvisited_cache, which update_after_evaluation is assumed to shrink - TODO confirm
+                candidate_params = self.unvisited_cache[best_af]
+                candidate_index = self.find_param_config_index(candidate_params)
+                observation = self.evaluate_objective_function(candidate_params)
+                self.update_after_evaluation(observation, candidate_index, candidate_params)
+            self.fit_observations_to_model()    # the model is only re-fitted once all acquisition functions have had their turn
+        return self.results
+
+    def af_random(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function returning a randomly shuffled list of indices into the unvisited cache, for comparison. The predictions and hyperparam arguments are accepted for interface compatibility with the other acquisition functions and are ignored. """
+        list_random = list(range(len(self.unvisited_cache)))    # shuffle() works in place and needs a mutable sequence; a bare range object raises TypeError
+        shuffle(list_random)
+        return list_random
+
+    def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function Probability of Improvement (PI). Returns one value per entry in predictions; the predictions for the unvisited cache and the exploration factor from af_params are used when the arguments are not supplied. """
+
+        # prefetch required data
+        if predictions is None:
+            predictions, _, _ = self.predict_list(self.unvisited_cache)
+        if hyperparam is None:
+            hyperparam = self.af_params['explorationfactor']
+        fplus = self.current_optimum - hyperparam    # target value: the current optimum lowered by the hyperparameter
+
+        # precompute difference of improvement
+        list_diff_improvement = list(-((fplus - x_mu) / (x_std + 1E-9)) for (x_mu, x_std) in predictions)    # negated standardized difference; the 1E-9 guards against division by a zero std
+
+        # compute probability of improvement with CDF in bulk
+        list_prob_improvement = norm.cdf(list_diff_improvement)    # NOTE(review): norm is presumably scipy.stats.norm, in which case this is an array rather than a list - confirm
+
+        return list_prob_improvement
+
+    def af_expected_improvement(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function Expected Improvement (EI). Returns the negated expected improvement for each entry in predictions; the predictions for the unvisited cache and the exploration factor from af_params are used when the arguments are not supplied. """
+
+        # prefetch required data
+        if predictions is None:
+            predictions, _, _ = self.predict_list(self.unvisited_cache)
+        if hyperparam is None:
+            hyperparam = self.af_params['explorationfactor']
+        fplus = self.current_optimum - hyperparam    # target value: the current optimum lowered by the hyperparameter
+
+        # precompute difference of improvement, CDF and PDF in bulk
+        list_diff_improvement = list((fplus - x_mu) / (x_std + 1E-9) for (x_mu, x_std) in predictions)    # the 1E-9 guards against division by a zero std
+        list_cdf = norm.cdf(list_diff_improvement)
+        list_pdf = norm.pdf(list_diff_improvement)
+
+        # specify AF calculation
+        def exp_improvement(index) -> float:
+            x_mu, x_std = predictions[index]
+            ei = (fplus - x_mu) * list_cdf[index] + x_std * list_pdf[index]
+            return -ei    # negated, so a larger expected improvement yields a lower acquisition value
+
+        # calculate AF
+        list_exp_improvement = list(map(exp_improvement, range(len(predictions))))
+        return list_exp_improvement
+
+    def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function Lower Confidence Bound (LCB). Returns mean minus hyperparam times std for each entry in predictions; the predictions for the unvisited cache and the exploration factor from af_params are used when the arguments are not supplied. """
+
+        # prefetch required data
+        if predictions is None:
+            predictions, _, _ = self.predict_list(self.unvisited_cache)
+        if hyperparam is None:
+            hyperparam = self.af_params['explorationfactor']
+        beta = hyperparam    # weight of the standard deviation in the bound
+
+        # compute LCB in bulk
+        list_lower_confidence_bound = list(x_mu - beta * x_std for (x_mu, x_std) in predictions)
+        return list_lower_confidence_bound
+
+    def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010. Beta is derived from the number of function evaluations, the number of dimensions, zeta from af_params and the hyperparameter (used as delta); returns mean minus beta times std for each entry in predictions. """
+
+        # prefetch required data
+        if predictions is None:
+            predictions, _, _ = self.predict_list(self.unvisited_cache)
+        if hyperparam is None:
+            hyperparam = self.af_params['explorationfactor']
+
+        # precompute beta parameter
+        zeta = self.af_params['zeta']
+        t = self.fevals    # NOTE(review): if this is 0 the log below receives 0 and beta is not a finite number - confirm this is only called after the initial sample
+        d = self.num_dimensions
+        delta = hyperparam
+        beta = np.sqrt(zeta * (2 * np.log((t**(d / 2. + 2)) * (np.pi**2) / (3. * delta))))
+
+        # compute LCB in bulk (mean minus beta times std)
+        list_lower_confidence_bound = list(x_mu - beta * x_std for (x_mu, x_std) in predictions)
+        return list_lower_confidence_bound
+
+    def visualize_after_opt(self):
+        """ Visualize the model after the optimization: prints the model's kernel parameters and log marginal likelihood, then plots the predicted mean with a band of one std against the observations of every configuration in the search space. """
+        print(self.__model.kernel_.get_params())
+        print(self.__model.log_marginal_likelihood())
+        import matplotlib.pyplot as plt    # imported here, so matplotlib is only needed when visualizing
+        _, mu, std = self.predict_list(self.searchspace)
+        brute_force_observations = list()
+        for param_config in self.searchspace:    # NOTE: this evaluates the cost function for every configuration in the search space
+            obs = minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.results)
+            if obs == self.invalid_value:
+                obs = None    # invalid observations are replaced by None before plotting
+            brute_force_observations.append(obs)
+        x_axis = range(len(mu))
+        plt.fill_between(x_axis, mu - std, mu + std, alpha=0.2, antialiased=True)    # assumes mu and std support elementwise arithmetic (e.g. numpy arrays) - TODO confirm
+        plt.plot(x_axis, mu, label="predictions", linestyle=' ', marker='.')
+        plt.plot(x_axis, brute_force_observations, label="actual", linestyle=' ', marker='.')
+        plt.legend()
+        plt.show()

From 3273dd3e26e908d082a0259c6b88752437d6d3ca Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 12 Jan 2022 11:32:52 +0100
Subject: [PATCH 002/168] Completely new Bayesian Optimization implementation

---
 .gitignore                                    |   1 +
 kernel_tuner/core.py                          |  39 +-
 kernel_tuner/cupy.py                          |  32 +-
 kernel_tuner/interface.py                     |   3 +-
 kernel_tuner/python.py                        | 147 ++++
 kernel_tuner/strategies/bayes_opt_GPyTorch.py |  14 +-
 .../strategies/bayes_opt_GPyTorch_lean.py     | 746 ++++++++++++++++++
 kernel_tuner/util.py                          |  15 +-
 8 files changed, 956 insertions(+), 41 deletions(-)
 create mode 100644 kernel_tuner/python.py
 create mode 100644 kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py

diff --git a/.gitignore b/.gitignore
index 0bf256bf8..90d7e1c89 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,4 @@ examples/cuda/output
 deploy_key
 *.mod
 temp_*.*
+.DS_Store
diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py
index 68fee0b39..fac470b8b 100644
--- a/kernel_tuner/core.py
+++ b/kernel_tuner/core.py
@@ -16,17 +16,15 @@
 from kernel_tuner.cuda import CudaFunctions
 from kernel_tuner.opencl import OpenCLFunctions
 from kernel_tuner.c import CFunctions
+from kernel_tuner.python import PythonFunctions
 from kernel_tuner.nvml import NVMLObserver
 import kernel_tuner.util as util
 
-
 try:
     import torch
 except ImportError:
     torch = util.TorchPlaceHolder()
 
-
-
 _KernelInstance = namedtuple("_KernelInstance", ["name", "kernel_source", "kernel_string", "temp_files", "threads", "grid", "params", "arguments"])
 
 
@@ -173,7 +171,8 @@ def get_suffix(self, index=0):
         _suffixes = {
             'CUDA': '.cu',
             'OpenCL': '.cl',
-            'C': '.c'
+            'C': '.c',
+            'Python': '.py'
         }
         try:
             return _suffixes[self.lang]
@@ -237,6 +236,8 @@ def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=No
             dev = OpenCLFunctions(device, platform, compiler_options=compiler_options, iterations=iterations, observers=observers)
         elif lang == "C":
             dev = CFunctions(compiler=compiler, compiler_options=compiler_options, iterations=iterations)
+        elif lang == "Python":
+            dev = PythonFunctions(iterations=iterations)
         else:
             raise ValueError("Sorry, support for languages other than CUDA, OpenCL, or C is not implemented yet")
 
@@ -507,23 +508,23 @@ def _default_verify_function(instance, answer, result_host, atol, verbose):
         if answer[i] is not None:    #skip None elements in the answer list
             if isinstance(answer[i], (np.ndarray, cp.ndarray)) and isinstance(arg, (np.ndarray, cp.ndarray)):
                 if answer[i].dtype != arg.dtype:
-                    raise TypeError(f"Element {i} of the expected results list is not of the same dtype as the kernel output: " +
-                                    str(answer[i].dtype) + " != " + str(arg.dtype) + ".")
+                    raise TypeError(f"Element {i} of the expected results list is not of the same dtype as the kernel output: " + str(answer[i].dtype) +
+                                    " != " + str(arg.dtype) + ".")
                 if answer[i].size != arg.size:
-                    raise TypeError(f"Element {i} of the expected results list has a size different from " + "the kernel argument: " +
-                                    str(answer[i].size) + " != " + str(arg.size) + ".")
+                    raise TypeError(f"Element {i} of the expected results list has a size different from " + "the kernel argument: " + str(answer[i].size) +
+                                    " != " + str(arg.size) + ".")
             elif isinstance(answer[i], torch.Tensor) and isinstance(arg, torch.Tensor):
                 if answer[i].dtype != arg.dtype:
-                    raise TypeError(f"Element {i} of the expected results list is not of the same dtype as the kernel output: " +
-                                    str(answer[i].dtype) + " != " + str(arg.dtype) + ".")
+                    raise TypeError(f"Element {i} of the expected results list is not of the same dtype as the kernel output: " + str(answer[i].dtype) +
+                                    " != " + str(arg.dtype) + ".")
                 if answer[i].size() != arg.size():
-                    raise TypeError(f"Element {i} of the expected results list has a size different from " + "the kernel argument: " +
-                                    str(answer[i].size) + " != " + str(arg.size) + ".")
+                    raise TypeError(f"Element {i} of the expected results list has a size different from " + "the kernel argument: " + str(answer[i].size) +
+                                    " != " + str(arg.size) + ".")
 
             elif isinstance(answer[i], np.number) and isinstance(arg, np.number):
                 if answer[i].dtype != arg.dtype:
-                    raise TypeError(f"Element {i} of the expected results list is not the same as the kernel output: " + str(answer[i].dtype) +
-                                    " != " + str(arg.dtype) + ".")
+                    raise TypeError(f"Element {i} of the expected results list is not the same as the kernel output: " + str(answer[i].dtype) + " != " +
+                                    str(arg.dtype) + ".")
             else:
                 #either answer[i] and argument have different types or answer[i] is not a numpy type
                 if not isinstance(answer[i], (np.ndarray, cp.ndarray, torch.Tensor)) or not isinstance(answer[i], np.number):
@@ -572,7 +573,6 @@ def _flatten(a):
     return correct
 
 
-
 #these functions facilitate compiling templated kernels with PyCuda
 def split_argument_list(argument_list):
     """split all arguments in a list into types and names"""
@@ -587,20 +587,24 @@ def split_argument_list(argument_list):
         name_list.append(match.group(2).strip())
     return type_list, name_list
 
+
 def apply_template_typenames(type_list, templated_typenames):
     """replace the typename tokens in type_list with their templated typenames"""
+
     def replace_typename_token(matchobj):
         """function for a whitespace preserving token regex replace"""
         #replace only the match, leaving the whitespace around it as is
         return matchobj.group(1) + templated_typenames[matchobj.group(2)] + matchobj.group(3)
+
     for i, arg_type in enumerate(type_list):
-        for k,v in templated_typenames.items():
+        for k, v in templated_typenames.items():
             #if the templated typename occurs as a token in the string, meaning that it is enclosed in
             #beginning of string or whitespace, and end of string, whitespace or star
             regex = r"(^|\s+)(" + k + r")($|\s+|\*)"
             sub = re.sub(regex, replace_typename_token, arg_type, re.S)
             type_list[i] = sub
 
+
 def get_templated_typenames(template_parameters, template_arguments):
     """based on the template parameters and arguments, create dict with templated typenames"""
     templated_typenames = {}
@@ -610,6 +614,7 @@ def get_templated_typenames(template_parameters, template_arguments):
             templated_typenames[typename] = template_arguments[i]
     return templated_typenames
 
+
 def wrap_templated_kernel(kernel_string, kernel_name):
     """rewrite kernel_string to insert wrapper function for templated kernel"""
     #parse kernel_name to find template_arguments and real kernel name
@@ -626,7 +631,7 @@ def wrap_templated_kernel(kernel_string, kernel_name):
 
     template_parameters = match.group(1).split(',')
     argument_list = match.group(2).split(',')
-    argument_list = [s.strip() for s in argument_list] #remove extra whitespace around 'type name' strings
+    argument_list = [s.strip() for s in argument_list]    #remove extra whitespace around 'type name' strings
 
     type_list, name_list = split_argument_list(argument_list)
 
diff --git a/kernel_tuner/cupy.py b/kernel_tuner/cupy.py
index 5750a94b5..f59c653ee 100644
--- a/kernel_tuner/cupy.py
+++ b/kernel_tuner/cupy.py
@@ -1,7 +1,6 @@
 """This module contains all Cupy specific kernel_tuner functions"""
 from __future__ import print_function
 
-
 import logging
 import time
 import numpy as np
@@ -18,6 +17,7 @@
 
 class CupyRuntimeObserver(BenchmarkObserver):
     """ Observer that measures time using CUDA events during benchmarking """
+
     def __init__(self, dev):
         self.dev = dev
         self.stream = dev.stream
@@ -26,10 +26,13 @@ def __init__(self, dev):
         self.times = []
 
     def after_finish(self):
-        self.times.append(cp.cuda.get_elapsed_time(self.start, self.end)) #ms
+        self.times.append(cp.cuda.get_elapsed_time(self.start, self.end))    #ms
 
     def get_results(self):
-        results = {"time": np.average(self.times), "times": self.times.copy()}
+        results = {
+            "time": np.average(self.times),
+            "times": self.times.copy()
+        }
         self.times = []
         return results
 
@@ -55,7 +58,7 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
         self.texrefs = []
         if not cp:
             raise ImportError("Error: cupy not installed, please install e.g. " +
-                            "using 'pip install cupy-cuda111', please check https://github.com/cupy/cupy.")
+                              "using 'pip install cupy-cuda111', please check https://github.com/cupy/cupy.")
 
         #select device
         self.dev = dev = cp.cuda.Device(device).__enter__()
@@ -87,7 +90,8 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
         #collect environment information
         env = dict()
         cupy_info = str(cp._cupyx.get_runtime_info()).split("\n")[:-1]
-        info_dict = {s.split(":")[0].strip():s.split(":")[1].strip() for s in cupy_info}
+        info_dict = {s.split(":")[0].strip(): s.split(":")[1].strip()
+                     for s in cupy_info}
         env["device_name"] = info_dict[f'Device {device} Name']
 
         env["cuda_version"] = cp.cuda.runtime.driverGetVersion()
@@ -123,11 +127,10 @@ def ready_argument_list(self, arguments):
                 alloc = cp.array(arg)
                 self.allocations.append(alloc)
                 gpu_args.append(alloc)
-            else: # if not a numpy array, just pass argument along
+            else:    # if not a numpy array, just pass argument along
                 gpu_args.append(arg)
         return gpu_args
 
-
     def compile(self, kernel_instance):
         """call the CUDA compiler to compile the kernel, return the device function
 
@@ -150,13 +153,11 @@ def compile(self, kernel_instance):
 
         options = tuple(compiler_options)
 
-        self.current_module = cp.RawModule(code=kernel_string, options=options,
-                                           name_expressions=[kernel_name])
+        self.current_module = cp.RawModule(code=kernel_string, options=options, name_expressions=[kernel_name])
 
         self.func = self.current_module.get_function(kernel_name)
         return self.func
 
-
     def benchmark(self, func, gpu_args, threads, grid):
         """runs the kernel and measures time repeatedly, returns average time
 
@@ -219,9 +220,10 @@ def copy_constant_memory_args(self, cmem_args):
             to be numpy objects, such as numpy.ndarray or numpy.int32, and so on.
         :type cmem_args: dict( string: numpy.ndarray, ... )
         """
-        logging.debug('copy_constant_memory_args called')
-        logging.debug('current module: ' + str(self.current_module))
-        raise NotImplementedError('CuPy backend does not yet support constant memory')
+        for k, v in cmem_args.items():
+            symbol = self.current_module.get_global(k)
+            constant_mem = cp.ndarray(v.shape, v.dtype, symbol)
+            constant_mem[:] = cp.asarray(v)
 
     def copy_shared_memory_args(self, smem_args):
         """add shared memory arguments to the kernel"""
@@ -302,4 +304,6 @@ def memcpy_htod(self, dest, src):
             src = cp.asarray(src)
         cp.copyto(dest, src)
 
-    units = {'time': 'ms'}
+    units = {
+        'time': 'ms'
+    }
diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index ecb7f7197..475966adc 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -45,7 +45,7 @@
 except ImportError:
     torch = util.TorchPlaceHolder()
 
-from kernel_tuner.strategies import brute_force, random_sample, diff_evo, minimize, basinhopping, genetic_algorithm, mls, pso, simulated_annealing, firefly_algorithm, bayes_opt, greedy_mls, greedy_ils, ordered_greedy_mls, dual_annealing, bayes_opt_old, bayes_opt_GPyTorch, bayes_opt_alt_BOTorch
+from kernel_tuner.strategies import brute_force, random_sample, diff_evo, minimize, basinhopping, genetic_algorithm, mls, pso, simulated_annealing, firefly_algorithm, bayes_opt, greedy_mls, greedy_ils, ordered_greedy_mls, dual_annealing, bayes_opt_old, bayes_opt_GPyTorch, bayes_opt_GPyTorch_lean, bayes_opt_alt_BOTorch
 
 strategy_map = {
     "brute_force": brute_force,
@@ -65,6 +65,7 @@
     "bayes_opt": bayes_opt,
     "bayes_opt_old": bayes_opt_old,
     "bayes_opt_GPyTorch": bayes_opt_GPyTorch,
+    "bayes_opt_GPyTorch_lean": bayes_opt_GPyTorch_lean,
     "bayes_opt_BOTorch": bayes_opt_alt_BOTorch,
 }
 
diff --git a/kernel_tuner/python.py b/kernel_tuner/python.py
new file mode 100644
index 000000000..9655b068d
--- /dev/null
+++ b/kernel_tuner/python.py
@@ -0,0 +1,147 @@
+""" This module contains the functionality for loading and running Python functions """
+
+from collections import namedtuple
+import platform
+import logging
+import importlib.util
+
+import numpy
+import numpy.ctypeslib
+
+from kernel_tuner.util import get_temp_filename, delete_temp_file, write_file
+
+# This represents an individual kernel argument.
+# It contains a numpy object (ndarray or number) and a ctypes object with a copy
+# of the argument data. For an ndarray, the ctypes object is a wrapper for the ndarray's data.
+Argument = namedtuple("Argument", ["numpy", "ctypes"])
+
+
+class PythonFunctions(object):
+    """Class that groups the code for loading and running Python functions"""
+
+    def __init__(self, iterations=7):
+        """instantiate PythonFunctions object used for interacting with Python code
+
+        :param iterations: Number of iterations used while benchmarking a kernel, 7 by default.
+        :type iterations: int
+        """
+        self.iterations = iterations
+        self.max_threads = 1024
+
+        #environment info
+        env = dict()
+        env["iterations"] = self.iterations
+        self.env = env
+        self.name = platform.processor()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *exc):
+        pass
+
+    def ready_argument_list(self, arguments):
+        """ready argument list to be passed to the Python function
+        """
+        return arguments
+
+    def compile(self, kernel_instance):
+        """ import the module named after the kernel and return the function with that same name """
+
+        suffix = kernel_instance.kernel_source.get_user_suffix()
+        source_file = get_temp_filename(suffix=suffix)
+
+        spec = importlib.util.find_spec(kernel_instance.name)
+        foo = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(foo)
+        func = getattr(foo, kernel_instance.name)
+
+        self.params = kernel_instance.params
+
+        delete_temp_file(source_file)
+        return func
+
+    def benchmark(self, func, args, threads, grid):
+        """runs the kernel repeatedly, returns averaged returned value
+
+        The Python function tuning is a little bit more flexible than direct
+        CUDA or OpenCL kernel tuning. The Python function needs to measure time,
+        or some other quality metric you wish to tune on, on its own and should
+        therefore return a single floating-point value.
+
+        Benchmark runs the Python function repeatedly and returns the plain mean
+        of the values it returned, together with the individual values. The
+        number of iterations is set during the creation of the PythonFunctions
+        object. No measurements are discarded, so outliers such as
+        initialization artifacts are included in the mean. A negative return
+        value is treated as a failed launch and raises an exception.
+
+        :param func: A Python function as returned by compile()
+        :type func: callable
+
+        :param args: A list of arguments to the function, order should match the
+            order in the code. The list should be prepared using
+            ready_argument_list().
+        :type args: list(Argument)
+
+        :param threads: Ignored, but left as argument for now to have the same
+            interface as CudaFunctions and OpenCLFunctions.
+        :type threads: any
+
+        :param grid: Ignored, but left as argument for now to have the same
+            interface as CudaFunctions and OpenCLFunctions.
+        :type grid: any
+
+        :returns: All returned values under "times" and their mean under "time".
+        :rtype: dict()
+        """
+        result = dict()
+        result["times"] = []
+        for _ in range(self.iterations):
+            value = self.run_kernel(func, args, threads, grid)
+
+            #I would like to replace the following with actually capturing
+            #stderr and detecting the error directly in Python, it proved
+            #however that capturing stderr for non-Python functions from Python
+            #is a rather difficult thing to do
+            #
+            #The current, less than ideal, scheme uses the convention that a
+            #negative time indicates a 'too many resources requested for launch'
+            #which Kernel Tuner can silently ignore
+            if value < 0.0:
+                raise Exception("too many resources requested for launch")
+
+            result["times"].append(value)
+        result["time"] = numpy.mean(result["times"])
+        return result
+
+    def run_kernel(self, func, args, threads, grid):
+        """runs the kernel once, returns whatever the kernel returns
+
+        :param func: A Python function as returned by compile()
+        :type func: callable
+
+        :param args: A list of arguments to the function, order should match the
+            order in the code. The list should be prepared using
+            ready_argument_list().
+        :type args: list(Argument)
+
+        :param threads: Ignored, but left as argument for now to have the same
+            interface as CudaFunctions and OpenCLFunctions.
+        :type threads: any
+
+        :param grid: Ignored, but left as argument for now to have the same
+            interface as CudaFunctions and OpenCLFunctions.
+        :type grid: any
+
+        :returns: The value returned by a single call of the Python function.
+        :rtype: float
+        """
+        logging.debug("run_kernel")
+        logging.debug("arguments=" + str([str(arg) for arg in args]))
+
+        time = func(**self.params)
+
+        return time
+
+    units = {}
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch.py b/kernel_tuner/strategies/bayes_opt_GPyTorch.py
index 31b987ca6..784c7d6c0 100644
--- a/kernel_tuner/strategies/bayes_opt_GPyTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_GPyTorch.py
@@ -49,7 +49,7 @@ def normalize_parameter_space(param_space: list, tune_params: dict, normalized:
     return param_space_normalized
 
 
-def prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict):
+def prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict: dict, max_threads: int):
     """ Pruning of the parameter space to remove dimensions that have a constant parameter """
     pruned_tune_params_mask = list()
     removed_tune_params = list()
@@ -64,6 +64,10 @@ def prune_parameter_space(parameter_space, tuning_options, tune_params, normaliz
             removed_tune_params.append(normalized)
     if 'verbose' in tuning_options and tuning_options.verbose is True and len(tune_params.keys()) != sum(pruned_tune_params_mask):
         print(f"Number of parameters (dimensions): {len(tune_params.keys())}, after pruning: {sum(pruned_tune_params_mask)}")
+    # TODO check whether the number of pruned parameters is correct
+    # print(
+    #     f"Number of parameters (dimensions): {len(tune_params.keys())}, after pruning: {sum(pruned_tune_params_mask)}, by util: {util.get_number_of_valid_configs(tuning_options, max_threads)}"
+    # )
     parameter_space = list(tuple(itertools.compress(param_config, pruned_tune_params_mask)) for param_config in parameter_space)
     return parameter_space, removed_tune_params
 
@@ -121,7 +125,7 @@ def tune(runner, kernel_options, device_options, tuning_options):
 
     # prune the parameter space to remove dimensions that have a constant parameter
     if prune_parameterspace:
-        parameter_space, removed_tune_params = prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict)
+        parameter_space, removed_tune_params = prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict, runner.dev.max_threads)
     else:
         parameter_space = list(parameter_space)
         removed_tune_params = [None] * len(tune_params.keys())
@@ -139,7 +143,7 @@ class ExactGPModel(gpytorch.models.ExactGP):
     def __init__(self, train_x, train_y, likelihood):
         super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
         self.mean_module = gpytorch.means.ZeroMean()    # TODO maybe try ConstantMean or LinearMean
-        self.covar_module = gpytorch.kernels.MaternKernel(nu=1.5)    # TODO maybe try ScaleKernel(MaternKernel)
+        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=1.5))    # TODO maybe try ScaleKernel(MaternKernel)
 
     def forward(self, x):
         mean_x = self.mean_module(x)
@@ -400,6 +404,7 @@ def predict(self, x) -> Tuple[float, float]:
     def predict_list(self, lst: list) -> Tuple[np.ndarray, np.ndarray]:
         """ Returns a list of means and standard deviations predicted by the surrogate model for the parameter configurations, and separate lists of means and standard deviations """
         with torch.no_grad(), gpytorch.settings.fast_pred_var():
+            # TODO use torch.cuda for GPU
             test_x = torch.Tensor(lst)
             observed_pred = self.__likelihood(self.__model(test_x))
             mu = observed_pred.mean
@@ -510,7 +515,7 @@ def train_model_hyperparams(self):
         # set the hyperparameters globally for reference
         self.hyperparams = {
             'loss': loss.item(),
-            'lengthscale': self.__model.covar_module.lengthscale.item(),
+            'lengthscale': self.__model.covar_module.base_kernel.lengthscale.item(),
             'noise': self.__model.likelihood.noise.item(),
         }
         # print(f"Loss: {self.hyperparams['loss']}, lengthscale: {self.hyperparams['lengthscale']}, noise: {self.hyperparams['noise']}")
@@ -540,6 +545,7 @@ def initial_sample(self):
             # check for validity to avoid having no actual initial samples
             if self.is_valid(observation):
                 collected_samples += 1
+
         # instantiate the model with the initial sample
         self.__likelihood = gpytorch.likelihoods.GaussianLikelihood()
         self.__tparams = torch.Tensor(self.__valid_params)
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
new file mode 100644
index 000000000..8f8f0be30
--- /dev/null
+++ b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
@@ -0,0 +1,746 @@
+""" Lean implementation of Bayesian Optimization with GPyTorch """
+from copy import deepcopy
+from typing import Any, Tuple
+from random import randint, shuffle
+from math import floor, ceil
+import numpy as np
+from numpy.lib.function_base import diff
+import torch
+import gpytorch
+
+from skopt.sampler import Lhs
+from scipy.stats import norm
+
+from kernel_tuner.util import get_valid_configs, config_valid
+from kernel_tuner.strategies import minimize
+from torch.functional import Tensor
+from torch.nn import parameter
+
+supported_initial_sample_methods = ['lhs', 'index', 'random']
+supported_methods = ['ei', 'poi', 'random']
+supported_cov_kernels = ['matern', 'matern_scalekernel']
+supported_likelihoods = ['Gaussian', 'GaussianPrior', 'FixedNoise']
+supported_optimizers = ['LBFGS', 'Adam']
+
+
+def tune(runner, kernel_options, device_options, tuning_options):
+    """ Find the best performing kernel configuration in the parameter space
+
+    :param runner: A runner from kernel_tuner.runners
+    :type runner: kernel_tuner.runner
+
+    :param kernel_options: A dictionary with all options for the kernel.
+    :type kernel_options: kernel_tuner.interface.Options
+
+    :param device_options: A dictionary with all options for the device
+        on which the kernel should be tuned.
+    :type device_options: kernel_tuner.interface.Options
+
+    :param tuning_options: A dictionary with all options regarding the tuning
+        process.
+    :type tuning_options: kernel_tuner.interface.Options
+
+    :returns: A list of dictionaries for executed kernel configurations and their
+        execution times, and a dictionary that contains information
+        about the hardware/software environment on which the tuning took place.
+    :rtype: list(dict()), dict()
+
+    """
+
+    # set CUDA availability
+    cuda_available = torch.cuda.is_available()
+    cuda_available = False
+    device = torch.device("cuda:0" if cuda_available else "cpu")
+    if cuda_available:
+        print(f"CUDA is available, device: {torch.cuda.get_device_name(device)}")
+
+
+    # retrieve options with defaults
+    options = tuning_options.strategy_options
+    optimization_direction = options.get("optimization_direction", 'min')
+    num_initial_samples = options.get("popsize", 20)
+    max_fevals = options.get("max_fevals", 100)
+    max_threads = runner.dev.max_threads
+    if max_fevals < num_initial_samples:
+        raise ValueError(f"Maximum number of function evaluations ({max_fevals}) can not be lower than the number of initial samples ({num_initial_samples}) ")
+
+    # enabling scaling will unscale and snap inputs on evaluation, more efficient to keep unscaled values in a lookup table
+    tuning_options["snap"] = True
+    tuning_options["scaling"] = False
+
+    # prune the search space using restrictions
+    # TODO look into the efficiency, especially for GEMM (56.47%)
+    parameter_space = get_valid_configs(tuning_options, max_threads)
+
+    # limit max_fevals to max size of the parameter space
+    max_fevals = min(len(parameter_space), max_fevals)
+
+    # execute Bayesian Optimization
+    BO = BayesianOptimization(parameter_space, kernel_options, tuning_options, runner, num_initial_samples, optimization_direction, device)
+    # BO.visualize()
+    all_results = BO.optimize(max_fevals)
+    # BO.visualize()
+
+    return all_results, runner.dev.get_environment()
+
+
+class ExactGPModel(gpytorch.models.ExactGP):
+
+    def __init__(self, train_x, train_y, likelihood, cov_kernel_name: str, cov_kernel_lengthscale: float):
+        super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
+        self.mean_module = gpytorch.means.ZeroMean()
+        if cov_kernel_name == 'matern':
+            self.covar_module = gpytorch.kernels.MaternKernel(nu=cov_kernel_lengthscale)
+        elif cov_kernel_name == 'matern_scalekernel':
+            self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=cov_kernel_lengthscale))
+
+    def forward(self, x):
+        mean_x = self.mean_module(x)
+        covar_x = self.covar_module(x)
+        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
+
+
+class BayesianOptimization:
+
+    def __init__(self, parameter_space: list, kernel_options, tuning_options, runner, num_initial_samples: int, optimization_direction: str, device: torch.device) -> None:
+        self.animate = False    # TODO remove
+
+        # set defaults
+        self.num_initial_samples = num_initial_samples
+        self.fevals = 0
+        self.all_results = []
+        self.unique_results = {}
+        self.current_optimal_config = None
+
+        # set Kernel Tuner data
+        self.kernel_options = kernel_options
+        self.tuning_options = tuning_options
+        self.runner = runner
+        self.max_threads = runner.dev.max_threads
+
+        # get tuning options
+        self.initial_sample_method = self.get_hyperparam("initialsamplemethod", "index", supported_initial_sample_methods)
+        self.initial_training_iter = self.get_hyperparam("initialtrainingiter", 50)
+        self.training_iter = self.get_hyperparam("trainingiter", 0)
+        self.cov_kernel_name = self.get_hyperparam("covariancekernel", "matern_scalekernel", supported_cov_kernels)
+        self.cov_kernel_lengthscale = self.get_hyperparam("covariancelengthscale", 1.5)
+        self.likelihood_name = self.get_hyperparam("likelihood", "Gaussian", supported_likelihoods)
+        self.optimizer_name = self.get_hyperparam("optimizer", "Adam", supported_optimizers)
+        self.optimizer_learningrate = self.get_hyperparam("optimizer_learningrate", 0.1)
+        acquisition_function_name = self.get_hyperparam("method", "ei", supported_methods)
+        af_params = self.get_hyperparam("methodparams", {})
+
+        # set acquisition function options
+        self.set_acquisition_function(acquisition_function_name)
+        if 'explorationfactor' not in af_params:
+            af_params['explorationfactor'] = 'CV'
+        self.af_params = af_params
+
+        # set Tensors
+        # the unvisited_configs and valid_configs are to be used as boolean masks on the other tensors, more efficient than adding to / removing from tensors
+        self.device = device
+        self.out_device = torch.device("cpu")
+        self.dtype = torch.double
+        self.size = len(parameter_space)
+        self.unvisited_configs = torch.ones(self.size, dtype=torch.bool).to(device)
+        self.index_counter = torch.tensor(range(self.size))
+        self.valid_configs = torch.zeros(self.size, dtype=torch.bool).to(device)
+        self.inital_sample_configs = torch.zeros(self.size, dtype=torch.bool).to(device)
+        self.results = torch.zeros(self.size, dtype=self.dtype).to(device) * np.nan             # x (param configs) and y (results) must be the same type
+        self.results_std = torch.ones(self.size, dtype=self.dtype).to(device) * 1e-3
+
+        # transform non-numerical parameters to numerical, keep true_param_configs for evaluation function
+        self.param_configs, self.tune_params = self.transform_nonnumerical_params(parameter_space)
+        self.true_param_configs = parameter_space
+
+        # set scaling
+        self.scaled_input = True
+        self.scaled_output = True
+        if not self.scaled_input:
+            self.param_configs_scaled = self.param_configs
+        else:
+            self.apply_scaling_to_inputs()
+
+        # set optimization settings
+        self.invalid_value = 1e20
+        self.optimization_direction = optimization_direction
+        if self.optimization_direction == 'min':
+            self.is_better_than = lambda a, b: a < b
+            self.inf_value = np.PINF
+            self.opt = torch.min
+            self.argopt = torch.argmin
+        elif self.optimization_direction == 'max':
+            self.is_better_than = lambda a, b: a > b
+            self.inf_value = np.NINF
+            self.opt = torch.max
+            self.argopt = torch.argmax
+        else:
+            raise ValueError(f"Invalid optimization direction {self.optimization_direction}")
+
+        # set the model
+        self.current_optimum = self.inf_value
+        self.hyperparams = {
+            'loss': np.nan,
+            'lengthscale': np.nan,
+            'noise': np.nan,
+        }
+        self.initialize_model()
+
+    @property
+    def train_x(self):
+        """ Get the valid parameter configurations """
+        return self.param_configs_scaled[self.valid_configs].to(self.device)
+
+    @property
+    def train_y(self):
+        """ Get the valid results """
+        outputs = self.results[self.valid_configs]
+        if self.scaled_output:
+            # z-score, remove mean and make unit variance to scale it to N(0,1)
+            # alternatively, first min-max the outputs between -1 and +1 and apply a Fisher transformation (np.arctanh)
+            outputs = (outputs - outputs.mean()) / outputs.std()
+        return outputs
+
+    @property
+    def train_y_err(self):
+        """ Get the error on the valid results """
+        std = self.results_std[self.valid_configs]
+        if self.scaled_output and std.std() > 0.0:
+            std = (std - std.mean()) / std.std()
+        return std
+
+    @property
+    def test_x(self):
+        """ Get the not yet visited parameter configurations """
+        return self.param_configs_scaled[self.unvisited_configs].to(self.device)
+
+    @property
+    def test_x_unscaled(self):
+        """ Get the unscaled, not yet visited parameter configurations """
+        return self.param_configs[self.unvisited_configs]
+
+    @property
+    def invalid_x(self):
+        """ Get the invalid parameter configurations by checking which visited configs are not valid (equivalent to checking which unvisited configs are valid) """
+        invalid_mask = (self.unvisited_configs == self.valid_configs)
+        return self.param_configs[invalid_mask]
+
+    def true_param_config_index(self, target_index: int) -> int:
+        """ The index required to get the true config param index when dealing with test_x """
+        # get the index of the #index-th True (for example the 9th+1 True could be index 13 because there are 4 Falses in between)
+
+        counter_masked = self.index_counter[self.unvisited_configs]
+        return counter_masked[target_index]
+
+    def true_param_config_indices(self, target_indices: torch.Tensor) -> torch.Tensor:
+        """ Same as true_param_config_index, but for an array of targets in O(n) instead of O(n^2). Assumes the array is sorted in ascending order. """
+        # TODO same trick as true_param_config_index
+
+        true_indices = torch.full_like(target_indices, -1).to(self.device)
+        target_index_index = 0
+        target_index = target_indices[target_index_index]
+        count = -1
+        for index, value in enumerate(self.unvisited_configs):
+            if value == True:
+                count += 1
+            if count == target_index:
+                true_indices[target_index_index] = index
+                target_index_index += 1
+                if target_index_index == len(target_indices):
+                    break
+                target_index = target_indices[target_index_index]
+
+        return true_indices
+
+    def initialize_model(self):
+        """ Initialize the surrogate model """
+        self.initial_sample()
+
+        # create the model
+        if self.likelihood_name == 'Gaussian':
+            self.likelihood = gpytorch.likelihoods.GaussianLikelihood()
+        elif self.likelihood_name == 'FixedNoise':
+            self.likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(noise=self.train_y_err.clamp(min=1.0e-4), learn_additional_noise=False)
+        self.likelihood = self.likelihood.to(self.device)
+        self.model = ExactGPModel(self.train_x, self.train_y, self.likelihood, self.cov_kernel_name, self.cov_kernel_lengthscale)
+
+        # Find optimal model hyperparameters
+        self.model.train()
+        self.likelihood.train()
+        model_parameters = filter(lambda p: p.requires_grad, self.model.parameters())
+
+        # LBFGS is probably better as Adam is only first-order
+        if self.optimizer_name == 'LBFGS':
+            self.optimizer = torch.optim.LBFGS(model_parameters, lr=self.optimizer_learningrate)
+        elif self.optimizer_name == 'Adam':
+            self.optimizer = torch.optim.Adam(model_parameters, lr=self.optimizer_learningrate)
+
+        self.mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model).to(self.device)
+        self.train_hyperparams(self.initial_training_iter)
+
+    def initial_sample(self):
+        """ Take an initial sample of the parameter space """
+        param_configs = list()
+
+        # first apply the initial sampling method
+        if self.initial_sample_method == 'lhs':
+            indices, param_configs = self.get_lhs_sample()
+            for index in indices:
+                # indices may be -1 because of parameter filtering etc., so we replace those with index-spaced samples
+                if index != -1:
+                    self.evaluate_config(index)
+        elif self.initial_sample_method == 'random':
+            while self.fevals < self.num_initial_samples:
+                param_config_index = randint(0, self.size - 1)
+                param_config = tuple(self.param_configs_scaled[param_config_index].tolist())
+                if param_config in param_configs:
+                    continue
+                param_configs.append(param_config)
+                self.evaluate_config(param_config_index)
+
+        # then take index-spaced samples until all samples are valid
+        while self.fevals < self.num_initial_samples:
+            least_evaluated_region_index = self.get_middle_index_of_least_evaluated_region()
+            self.evaluate_config(least_evaluated_region_index)
+
+        # set the current optimum, initial sample mean and initial sample std
+        self.current_optimum = self.opt(self.train_y).item()
+        self.initial_sample_mean = self.train_y.mean().item()
+        self.initial_sample_std = None
+
+        # save a boolean mask of the initial samples
+        self.inital_sample_configs = self.valid_configs.detach().clone()
+
+    def get_lhs_sample(self) -> Tuple[list, list]:
+        """ Get a centered Latin Hypercube Sample """
+        param_configs = list()
+        n_samples = self.num_initial_samples
+        temp_param_configs = [[] for _ in range(n_samples)]
+        for param_values in self.tune_params.values():
+            l = len(param_values)
+
+            # determine the interval and offset
+            interval = l / n_samples
+            offset = 0
+            if l > n_samples:
+                # take the difference between the last index and the end of the list, and the first index and the start of the list
+                offset = ((l - 1 - interval * n_samples) - interval) / 2
+
+            # assemble the parameter configurations
+            for i in range(n_samples):
+                index = ceil(offset + interval * (i + 1)) - 1
+                temp_param_configs[i].append(param_values[index])
+
+        # set the actual parameter configurations
+        for param_config in temp_param_configs:
+            param_config = tuple(param_config)
+            param_configs.append(param_config)
+        param_configs = torch.tensor(param_configs, dtype=self.dtype).to(self.device)
+
+        # get the indices of the parameter configurations in O(n^2)
+        param_configs_indices = [-1 for _ in range(n_samples)]
+        for index, param_config in enumerate(self.param_configs):
+            for selected_index, selected_param_config in enumerate(param_configs):
+                if torch.allclose(selected_param_config, param_config, equal_nan=False) and index not in param_configs_indices:
+                    param_configs_indices[selected_index] = index
+
+        if param_configs_indices.count(-1) > n_samples / 2:
+            print(f"No good fit was found in {param_configs_indices.count(-1)} out of the {n_samples} samples. Perhaps try something other than LHS.")
+        return param_configs_indices, param_configs
+
+    def get_middle_index_of_least_evaluated_region(self) -> int:
+        """ Get the middle index of the region of parameter configurations that is the least visited """
+        # This uses the largest distance between visited parameter configurations. That means it does not properly take the parameters into account, only the index of the parameter configurations, whereas LHS does.
+        distance_counter = -1
+        distance_tensor = torch.zeros_like(self.unvisited_configs, dtype=torch.int)     # TODO check if .to(self.device) is faster or slower
+        for index, unvisited in enumerate(self.unvisited_configs):
+            if unvisited:
+                distance_counter += 1
+            if not unvisited:
+                distance_counter = 0
+            distance_tensor[index] = distance_counter
+
+        biggest_distance_index = distance_tensor.argmax()
+        biggest_distance = distance_tensor[biggest_distance_index].item()
+        middle_index = biggest_distance_index - round(biggest_distance / 2)
+        # print(f"Max distance {biggest_distance}, index: {middle_index}, between: {biggest_distance_index-biggest_distance}-{biggest_distance_index}")
+        return middle_index
+
+    def find_nearest(self, value, array: torch.Tensor):
+        """ Find the value nearest to the given value in the array """
+        index = (torch.abs(array - value)).argmin()
+        return array[index]
+
+    def train_hyperparams(self, training_iter: int):
+        """ Optimize the surrogate model hyperparameters iteratively """
+        self.model.train()
+        self.likelihood.train()
+
+        def closure():
+            self.optimizer.zero_grad()
+            output = self.model(self.train_x)    # get model output
+            try:
+                loss = -self.mll(output, self.train_y)    # calculate loss and backprop gradients
+                loss.backward()
+                return loss
+            except gpytorch.utils.errors.NotPSDError:
+                print(f"WARNING - matrix not positive definite during training")
+
+        loss = None
+        for _ in range(training_iter):
+            _loss = self.optimizer.step(closure)
+            if _loss is not None:
+                loss = _loss
+
+        # set the hyperparams to the new values
+        try:
+            lengthscale = self.model.covar_module.lengthscale.item()
+        except AttributeError:
+            lengthscale = self.model.covar_module.base_kernel.lengthscale.item()
+        self.hyperparams = {
+            'loss': float(loss.item()) if loss is not None else np.nan,
+            'lengthscale': float(lengthscale),
+            'noise': float(self.model.likelihood.noise.mean().detach()),
+        }
+
+        # get into evaluation (predictive posterior) mode
+        self.model.eval()
+        self.likelihood.eval()
+
+    def optimize(self, max_fevals: int) -> Tuple[tuple, float]:
+        """ Optimize the objective """
+        predictions_tuple = None
+        short_param_config_index = None
+        last_invalid = False
+        report_multiple_minima = round(self.size / 10)    # if more than 10% of the space is minima, print a warning
+        use_contextual_variance = self.af_params['explorationfactor'] == 'CV'
+        while self.fevals < max_fevals:
+            if last_invalid:
+                # TODO no need to get the predictions again as the predictions are unchanged, just set the invalid param config mean to the worst non-NAN value and the std to 0
+                # predictions_tuple[0][short_param_config_index] = torch.nanmean(predictions_tuple[0])
+                # predictions_tuple[1][short_param_config_index] = 0
+                predictions_tuple = self.remove_from_predict_list(predictions_tuple, short_param_config_index)
+            else:
+                predictions_tuple = self.predict_list()
+                if self.initial_sample_std is None:
+                    self.initial_sample_std = predictions_tuple[1].mean().item()
+            hyperparam = self.contextual_variance(predictions_tuple[0], predictions_tuple[1]) if use_contextual_variance else None
+            acquisition_values = self.acquisition_function(predictions_tuple, hyperparam)
+            short_param_config_index = self.argopt(acquisition_values)
+            param_config_index = self.true_param_config_index(short_param_config_index)
+
+            # if there are multiple minima in the acquisition function values, we want to take one from the least evaluated region
+            min_acquisition_function_value = acquisition_values[short_param_config_index]
+            indices_where_min = (acquisition_values <= min_acquisition_function_value).nonzero(as_tuple=True)[0]
+            if len(indices_where_min) > 1:
+                # first get the true index for the minima
+                true_indices_where_min = self.true_param_config_indices(indices_where_min)
+                # then get the index of the least evaluated region
+                least_evaluated_region_index = self.get_middle_index_of_least_evaluated_region()
+                # now find the minima closest to the least evaluated region
+                param_config_index = self.find_nearest(least_evaluated_region_index, true_indices_where_min)
+                short_param_config_index = -1    # invalidate the short_param_config_index because we bypassed it
+                if len(indices_where_min) > report_multiple_minima:
+                    print(
+                        f"WARNING - after {self.fevals}/{max_fevals} fevals, there were multiple minima in the acquisition values ({len(indices_where_min)}), picking one based on the least evaluated region"
+                    )
+
+            # evaluate and register the result
+            result = self.evaluate_config(param_config_index)
+            if result == self.invalid_value and short_param_config_index > -1:
+                # can't use last_invalid if there were multiple minima in the acquisition function values, because short_param_config_index will not be set
+                last_invalid = True
+            else:
+                last_invalid = False
+                self.model.set_train_data(self.train_x, self.train_y, strict=False)
+                if self.training_iter > 0:
+                    self.train_hyperparams(training_iter=self.training_iter)
+                # set the current optimum
+                self.current_optimum = self.opt(self.train_y).item()
+            # print(f"Valid: {len(self.train_x)}, unvisited: {len(self.test_x)}, invalid: {len(self.invalid_x)}, last invalid: {last_invalid}")
+            if self.animate:
+                self.visualize()
+
+        return self.all_results
+
+    def objective_function(self, param_config: tuple) -> float:
+        return minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.all_results)
+
+    def evaluate_config(self, param_config_index: int) -> float:
+        """ Evaluates a parameter configuration, returns the time """
+        param_config = self.true_param_configs[param_config_index]
+        time = self.objective_function(param_config)
+        self.register_result(time, param_config_index)
+        self.update_unique_results()
+        self.fevals = len(self.unique_results)
+        return time
+
+    def register_result(self, result: float, param_config_index: int):
+        """ Registers the result to the Tensors and adds the hyperparameters to the results dict """
+        # set the unvisited Tensors
+        if self.unvisited_configs[param_config_index] == False:
+            raise ValueError(f"The param config index {param_config_index} was already set to False!")
+        self.unvisited_configs[param_config_index] = False
+
+        # set the results Tensors
+        last_result = self.all_results[-1]
+        if result != self.invalid_value:
+            self.valid_configs[param_config_index] = True
+            self.results[param_config_index] = result
+            assert last_result['time'] == result
+            self.results_std[param_config_index] = np.std(last_result['times'])
+
+        # add the current model parameters to the results dict
+        if len(self.all_results) < 1:
+            return
+        for key, value in self.hyperparams.items():
+            last_result[key] = value
+        self.all_results[-1] = last_result
+
+    def update_unique_results(self):
+        """ Updates the unique results dictionary """
+        record = self.all_results[-1]
+        # make a unique string by taking every value in a result, if it already exists, it is overwritten
+        self.unique_results.update({",".join([str(v) for k, v in record.items() if k in self.tuning_options.tune_params]): record["time"]})
+
+    def predict_list(self) -> Tuple[torch.Tensor, torch.Tensor]:
+        """ Returns the means and standard deviations predicted by the surrogate model for the unvisited parameter configurations """
+        with torch.no_grad(), gpytorch.settings.fast_pred_samples(), gpytorch.settings.fast_pred_var():
+            observed_pred = self.likelihood(self.model(self.test_x))
+            mu = observed_pred.mean
+            std = observed_pred.variance.clamp(min=1e-9)    # TODO .sqrt() or not? looks like without is better
+            return mu, std
+
+    def remove_from_predict_list(self, p: Tuple[torch.Tensor, torch.Tensor], i: int) -> Tuple[torch.Tensor, torch.Tensor]:
+        """ Remove an index from a tuple of predictions """
+        return torch.cat([p[0][:i], p[0][i + 1:]]), torch.cat([p[1][:i], p[1][i + 1:]])
+
+    def af_random(self, predictions=None, hyperparam=None) -> list:
+        """ Acquisition function returning a randomly shuffled list for comparison """
+        list_random = list(range(len(self.unvisited_param_configs)))
+        shuffle(list_random)
+        return list_random
+
+    def get_diff_improvement(self, y_mu, y_std, fplus) -> torch.Tensor:
+        """ compute probability of improvement by assuming normality on the difference in improvement """
+        diff_improvement = (y_mu - fplus) / y_std    # y_std can be very small, causing diff_improvement to be very large
+        diff_improvement = (diff_improvement - diff_improvement.mean()) / diff_improvement.std()    # force to N(0,1) with z-score
+        if self.optimization_direction == 'max':
+            diff_improvement = -diff_improvement
+        return diff_improvement
+
+    def contextual_variance(self, mean: torch.Tensor, std: torch.Tensor):
+        """ Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018) """
+        if not self.af_params['explorationfactor'] == 'CV':
+            raise ValueError(f"Contextual Variance was called, but is not set as the exploration factor ({self.af_params['explorationfactor']})")
+        if self.optimization_direction == 'max':
+            raise NotImplementedError("Contextual Variance has not yet been implemented for maximisation")
+        if self.current_optimum == self.inf_value:
+            return 0.01
+        if self.scaled_output:
+            improvement_over_initial_sample = (abs(self.current_optimum) - self.initial_sample_mean) / self.initial_sample_std
+            improvement_over_current_sample = (abs(self.current_optimum) - self.train_y.mean().item()) / std.mean().item()
+            improvement_diff = improvement_over_current_sample - improvement_over_initial_sample
+            # the closer the improvement over the current sample is to the improvement over the initial sample, the greater the exploration
+            cv = max(np.log(1 - improvement_diff) + 0.1, 0.001)
+            return cv
+        else:
+            raise NotImplementedError("Contextual Variance has not yet been implemented for non-scaled outputs")
+
+    def af_probability_of_improvement_tensor(self, predictions: Tuple[torch.Tensor, torch.Tensor], hyperparam=None) -> torch.Tensor:
+        """ Acquisition function Probability of Improvement (PoI) tensor-based """
+
+        # prefetch required data
+        y_mu, y_std = predictions
+        if hyperparam is None:
+            hyperparam = self.af_params['explorationfactor']
+        fplus = self.current_optimum - hyperparam
+
+        diff_improvement = self.get_diff_improvement(y_mu, y_std, fplus)
+        normal = torch.distributions.Normal(torch.zeros_like(diff_improvement), torch.ones_like(diff_improvement))
+        cdf = normal.cdf(diff_improvement)
+
+        # sanity check
+        if torch.all(cdf == cdf[0]):
+            raise ValueError("You need to scale the diff_improvement-values!")
+        return cdf
+
+    def af_expected_improvement_tensor(self, predictions: Tuple[torch.Tensor, torch.Tensor], hyperparam=None) -> torch.Tensor:
+        """ Acquisition function Expected Improvement (EI) tensor-based """
+
+        # prefetch required data
+        y_mu, y_std = predictions
+        if hyperparam is None:
+            hyperparam = self.af_params['explorationfactor']
+        fplus = self.current_optimum - hyperparam
+        # fplus = torch.full_like(y_mu, fplus) TODO does this make a difference for performance?
+
+        diff_improvement = self.get_diff_improvement(y_mu, y_std, fplus)
+        normal = torch.distributions.Normal(torch.zeros_like(diff_improvement), torch.ones_like(diff_improvement))
+        cdf = normal.cdf(diff_improvement)
+        pdf = torch.exp(normal.log_prob(diff_improvement))
+
+        # sanity check
+        if torch.all(cdf == cdf[0]) or torch.all(pdf == pdf[0]):
+            raise ValueError("You need to scale the diff_improvement-values!")
+
+        # compute expected improvement in bulk
+        exp_improvement = (pdf + diff_improvement + y_std * cdf)
+        # alternative exp_improvement = y_std * (pdf + diff_improvement * cdf)
+        # alternative exp_improvement = -((fplus - y_mu) * cdf + y_std * pdf)
+        return exp_improvement
+
+    """                  """
+    """ Helper functions """
+    """                  """
+
+    def get_hyperparam(self, name: str, default, supported_values=list()):
+        """ Retrieve the value of a hyperparameter based on the name """
+        value = self.tuning_options.strategy_options.get(name, default)
+        if len(supported_values) > 0 and value not in supported_values:
+            raise ValueError(f"'{name}' is set to {value}, but must be one of {supported_values}")
+        return value
+
+    def set_acquisition_function(self, acquisition_function: str):
+        """ Set the acquisition function based on the name """
+        if acquisition_function not in supported_methods:
+            raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
+
+        if acquisition_function == 'poi':
+            self.acquisition_function = self.af_probability_of_improvement_tensor
+        elif acquisition_function == 'ei':
+            self.acquisition_function = self.af_expected_improvement_tensor
+        elif acquisition_function == 'random':
+            self.acquisition_function = self.af_random
+
+    def apply_scaling_to_inputs(self):
+        """ Scale the inputs using min-max normalization (0-1) and remove constant parameters """
+        # TODO look into the efficiency, especially for GEMM (18.54%)
+        self.scaled_inputs = torch.zeros_like(self.param_configs)
+        param_configs_scaled = torch.zeros_like(self.param_configs)
+
+        # first get the scaling factors of each parameter
+        v_min_list = list()
+        v_max_list = list()
+        unchanging_params_list = list()
+        for param_values in self.tune_params.values():
+            v_min = min(param_values)
+            v_max = max(param_values)
+            v_min_list.append(v_min)
+            v_max_list.append(v_max)
+            unchanging_params_list.append(v_min == v_max)
+
+        # then set each parameter value to the scaled value
+        for param_index in range(len(self.param_configs[0])):
+            v_min = v_min_list[param_index]
+            v_max = v_max_list[param_index]
+            v_diff = v_max - v_min
+            for param_config_index, param_config in enumerate(self.param_configs):
+                param_configs_scaled[param_config_index][param_index] = (param_config[param_index] - v_min) / v_diff
+
+        # finally remove parameters that are constant by applying a mask
+        unchanging_params_tensor = torch.tensor(unchanging_params_list, dtype=torch.bool)
+        if torch.all(unchanging_params_tensor == True):
+            raise ValueError(f"All of the parameter configurations ({self.size}) are the same: {self.param_configs[0]}, nothing to optimize")
+        nonstatic_param_count = torch.count_nonzero(~unchanging_params_tensor)
+        self.param_configs_scaled = torch.zeros([len(param_configs_scaled), nonstatic_param_count], dtype=self.dtype)
+        for param_config_index, param_config in enumerate(param_configs_scaled):
+            self.param_configs_scaled[param_config_index] = param_config[~unchanging_params_tensor]
+
+    def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[torch.Tensor, dict]:
+        """ transform non-numerical or mixed-type parameters to numerical Tensor, also return new tune_params """
+        parameter_space = deepcopy(parameter_space)
+        number_of_params = len(parameter_space[0])
+
+        # find out which parameters have nonnumerical or mixed types, and create a range of integers instead
+        nonnumericals_exist = False
+        nonnumerical_type = torch.zeros(number_of_params, dtype=torch.bool)
+        nonnumerical_values = [ [] for _ in range(number_of_params) ]
+        tune_params = deepcopy(self.tuning_options.tune_params)
+        for param_index, (param_key, param_values) in enumerate(self.tuning_options.tune_params.items()):
+            if not all(isinstance(v, (int, float, complex)) for v in param_values):
+                nonnumericals_exist = True
+                nonnumerical_type[param_index] = True
+                nonnumerical_values[param_index] = param_values
+                tune_params[param_key] = range(len(param_values))
+
+        # overwrite the nonnumerical parameters with numerical parameters
+        if nonnumericals_exist:
+            self.tuning_options["snap"] = False     # snapping is only possible with numerical values
+            for param_config_index, param_config in enumerate(parameter_space):
+                parameter_space[param_config_index] = list(param_config)
+                for param_index, param_value in enumerate(param_config):
+                    if nonnumerical_type[param_index]:
+                        # just use the index of the non-numerical value instead of the value
+                        new_value = nonnumerical_values[param_index].index(param_value)
+                        parameter_space[param_config_index][param_index] = new_value
+
+        return torch.tensor(parameter_space, dtype=self.dtype).to(self.device), tune_params
+
+
+    def visualize(self):
+        """ Visualize the surrogate model and observations in a plot """
+        from matplotlib import pyplot as plt
+        with torch.no_grad(), gpytorch.settings.fast_pred_var():
+            # Initialize plot
+            f, ax = plt.subplots(1, 1, figsize=(10, 5))
+            ax.set_ylabel('Value')
+            ax.set_xlabel('Parameter')
+
+            param_configs = self.param_configs.to(self.out_device)
+
+            # get true function
+            objective_results = np.array([])
+            for param_config in param_configs:
+                result = self.objective_function(tuple(param_config.tolist()))
+                if result == self.invalid_value:
+                    result = np.nan
+                objective_results = np.append(objective_results, result)
+            if self.scaled_output:
+                objective_results = (objective_results - objective_results.mean()) / objective_results.std()
+
+            if len(param_configs[0]) == 1:
+                ax.plot(np.linspace(param_configs[0], param_configs[-1], self.size), objective_results, 'r')
+            else:
+                ax.plot(range(self.size), objective_results, 'r')
+
+            # take the parameter values for 1D, otherwise the indices
+            if len(param_configs[0]) == 1:
+                x_axis_param_configs = param_configs
+                test_x_x_axis = self.test_x_unscaled.squeeze().to(self.out_device).numpy()
+            else:
+                x_axis_param_configs = torch.tensor(range(self.size))
+                test_x_x_axis = x_axis_param_configs[self.unvisited_configs].to(self.out_device)
+
+            # Get upper and lower confidence bounds
+            observed_pred = self.likelihood(self.model(self.test_x))
+            lower, upper = observed_pred.confidence_region()
+            lower, upper = lower.to(self.out_device), upper.to(self.out_device)
+
+            # Plot initial sample as green stars
+            initial_sample_x_axis = x_axis_param_configs[self.inital_sample_configs].to(self.out_device)
+            initial_sample_y_axis = self.results[self.inital_sample_configs].to(self.out_device)
+            ax.plot(initial_sample_x_axis.numpy(), initial_sample_y_axis.numpy(), 'g*')
+
+            # Plot training data as black stars
+            mask_training_data_no_initial_sample = ~self.inital_sample_configs == self.valid_configs
+            training_x_axis = x_axis_param_configs[mask_training_data_no_initial_sample].to(self.out_device)
+            training_y_axis = self.results[mask_training_data_no_initial_sample].to(self.out_device)
+            ax.plot(training_x_axis.numpy(), training_y_axis.numpy(), 'k*')
+
+            # Plot predictive means as blue line
+            test_x_y_axis = observed_pred.mean.to(self.out_device)
+            ax.plot(test_x_x_axis, test_x_y_axis.numpy(), 'b')
+
+            # Shade between the lower and upper confidence bounds
+            ax.fill_between(test_x_x_axis, lower.numpy(), upper.numpy(), alpha=0.5)
+
+            # set the limits and legend
+            # ax.set_ylim(min(objective_results), max(filter(lambda x: x != self.invalid_value, objective_results)))
+            ax.legend(['Objective Function', 'Initial Sample', 'Observed Data', 'Mean', 'Confidence'])
+
+            if self.animate:
+                f.canvas.draw()
+                plt.pause(0.1)
+
+            plt.show()
diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index 71bf66c3e..c900f5347 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -259,12 +259,17 @@ def get_kernel_string(kernel_source, params=None):
     return kernel_string
 
 
-def get_number_of_valid_configs(tuning_options, max_threads):
-    """compute number of valid configurations in a search space based on restrictions and max_threads"""
+def get_valid_configs(tuning_options, max_threads) -> list:
+    """ compute valid configurations in a search space based on restrictions and max_threads"""
     parameter_space = itertools.product(*tuning_options.tune_params.values())
     if tuning_options.restrictions is not None:
-        parameter_space = filter(lambda p: util.config_valid(p, tuning_options, max_threads), parameter_space)
-    return len(list(parameter_space))
+        parameter_space = filter(lambda p: config_valid(p, tuning_options, max_threads), parameter_space)
+    return list(parameter_space)
+
+
+def get_number_of_valid_configs(tuning_options, max_threads) -> int:
+    """compute number of valid configurations in a search space based on restrictions and max_threads"""
+    return len(get_valid_configs(tuning_options, max_threads))
 
 
 def get_problem_size(problem_size, params):
@@ -388,7 +393,7 @@ def looks_like_a_filename(kernel_source):
             if s in kernel_source:
                 result = False
         # string must contain substring ".c", ".opencl", or ".F"
-        result = result and any([s in kernel_source for s in (".c", ".opencl", ".F")])
+        result = result and any([s in kernel_source for s in (".c", ".opencl", ".F", ".py")])
     logging.debug('kernel_source is a filename: %s' % str(result))
     return result
 

From 5e0bfdeecf4553a713db874256422965f49b5885 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Sat, 15 Jan 2022 13:58:20 +0100
Subject: [PATCH 003/168] Enormous improvement in both performance and speed
 with BO GPyTorch, also added parsing of restriction strings to functions for
 major performance improvement

---
 kernel_tuner/interface.py                     |   4 +
 .../strategies/bayes_opt_GPyTorch_lean.py     | 179 +++++++++---------
 kernel_tuner/util.py                          |  52 +++++
 3 files changed, 150 insertions(+), 85 deletions(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 475966adc..e5cddcdb8 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -418,6 +418,10 @@ def tune_kernel(kernel_name, kernel_source, problem_size, arguments, tune_params
     # check whether block_size_names are used as expected
     util.check_block_size_params_names_list(block_size_names, tune_params)
 
+    # if the restrictions are not callable, make them (increases restrictions check performance significantly)
+    if restrictions is not None and not callable(restrictions):
+        restrictions = util.parse_restrictions(restrictions)
+
     if iterations < 1:
         raise ValueError("Iterations should be at least one!")
 
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
index 8f8f0be30..594f4aa23 100644
--- a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
+++ b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
@@ -1,20 +1,16 @@
 """ Lean implementation of Bayesian Optimization with GPyTorch """
 from copy import deepcopy
 from typing import Any, Tuple
-from random import randint, shuffle
-from math import floor, ceil
+from random import randint, shuffle, choice
+from math import ceil
 import numpy as np
-from numpy.lib.function_base import diff
+from numpy.lib.arraysetops import unique
+from numpy.random import default_rng
 import torch
 import gpytorch
 
-from skopt.sampler import Lhs
-from scipy.stats import norm
-
 from kernel_tuner.util import get_valid_configs, config_valid
 from kernel_tuner.strategies import minimize
-from torch.functional import Tensor
-from torch.nn import parameter
 
 supported_initial_sample_methods = ['lhs', 'index', 'random']
 supported_methods = ['ei', 'poi', 'random']
@@ -48,8 +44,8 @@ def tune(runner, kernel_options, device_options, tuning_options):
     """
 
     # set CUDA availability
-    cuda_available = torch.cuda.is_available()
-    cuda_available = False
+    use_cuda = False
+    cuda_available = torch.cuda.is_available() and use_cuda
     device = torch.device("cuda:0" if cuda_available else "cpu")
     if cuda_available:
         print(f"CUDA is available, device: {torch.cuda.get_device_name(device)}")
@@ -69,7 +65,6 @@ def tune(runner, kernel_options, device_options, tuning_options):
     tuning_options["scaling"] = False
 
     # prune the search space using restrictions
-    # TODO look into the efficiency, especially for GEMM (56.47%)
     parameter_space = get_valid_configs(tuning_options, max_threads)
 
     # limit max_fevals to max size of the parameter space
@@ -119,9 +114,10 @@ def __init__(self, parameter_space: list, kernel_options, tuning_options, runner
         self.max_threads = runner.dev.max_threads
 
         # get tuning options
-        self.initial_sample_method = self.get_hyperparam("initialsamplemethod", "index", supported_initial_sample_methods)
+        self.initial_sample_method = self.get_hyperparam("initialsamplemethod", "lhs", supported_initial_sample_methods)
+        self.initial_sample_random_offset_factor = self.get_hyperparam("initialsamplerandomoffsetfactor", 0.1)
         self.initial_training_iter = self.get_hyperparam("initialtrainingiter", 50)
-        self.training_iter = self.get_hyperparam("trainingiter", 0)
+        self.training_iter = self.get_hyperparam("trainingiter", 3)
         self.cov_kernel_name = self.get_hyperparam("covariancekernel", "matern_scalekernel", supported_cov_kernels)
         self.cov_kernel_lengthscale = self.get_hyperparam("covariancelengthscale", 1.5)
         self.likelihood_name = self.get_hyperparam("likelihood", "Gaussian", supported_likelihoods)
@@ -143,7 +139,7 @@ def __init__(self, parameter_space: list, kernel_options, tuning_options, runner
         self.dtype = torch.double
         self.size = len(parameter_space)
         self.unvisited_configs = torch.ones(self.size, dtype=torch.bool).to(device)
-        self.index_counter = torch.tensor(range(self.size))
+        self.index_counter = torch.arange(self.size)
         self.valid_configs = torch.zeros(self.size, dtype=torch.bool).to(device)
         self.inital_sample_configs = torch.zeros(self.size, dtype=torch.bool).to(device)
         self.results = torch.zeros(self.size, dtype=self.dtype).to(device) * np.nan             # x (param configs) and y (results) must be the same type
@@ -228,29 +224,13 @@ def invalid_x(self):
     def true_param_config_index(self, target_index: int) -> int:
         """ The index required to get the true config param index when dealing with test_x """
         # get the index of the #index-th True (for example the 9th+1 True could be index 13 because there are 4 Falses in between)
-
-        counter_masked = self.index_counter[self.unvisited_configs]
-        return counter_masked[target_index]
+        masked_counter = self.index_counter[self.unvisited_configs]
+        return masked_counter[target_index]
 
     def true_param_config_indices(self, target_indices: torch.Tensor) -> torch.Tensor:
-        """ Same as true_param_config_index, but for an array of targets in O(n) instead of O(n^2). Assumes the array is sorted in ascending order. """
-        # TODO same trick as true_param_config_index
-
-        true_indices = torch.full_like(target_indices, -1).to(self.device)
-        target_index_index = 0
-        target_index = target_indices[target_index_index]
-        count = -1
-        for index, value in enumerate(self.unvisited_configs):
-            if value == True:
-                count += 1
-            if count == target_index:
-                true_indices[target_index_index] = index
-                target_index_index += 1
-                if target_index_index == len(target_indices):
-                    break
-                target_index = target_indices[target_index_index]
-
-        return true_indices
+        """ Same as true_param_config_index, but for an array of targets instead. """
+        masked_counter = self.index_counter[self.unvisited_configs]
+        return masked_counter.index_select(0, target_indices)
 
     def initialize_model(self):
         """ Initialize the surrogate model """
@@ -280,28 +260,35 @@ def initialize_model(self):
 
     def initial_sample(self):
         """ Take an initial sample of the parameter space """
-        param_configs = list()
+        list_param_config_indices = list()
+
+        # generate a random offset from a normal distribution to add to the sample indices
+        rng = default_rng()
+        if self.initial_sample_random_offset_factor > 0.5:
+            raise ValueError("Random offset factor should not be greater than 0.5 to avoid overlapping index offsets")
+        random_offset_size = (self.size / self.num_initial_samples) * self.initial_sample_random_offset_factor
+        random_offsets = np.round(rng.standard_normal(self.num_initial_samples) * random_offset_size)
 
         # first apply the initial sampling method
         if self.initial_sample_method == 'lhs':
-            indices, param_configs = self.get_lhs_sample()
-            for index in indices:
-                # indices may be -1 because of parameter filtering etc., so we replace those with index-spaces samples
-                if index != -1:
-                    self.evaluate_config(index)
+            indices = self.get_lhs_samples(random_offsets)
+            for param_config_index in indices.tolist():
+                list_param_config_indices.append(param_config_index)
+                self.evaluate_config(param_config_index)
         elif self.initial_sample_method == 'random':
             while self.fevals < self.num_initial_samples:
                 param_config_index = randint(0, self.size - 1)
-                param_config = tuple(self.param_configs_scaled[param_config_index].tolist())
-                if param_config in param_configs:
+                if param_config_index in list_param_config_indices:
                     continue
-                param_configs.append(param_config)
+                list_param_config_indices.append(param_config_index)
                 self.evaluate_config(param_config_index)
 
         # then take index-spaced samples until all samples are valid
         while self.fevals < self.num_initial_samples:
             least_evaluated_region_index = self.get_middle_index_of_least_evaluated_region()
-            self.evaluate_config(least_evaluated_region_index)
+            param_config_index = min(max(int(least_evaluated_region_index + random_offsets[self.fevals].item()), 0), self.size-1)
+            list_param_config_indices.append(param_config_index)
+            self.evaluate_config(param_config_index)
 
         # set the current optimum, initial sample mean and initial sample std
         self.current_optimum = self.opt(self.train_y).item()
@@ -311,10 +298,11 @@ def initial_sample(self):
         # save a boolean mask of the initial samples
         self.inital_sample_configs = self.valid_configs.detach().clone()
 
-    def get_lhs_sample(self) -> Tuple[list, list]:
-        """ Get a centered Latin Hypercube Sample """
-        param_configs = list()
+    def get_lhs_samples(self, random_offsets: np.ndarray) -> torch.Tensor:
+        """ Get a centered Latin Hypercube Sample with a random offset """
         n_samples = self.num_initial_samples
+
+        # first get the seperate parameter values to make possibly fictional distributed parameter configurations
         temp_param_configs = [[] for _ in range(n_samples)]
         for param_values in self.tune_params.values():
             l = len(param_values)
@@ -331,34 +319,59 @@ def get_lhs_sample(self) -> Tuple[list, list]:
                 index = ceil(offset + interval * (i + 1)) - 1
                 temp_param_configs[i].append(param_values[index])
 
-        # set the actual parameter configurations
-        for param_config in temp_param_configs:
-            param_config = tuple(param_config)
-            param_configs.append(param_config)
-        param_configs = torch.tensor(param_configs, dtype=self.dtype).to(self.device)
+        # create a tensor of the possibly fictional parameter configurations
+        param_configs = torch.tensor(list(tuple(param_config) for param_config in temp_param_configs), dtype=self.dtype).to(self.device)
+        param_configs = param_configs.unique(dim=0) # remove duplicates
+        n_samples_unique = len(param_configs)
+
+        # get the indices of the parameter configurations
+        num_params = len(self.param_configs[0])
+        minimum_required_num_matching_params = round(num_params * 0.75)  # set the number of parameter matches allowed to be dropped before the search is stopped
+        param_configs_indices = torch.full((n_samples_unique,), -1, dtype=torch.int)
+        for selected_index, selected_param_config in enumerate(param_configs):
+            # for each parameter configuration, count the number of matching parameters
+            required_num_matching_params = num_params
+            matching_params = torch.count_nonzero(self.param_configs == selected_param_config, -1)
+            match_mask = (matching_params == required_num_matching_params)
+            # if there is not at least one matching parameter configuration, lower the required number of matching parameters
+            found_num_matching_param_configs = match_mask.count_nonzero()
+            while found_num_matching_param_configs < 1 and required_num_matching_params > minimum_required_num_matching_params:
+                required_num_matching_params -= 1
+                match_mask = (matching_params == required_num_matching_params)
+                found_num_matching_param_configs = match_mask.count_nonzero()
+
+            # if more than one possible parameter configuration has been found, pick a random one
+            if found_num_matching_param_configs > 1:
+                index = choice(self.index_counter[match_mask])
+            elif found_num_matching_param_configs == 1:
+                index = self.index_counter[match_mask].item()
+            else:
+                # if no matching parameter configurations were found
+                continue
 
-        # get the indices of the parameter configurations in O(n^2)
-        param_configs_indices = [-1 for _ in range(n_samples)]
-        for index, param_config in enumerate(self.param_configs):
-            for selected_index, selected_param_config in enumerate(param_configs):
-                if torch.allclose(selected_param_config, param_config, equal_nan=False) and index not in param_configs_indices:
-                    param_configs_indices[selected_index] = index
+            # set the selected index
+            param_configs_indices[selected_index] = min(max(int(index + random_offsets[selected_index].item()), 0), self.size-1)
 
-        if param_configs_indices.count(-1) > n_samples / 2:
-            print(f"No good fit was found in {param_configs_indices.count(-1)} out of the {n_samples} samples. Perhaps try something other than LHS.")
-        return param_configs_indices, param_configs
+        # filter -1 indices and duplicates that occurred because of the random offset
+        param_configs_indices = param_configs_indices[param_configs_indices >= 0]
+        param_configs_indices = param_configs_indices.unique().type(torch.int)
+        if len(param_configs_indices) < n_samples / 2:
+            print(f"{n_samples - len(param_configs_indices)} out of the {n_samples} LHS samples were duplicates or -1.",
+                  f"This might be because you have few initial samples ({n_samples}) relative to the number of parameters ({num_params}).",
+                  "Perhaps try something other than LHS.")
+        return param_configs_indices
 
     def get_middle_index_of_least_evaluated_region(self) -> int:
         """ Get the middle index of the region of parameter configurations that is the least visited """
         # This uses the largest distance between visited parameter configurations. That means it does not properly take the parameters into account, only the index of the parameter configurations, whereas LHS does.
-        distance_counter = -1
-        distance_tensor = torch.zeros_like(self.unvisited_configs, dtype=torch.int)     # TODO check if .to(self.device) is faster or slower
-        for index, unvisited in enumerate(self.unvisited_configs):
-            if unvisited:
-                distance_counter += 1
-            if not unvisited:
-                distance_counter = 0
-            distance_tensor[index] = distance_counter
+        distance_tensor = torch.arange(self.size)
+
+        # first get the indices that were visited (must be in ascending order)
+        indices_visited = self.index_counter[~self.unvisited_configs]
+
+        # then reset the range after the visited index
+        for index_visited in indices_visited:
+            distance_tensor[index_visited:] = torch.arange(self.size - index_visited)
 
         biggest_distance_index = distance_tensor.argmax()
         biggest_distance = distance_tensor[biggest_distance_index].item()
@@ -542,7 +555,8 @@ def contextual_variance(self, mean: torch.Tensor, std: torch.Tensor):
             improvement_over_current_sample = (abs(self.current_optimum) - self.train_y.mean().item()) / std.mean().item()
             improvement_diff = improvement_over_current_sample - improvement_over_initial_sample
             # the closer the improvement over the current sample is to the improvement over the initial sample, the greater the exploration
-            cv = max(np.log(1 - improvement_diff) + 0.1, 0.001)
+            x = 1 - min(max(1 - improvement_diff, 0.2), 0.0)
+            cv = np.log10(x) + 0.1    # at x=0.0, y=0.1; at x=0.2057, y=0.0.
             return cv
         else:
             raise NotImplementedError("Contextual Variance has not yet been implemented for non-scaled outputs")
@@ -573,7 +587,6 @@ def af_expected_improvement_tensor(self, predictions: Tuple[torch.Tensor, torch.
         if hyperparam is None:
             hyperparam = self.af_params['explorationfactor']
         fplus = self.current_optimum - hyperparam
-        # fplus = torch.full_like(y_mu, fplus) TODO does this make a difference for performance?
 
         diff_improvement = self.get_diff_improvement(y_mu, y_std, fplus)
         normal = torch.distributions.Normal(torch.zeros_like(diff_improvement), torch.ones_like(diff_improvement))
@@ -615,37 +628,33 @@ def set_acquisition_function(self, acquisition_function: str):
 
     def apply_scaling_to_inputs(self):
         """ Scale the inputs using min-max normalization (0-1) and remove constant parameters """
-        # TODO look into the efficiency, especially for GEMM (18.54%)
-        self.scaled_inputs = torch.zeros_like(self.param_configs)
         param_configs_scaled = torch.zeros_like(self.param_configs)
 
         # first get the scaling factors of each parameter
         v_min_list = list()
-        v_max_list = list()
+        v_diff_list = list()
         unchanging_params_list = list()
         for param_values in self.tune_params.values():
             v_min = min(param_values)
             v_max = max(param_values)
             v_min_list.append(v_min)
-            v_max_list.append(v_max)
+            v_diff_list.append(v_max - v_min)
             unchanging_params_list.append(v_min == v_max)
 
         # then set each parameter value to the scaled value
         for param_index in range(len(self.param_configs[0])):
             v_min = v_min_list[param_index]
-            v_max = v_max_list[param_index]
-            v_diff = v_max - v_min
-            for param_config_index, param_config in enumerate(self.param_configs):
-                param_configs_scaled[param_config_index][param_index] = (param_config[param_index] - v_min) / v_diff
+            v_diff = v_diff_list[param_index]
+            param_configs_scaled[:,param_index] = torch.sub(self.param_configs[:,param_index], v_min).div(v_diff)
 
         # finally remove parameters that are constant by applying a mask
-        unchanging_params_tensor = torch.tensor(unchanging_params_list, dtype=torch.bool)
-        if torch.all(unchanging_params_tensor == True):
+        unchanging_params_tensor = ~torch.tensor(unchanging_params_list, dtype=torch.bool)
+        if torch.all(unchanging_params_tensor == False):
             raise ValueError(f"All of the parameter configurations ({self.size}) are the same: {self.param_configs[0]}, nothing to optimize")
-        nonstatic_param_count = torch.count_nonzero(~unchanging_params_tensor)
+        nonstatic_param_count = torch.count_nonzero(unchanging_params_tensor)
         self.param_configs_scaled = torch.zeros([len(param_configs_scaled), nonstatic_param_count], dtype=self.dtype)
         for param_config_index, param_config in enumerate(param_configs_scaled):
-            self.param_configs_scaled[param_config_index] = param_config[~unchanging_params_tensor]
+            self.param_configs_scaled[param_config_index] = param_config[unchanging_params_tensor]
 
     def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[torch.Tensor, dict]:
         """ transform non-numerical or mixed-type parameters to numerical Tensor, also return new tune_params """
@@ -709,7 +718,7 @@ def visualize(self):
                 x_axis_param_configs = param_configs
                 test_x_x_axis = self.test_x_unscaled.squeeze().to(self.out_device).numpy()
             else:
-                x_axis_param_configs = torch.tensor(range(self.size))
+                x_axis_param_configs = torch.arange(self.size)
                 test_x_x_axis = x_axis_param_configs[self.unvisited_configs].to(self.out_device)
 
             # Get upper and lower confidence bounds
diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index c900f5347..838a54b97 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -8,6 +8,7 @@
 import logging
 import warnings
 import re
+from types import FunctionType
 
 import numpy as np
 try:
@@ -669,3 +670,54 @@ def dump_cache(obj: str, tuning_options):
     if isinstance(tuning_options.cache, dict) and tuning_options.cachefile:
         with open(tuning_options.cachefile, "a") as cachefile:
             cachefile.write(obj)
+
+
+def parse_restrictions(restrictions: str):
+    """ parses restrictions from a list of strings into a callable function """
+    operators = [ '+', '-', '*', '/', '%', '==', '!=', '(', ')', '[', ']' ]
+
+    suffix = ' and '
+    parsed_restrictions = ""
+    for restriction in restrictions:
+        new = ""
+
+        # first make sure everything that should be space-separated is
+        for index in range(len(restriction)):
+            if restriction[index] in operators and index > 0 and restriction[index-1] != ' ':
+                new += ' '
+            new += restriction[index]
+            if restriction[index] in operators and index < len(restriction) - 1 and restriction[index+1] != ' ':
+                new += ' '
+
+        restriction = new
+
+        # then parse each part
+        new = ""
+        words = restriction.split(" ")
+        for word in words:
+
+            # filter spaces and empty words
+            if word == ' ' or word == '':
+                continue
+
+            # filter the operators
+            if word in operators:
+                new += word + ' '
+                continue
+
+            # filter numbers
+            if np.char.isnumeric(word):
+                new += word + ' '
+                continue
+
+            # make variables a dictionary 'params' lookup
+            word = f"params['{word}']"
+            new += word
+            new += ' '
+
+        parsed_restrictions += (new + suffix)
+
+    parsed_restrictions = "def restrictions(params): \n return " + parsed_restrictions[:-len(suffix)]
+    code_object = compile(parsed_restrictions, '<string>', 'exec')
+    func = FunctionType(code_object.co_consts[0], globals())
+    return func

From e355c58700866acdfe8ab6744f7430e54aa47834 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 16 Feb 2022 09:55:28 +0100
Subject: [PATCH 004/168] Made experimental Python runner parallel, completely
 new hyperparameter tuning metric, improvements to the BO GPyTorch lean
 implementation

---
 .gitattributes                                |   0
 .github/workflows/docs.yml                    |   0
 .github/workflows/python-app.yml              |   0
 .gitignore                                    |   0
 .zenodo.json                                  |   0
 CHANGELOG.md                                  |   0
 CITATION.cff                                  |   0
 CONTRIBUTING.rst                              |   0
 INSTALL.rst                                   |   0
 LICENSE                                       |   0
 MANIFEST.in                                   |   0
 README.rst                                    |   0
 doc/Makefile                                  |   0
 doc/deploy.sh                                 |   0
 doc/gemm-amd-summary.png                      | Bin
 doc/gh_pages-deploy_key.enc                   | Bin
 doc/source/conf.py                            |   0
 doc/source/contributing.rst                   |   0
 doc/source/correctness.rst                    |   0
 doc/source/design.rst                         |   0
 doc/source/examples.rst                       |   0
 doc/source/hostcode.rst                       |   0
 doc/source/index.rst                          |   0
 doc/source/install.rst                        |   0
 doc/source/templates.rst                      |   0
 doc/source/user-api.rst                       |   0
 doc/source/vocabulary.rst                     |   0
 examples/README.rst                           |   0
 examples/c/matrix_multiply.cpp                |   0
 examples/c/matrix_multiply.py                 |   0
 examples/cuda/convolution.cu                  |   0
 examples/cuda/convolution_streams.cu          |   0
 examples/cuda/expdist.cu                      |   0
 examples/cuda/matmul.cu                       |   0
 examples/cuda/pnpoly.cu                       |   0
 examples/cuda/pnpoly_host.cu                  |   0
 examples/cuda/reduction.cu                    |   0
 examples/cuda/spmv.cu                         |   0
 examples/cuda/stencil.cu                      |   0
 examples/cuda/texture.py                      |   0
 examples/cuda/vector_add_jinja.cu             |   0
 examples/cuda/vector_add_jinja2.py            |   0
 examples/cuda/zeromeanfilter.cu               |   0
 examples/fortran/vector_add.F90               |   0
 examples/fortran/vector_add_acc.F90           |   0
 examples/opencl/convolution.cl                |   0
 examples/opencl/matmul.cl                     |   0
 examples/opencl/reduction.cl                  |   0
 examples/opencl/stencil.cl                    |   0
 kernel_tuner/__init__.py                      |   0
 kernel_tuner/c.py                             |   0
 kernel_tuner/core.py                          |   8 +-
 kernel_tuner/cuda.py                          |   0
 kernel_tuner/cupy.py                          |   0
 kernel_tuner/hyper.py                         |   0
 kernel_tuner/integration.py                   |   0
 kernel_tuner/interface.py                     |   6 +-
 kernel_tuner/kernelbuilder.py                 |   0
 kernel_tuner/nvml.py                          |   0
 kernel_tuner/observers.py                     |   0
 kernel_tuner/opencl.py                        |   0
 kernel_tuner/python.py                        | 243 +++++++-
 kernel_tuner/runners/__init__.py              |   0
 kernel_tuner/runners/sequential.py            |  17 +-
 kernel_tuner/runners/simulation.py            |   2 +-
 kernel_tuner/strategies/__init__.py           |   0
 kernel_tuner/strategies/basinhopping.py       |   0
 kernel_tuner/strategies/bayes_opt.py          |   0
 kernel_tuner/strategies/bayes_opt_GPyTorch.py |   0
 .../strategies/bayes_opt_GPyTorch_lean.py     | 537 +++++++++++++-----
 .../strategies/bayes_opt_alt_BOTorch.py       |   0
 .../strategies/bayes_opt_alt_BayesOpt.py      |   0
 .../strategies/bayes_opt_alt_HyperOpt.py      |   0
 .../strategies/bayes_opt_alt_ScikitOpt.py     |   0
 kernel_tuner/strategies/bayes_opt_old.py      |   0
 kernel_tuner/strategies/brute_force.py        |   3 +-
 kernel_tuner/strategies/diff_evo.py           |   0
 kernel_tuner/strategies/dual_annealing.py     |   0
 kernel_tuner/strategies/firefly_algorithm.py  |   0
 kernel_tuner/strategies/genetic_algorithm.py  |   0
 kernel_tuner/strategies/greedy_ils.py         |   0
 kernel_tuner/strategies/greedy_mls.py         |   0
 kernel_tuner/strategies/hillclimbers.py       |   0
 kernel_tuner/strategies/minimize.py           |   4 +-
 kernel_tuner/strategies/mls.py                |   0
 kernel_tuner/strategies/ordered_greedy_mls.py |   0
 kernel_tuner/strategies/pso.py                |   0
 kernel_tuner/strategies/random_sample.py      |   0
 .../strategies/simulated_annealing.py         |   0
 kernel_tuner/util.py                          |  50 +-
 kernel_tuner/wrappers.py                      |   0
 roadmap.md                                    |   0
 setup.cfg                                     |   0
 setup.py                                      |   0
 test/__init__.py                              |   0
 test/context.py                               |   0
 test/strategies/test_bayesian_optimization.py |   0
 test/strategies/test_genetic_algorithm.py     |   0
 test/strategies/test_minimize.py              |   0
 test/strategies/test_strategies.py            |   0
 test/test_c_functions.py                      |   0
 test/test_cache_file.json                     |   0
 test/test_core.py                             |   0
 test/test_cuda_functions.py                   |   0
 test/test_cuda_mocked.py                      |   0
 test/test_cupy_functions.py                   |   0
 test/test_hyper.py                            |   0
 test/test_integration.py                      |   0
 test/test_interface.py                        |   0
 test/test_kernelbuilder.py                    |   0
 test/test_minimize.py                         |   0
 test/test_observers.py                        |   0
 test/test_opencl_functions.py                 |   0
 test/test_runners.py                          |   0
 test/test_util_functions.py                   |   0
 tutorial/README.md                            |   0
 tutorial/convolution.ipynb                    |   0
 tutorial/diffusion.ipynb                      |   0
 tutorial/diffusion_opencl.ipynb               |   0
 tutorial/diffusion_use_optparam.ipynb         |   0
 tutorial/grid3d.ipynb                         |   0
 tutorial/matmul/matmul.cu                     |   0
 tutorial/matmul/matmul.png                    | Bin
 tutorial/matmul/matmul_naive.cu               |   0
 tutorial/matmul/matmul_naive.png              | Bin
 tutorial/matmul/matmul_shared.cu              |   0
 tutorial/matmul/matmul_shared.png             | Bin
 tutorial/matrix_multiplication.ipynb          |   0
 128 files changed, 650 insertions(+), 220 deletions(-)
 mode change 100644 => 100755 .gitattributes
 mode change 100644 => 100755 .github/workflows/docs.yml
 mode change 100644 => 100755 .github/workflows/python-app.yml
 mode change 100644 => 100755 .gitignore
 mode change 100644 => 100755 .zenodo.json
 mode change 100644 => 100755 CHANGELOG.md
 mode change 100644 => 100755 CITATION.cff
 mode change 100644 => 100755 CONTRIBUTING.rst
 mode change 100644 => 100755 INSTALL.rst
 mode change 100644 => 100755 LICENSE
 mode change 100644 => 100755 MANIFEST.in
 mode change 100644 => 100755 README.rst
 mode change 100644 => 100755 doc/Makefile
 mode change 100644 => 100755 doc/deploy.sh
 mode change 100644 => 100755 doc/gemm-amd-summary.png
 mode change 100644 => 100755 doc/gh_pages-deploy_key.enc
 mode change 100644 => 100755 doc/source/conf.py
 mode change 100644 => 100755 doc/source/contributing.rst
 mode change 100644 => 100755 doc/source/correctness.rst
 mode change 100644 => 100755 doc/source/design.rst
 mode change 100644 => 100755 doc/source/examples.rst
 mode change 100644 => 100755 doc/source/hostcode.rst
 mode change 100644 => 100755 doc/source/index.rst
 mode change 100644 => 100755 doc/source/install.rst
 mode change 100644 => 100755 doc/source/templates.rst
 mode change 100644 => 100755 doc/source/user-api.rst
 mode change 100644 => 100755 doc/source/vocabulary.rst
 mode change 100644 => 100755 examples/README.rst
 mode change 100644 => 100755 examples/c/matrix_multiply.cpp
 mode change 100644 => 100755 examples/c/matrix_multiply.py
 mode change 100644 => 100755 examples/cuda/convolution.cu
 mode change 100644 => 100755 examples/cuda/convolution_streams.cu
 mode change 100644 => 100755 examples/cuda/expdist.cu
 mode change 100644 => 100755 examples/cuda/matmul.cu
 mode change 100644 => 100755 examples/cuda/pnpoly.cu
 mode change 100644 => 100755 examples/cuda/pnpoly_host.cu
 mode change 100644 => 100755 examples/cuda/reduction.cu
 mode change 100644 => 100755 examples/cuda/spmv.cu
 mode change 100644 => 100755 examples/cuda/stencil.cu
 mode change 100644 => 100755 examples/cuda/texture.py
 mode change 100644 => 100755 examples/cuda/vector_add_jinja.cu
 mode change 100644 => 100755 examples/cuda/vector_add_jinja2.py
 mode change 100644 => 100755 examples/cuda/zeromeanfilter.cu
 mode change 100644 => 100755 examples/fortran/vector_add.F90
 mode change 100644 => 100755 examples/fortran/vector_add_acc.F90
 mode change 100644 => 100755 examples/opencl/convolution.cl
 mode change 100644 => 100755 examples/opencl/matmul.cl
 mode change 100644 => 100755 examples/opencl/reduction.cl
 mode change 100644 => 100755 examples/opencl/stencil.cl
 mode change 100644 => 100755 kernel_tuner/__init__.py
 mode change 100644 => 100755 kernel_tuner/c.py
 mode change 100644 => 100755 kernel_tuner/core.py
 mode change 100644 => 100755 kernel_tuner/cuda.py
 mode change 100644 => 100755 kernel_tuner/cupy.py
 mode change 100644 => 100755 kernel_tuner/hyper.py
 mode change 100644 => 100755 kernel_tuner/integration.py
 mode change 100644 => 100755 kernel_tuner/interface.py
 mode change 100644 => 100755 kernel_tuner/kernelbuilder.py
 mode change 100644 => 100755 kernel_tuner/nvml.py
 mode change 100644 => 100755 kernel_tuner/observers.py
 mode change 100644 => 100755 kernel_tuner/opencl.py
 mode change 100644 => 100755 kernel_tuner/python.py
 mode change 100644 => 100755 kernel_tuner/runners/__init__.py
 mode change 100644 => 100755 kernel_tuner/runners/sequential.py
 mode change 100644 => 100755 kernel_tuner/runners/simulation.py
 mode change 100644 => 100755 kernel_tuner/strategies/__init__.py
 mode change 100644 => 100755 kernel_tuner/strategies/basinhopping.py
 mode change 100644 => 100755 kernel_tuner/strategies/bayes_opt.py
 mode change 100644 => 100755 kernel_tuner/strategies/bayes_opt_GPyTorch.py
 mode change 100644 => 100755 kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
 mode change 100644 => 100755 kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
 mode change 100644 => 100755 kernel_tuner/strategies/bayes_opt_alt_BayesOpt.py
 mode change 100644 => 100755 kernel_tuner/strategies/bayes_opt_alt_HyperOpt.py
 mode change 100644 => 100755 kernel_tuner/strategies/bayes_opt_alt_ScikitOpt.py
 mode change 100644 => 100755 kernel_tuner/strategies/bayes_opt_old.py
 mode change 100644 => 100755 kernel_tuner/strategies/brute_force.py
 mode change 100644 => 100755 kernel_tuner/strategies/diff_evo.py
 mode change 100644 => 100755 kernel_tuner/strategies/dual_annealing.py
 mode change 100644 => 100755 kernel_tuner/strategies/firefly_algorithm.py
 mode change 100644 => 100755 kernel_tuner/strategies/genetic_algorithm.py
 mode change 100644 => 100755 kernel_tuner/strategies/greedy_ils.py
 mode change 100644 => 100755 kernel_tuner/strategies/greedy_mls.py
 mode change 100644 => 100755 kernel_tuner/strategies/hillclimbers.py
 mode change 100644 => 100755 kernel_tuner/strategies/minimize.py
 mode change 100644 => 100755 kernel_tuner/strategies/mls.py
 mode change 100644 => 100755 kernel_tuner/strategies/ordered_greedy_mls.py
 mode change 100644 => 100755 kernel_tuner/strategies/pso.py
 mode change 100644 => 100755 kernel_tuner/strategies/random_sample.py
 mode change 100644 => 100755 kernel_tuner/strategies/simulated_annealing.py
 mode change 100644 => 100755 kernel_tuner/util.py
 mode change 100644 => 100755 kernel_tuner/wrappers.py
 mode change 100644 => 100755 roadmap.md
 mode change 100644 => 100755 setup.cfg
 mode change 100644 => 100755 setup.py
 mode change 100644 => 100755 test/__init__.py
 mode change 100644 => 100755 test/context.py
 mode change 100644 => 100755 test/strategies/test_bayesian_optimization.py
 mode change 100644 => 100755 test/strategies/test_genetic_algorithm.py
 mode change 100644 => 100755 test/strategies/test_minimize.py
 mode change 100644 => 100755 test/strategies/test_strategies.py
 mode change 100644 => 100755 test/test_c_functions.py
 mode change 100644 => 100755 test/test_cache_file.json
 mode change 100644 => 100755 test/test_core.py
 mode change 100644 => 100755 test/test_cuda_functions.py
 mode change 100644 => 100755 test/test_cuda_mocked.py
 mode change 100644 => 100755 test/test_cupy_functions.py
 mode change 100644 => 100755 test/test_hyper.py
 mode change 100644 => 100755 test/test_integration.py
 mode change 100644 => 100755 test/test_interface.py
 mode change 100644 => 100755 test/test_kernelbuilder.py
 mode change 100644 => 100755 test/test_minimize.py
 mode change 100644 => 100755 test/test_observers.py
 mode change 100644 => 100755 test/test_opencl_functions.py
 mode change 100644 => 100755 test/test_runners.py
 mode change 100644 => 100755 test/test_util_functions.py
 mode change 100644 => 100755 tutorial/README.md
 mode change 100644 => 100755 tutorial/convolution.ipynb
 mode change 100644 => 100755 tutorial/diffusion.ipynb
 mode change 100644 => 100755 tutorial/diffusion_opencl.ipynb
 mode change 100644 => 100755 tutorial/diffusion_use_optparam.ipynb
 mode change 100644 => 100755 tutorial/grid3d.ipynb
 mode change 100644 => 100755 tutorial/matmul/matmul.cu
 mode change 100644 => 100755 tutorial/matmul/matmul.png
 mode change 100644 => 100755 tutorial/matmul/matmul_naive.cu
 mode change 100644 => 100755 tutorial/matmul/matmul_naive.png
 mode change 100644 => 100755 tutorial/matmul/matmul_shared.cu
 mode change 100644 => 100755 tutorial/matmul/matmul_shared.png
 mode change 100644 => 100755 tutorial/matrix_multiplication.ipynb

diff --git a/.gitattributes b/.gitattributes
old mode 100644
new mode 100755
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
old mode 100644
new mode 100755
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
old mode 100644
new mode 100755
diff --git a/.gitignore b/.gitignore
old mode 100644
new mode 100755
diff --git a/.zenodo.json b/.zenodo.json
old mode 100644
new mode 100755
diff --git a/CHANGELOG.md b/CHANGELOG.md
old mode 100644
new mode 100755
diff --git a/CITATION.cff b/CITATION.cff
old mode 100644
new mode 100755
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
old mode 100644
new mode 100755
diff --git a/INSTALL.rst b/INSTALL.rst
old mode 100644
new mode 100755
diff --git a/LICENSE b/LICENSE
old mode 100644
new mode 100755
diff --git a/MANIFEST.in b/MANIFEST.in
old mode 100644
new mode 100755
diff --git a/README.rst b/README.rst
old mode 100644
new mode 100755
diff --git a/doc/Makefile b/doc/Makefile
old mode 100644
new mode 100755
diff --git a/doc/deploy.sh b/doc/deploy.sh
old mode 100644
new mode 100755
diff --git a/doc/gemm-amd-summary.png b/doc/gemm-amd-summary.png
old mode 100644
new mode 100755
diff --git a/doc/gh_pages-deploy_key.enc b/doc/gh_pages-deploy_key.enc
old mode 100644
new mode 100755
diff --git a/doc/source/conf.py b/doc/source/conf.py
old mode 100644
new mode 100755
diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst
old mode 100644
new mode 100755
diff --git a/doc/source/correctness.rst b/doc/source/correctness.rst
old mode 100644
new mode 100755
diff --git a/doc/source/design.rst b/doc/source/design.rst
old mode 100644
new mode 100755
diff --git a/doc/source/examples.rst b/doc/source/examples.rst
old mode 100644
new mode 100755
diff --git a/doc/source/hostcode.rst b/doc/source/hostcode.rst
old mode 100644
new mode 100755
diff --git a/doc/source/index.rst b/doc/source/index.rst
old mode 100644
new mode 100755
diff --git a/doc/source/install.rst b/doc/source/install.rst
old mode 100644
new mode 100755
diff --git a/doc/source/templates.rst b/doc/source/templates.rst
old mode 100644
new mode 100755
diff --git a/doc/source/user-api.rst b/doc/source/user-api.rst
old mode 100644
new mode 100755
diff --git a/doc/source/vocabulary.rst b/doc/source/vocabulary.rst
old mode 100644
new mode 100755
diff --git a/examples/README.rst b/examples/README.rst
old mode 100644
new mode 100755
diff --git a/examples/c/matrix_multiply.cpp b/examples/c/matrix_multiply.cpp
old mode 100644
new mode 100755
diff --git a/examples/c/matrix_multiply.py b/examples/c/matrix_multiply.py
old mode 100644
new mode 100755
diff --git a/examples/cuda/convolution.cu b/examples/cuda/convolution.cu
old mode 100644
new mode 100755
diff --git a/examples/cuda/convolution_streams.cu b/examples/cuda/convolution_streams.cu
old mode 100644
new mode 100755
diff --git a/examples/cuda/expdist.cu b/examples/cuda/expdist.cu
old mode 100644
new mode 100755
diff --git a/examples/cuda/matmul.cu b/examples/cuda/matmul.cu
old mode 100644
new mode 100755
diff --git a/examples/cuda/pnpoly.cu b/examples/cuda/pnpoly.cu
old mode 100644
new mode 100755
diff --git a/examples/cuda/pnpoly_host.cu b/examples/cuda/pnpoly_host.cu
old mode 100644
new mode 100755
diff --git a/examples/cuda/reduction.cu b/examples/cuda/reduction.cu
old mode 100644
new mode 100755
diff --git a/examples/cuda/spmv.cu b/examples/cuda/spmv.cu
old mode 100644
new mode 100755
diff --git a/examples/cuda/stencil.cu b/examples/cuda/stencil.cu
old mode 100644
new mode 100755
diff --git a/examples/cuda/texture.py b/examples/cuda/texture.py
old mode 100644
new mode 100755
diff --git a/examples/cuda/vector_add_jinja.cu b/examples/cuda/vector_add_jinja.cu
old mode 100644
new mode 100755
diff --git a/examples/cuda/vector_add_jinja2.py b/examples/cuda/vector_add_jinja2.py
old mode 100644
new mode 100755
diff --git a/examples/cuda/zeromeanfilter.cu b/examples/cuda/zeromeanfilter.cu
old mode 100644
new mode 100755
diff --git a/examples/fortran/vector_add.F90 b/examples/fortran/vector_add.F90
old mode 100644
new mode 100755
diff --git a/examples/fortran/vector_add_acc.F90 b/examples/fortran/vector_add_acc.F90
old mode 100644
new mode 100755
diff --git a/examples/opencl/convolution.cl b/examples/opencl/convolution.cl
old mode 100644
new mode 100755
diff --git a/examples/opencl/matmul.cl b/examples/opencl/matmul.cl
old mode 100644
new mode 100755
diff --git a/examples/opencl/reduction.cl b/examples/opencl/reduction.cl
old mode 100644
new mode 100755
diff --git a/examples/opencl/stencil.cl b/examples/opencl/stencil.cl
old mode 100644
new mode 100755
diff --git a/kernel_tuner/__init__.py b/kernel_tuner/__init__.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/c.py b/kernel_tuner/c.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py
old mode 100644
new mode 100755
index fac470b8b..1faf5deb0
--- a/kernel_tuner/core.py
+++ b/kernel_tuner/core.py
@@ -194,7 +194,8 @@ def check_argument_lists(self, kernel_name, arguments):
 class DeviceInterface(object):
     """Class that offers a High-Level Device Interface to the rest of the Kernel Tuner"""
 
-    def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=None, compiler_options=None, iterations=7, observers=None):
+    def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=None, compiler_options=None, iterations=7, observers=None,
+                 parallel_mode=False):
         """ Instantiate the DeviceInterface, based on language in kernel source
 
         :param kernel_source The kernel sources
@@ -228,6 +229,9 @@ def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=No
 
         logging.debug('DeviceInterface instantiated, lang=%s', lang)
 
+        if parallel_mode and lang != "Python":
+            raise NotImplementedError("Parallel mode has not been implemented for languages other than Python")
+
         if lang == "CUDA":
             dev = CudaFunctions(device, compiler_options=compiler_options, iterations=iterations, observers=observers)
         elif lang.upper() == "CUPY":
@@ -237,7 +241,7 @@ def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=No
         elif lang == "C":
             dev = CFunctions(compiler=compiler, compiler_options=compiler_options, iterations=iterations)
         elif lang == "Python":
-            dev = PythonFunctions(iterations=iterations)
+            dev = PythonFunctions(iterations=iterations, observers=observers, parallel_mode=parallel_mode, show_progressbar=True)
         else:
             raise ValueError("Sorry, support for languages other than CUDA, OpenCL, or C is not implemented yet")
 
diff --git a/kernel_tuner/cuda.py b/kernel_tuner/cuda.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/cupy.py b/kernel_tuner/cupy.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/integration.py b/kernel_tuner/integration.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
old mode 100644
new mode 100755
index e5cddcdb8..14f5dfd71
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -403,7 +403,7 @@ def _get_docstring(opts):
 def tune_kernel(kernel_name, kernel_source, problem_size, arguments, tune_params, grid_div_x=None, grid_div_y=None, grid_div_z=None, restrictions=None,
                 answer=None, atol=1e-6, verify=None, verbose=False, lang=None, device=0, platform=0, smem_args=None, cmem_args=None, texmem_args=None,
                 compiler=None, compiler_options=None, log=None, iterations=7, block_size_names=None, quiet=False, strategy=None, strategy_options=None,
-                cache=None, metrics=None, simulation_mode=False, observers=None):
+                cache=None, metrics=None, simulation_mode=False, parallel_mode=False, observers=None):
 
     if log:
         logging.basicConfig(filename=kernel_name + datetime.now().strftime('%Y%m%d-%H:%M:%S') + '.log', level=log)
@@ -469,7 +469,7 @@ def tune_kernel(kernel_name, kernel_source, problem_size, arguments, tune_params
 
     # select the runner for this job based on input
     selected_runner = SimulationRunner if simulation_mode is True else SequentialRunner
-    with selected_runner(kernelsource, kernel_options, device_options, iterations, observers) as runner:
+    with selected_runner(kernelsource, kernel_options, device_options, iterations, observers, parallel_mode) as runner:
 
         #the user-specified function may or may not have an optional atol argument;
         #we normalize it so that it always accepts atol.
@@ -498,7 +498,7 @@ def tune_kernel(kernel_name, kernel_source, problem_size, arguments, tune_params
             else:
                 print("no results to report")
 
-        if cache:
+        if cache and not simulation_mode:
             util.close_cache(cache)
 
     return results, env
diff --git a/kernel_tuner/kernelbuilder.py b/kernel_tuner/kernelbuilder.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/nvml.py b/kernel_tuner/nvml.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/observers.py b/kernel_tuner/observers.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/opencl.py b/kernel_tuner/opencl.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/python.py b/kernel_tuner/python.py
old mode 100644
new mode 100755
index 9655b068d..69c6ac33c
--- a/kernel_tuner/python.py
+++ b/kernel_tuner/python.py
@@ -1,25 +1,39 @@
-""" This module contains the functionality for running and compiling C functions """
+""" This module contains the functionality for running Python functions """
 
 from collections import namedtuple
 import platform
 import logging
+import warnings
 import importlib.util
+from math import ceil
+from time import perf_counter
+from typing import Tuple
 
-import numpy
-import numpy.ctypeslib
+# import cProfile
 
-from kernel_tuner.util import get_temp_filename, delete_temp_file, write_file
+import progressbar
+import numpy as np
+
+# for parallel subprocess runs
+from multiprocess import Manager, cpu_count, get_context    # using Pathos as Python's multiprocessing is unable to pickle
+from itertools import repeat
+import subprocess
+import sys
+from os import getpid
+
+from kernel_tuner.util import get_temp_filename, delete_temp_file
 
 # This represents an individual kernel argument.
 # It contains a numpy object (ndarray or number) and a ctypes object with a copy
 # of the argument data. For an ndarray, the ctypes object is a wrapper for the ndarray's data.
 Argument = namedtuple("Argument", ["numpy", "ctypes"])
+invalid_value = 1e20
 
 
 class PythonFunctions(object):
     """Class that groups the code for running and compiling C functions"""
 
-    def __init__(self, iterations=7):
+    def __init__(self, iterations=7, observers=None, parallel_mode=False, show_progressbar=False):
         """instantiate PythonFunctions object used for interacting with Python code
 
         :param iterations: Number of iterations used while benchmarking a kernel, 7 by default.
@@ -27,12 +41,24 @@ def __init__(self, iterations=7):
         """
         self.iterations = iterations
         self.max_threads = 1024
+        self.show_progressbar = show_progressbar
 
         #environment info
         env = dict()
         env["iterations"] = self.iterations
         self.env = env
         self.name = platform.processor()
+        self.observers = observers or []
+        self.parallel_mode = parallel_mode
+
+        self.benchmark_times = []
+
+        if self.parallel_mode:
+            warnings.warn(
+                "Be sure to check that simulation mode is true for the kernel, because parallel mode requires a completed cache file to avoid race conditions.")
+
+        if len(self.observers) > 0 and self.parallel_mode:
+            raise NotImplementedError("Observers are currently not implemented for parallel execution.")
 
     def __enter__(self):
         return self
@@ -95,27 +121,107 @@ def benchmark(self, func, args, threads, grid):
         :returns: All execution times.
         :rtype: dict()
         """
+
+        # For reference: the following times were obtained with 35 repeats on random_sample strategy.
+        # As seen, there is a lot of overhead with subprocesses; directly executing the function scales much better.
+        # time taken by sequential: 20.7 sec
+        # time taken by parallel in sequential form (subprocess overhead): 46.3 sec
+        # time taken by parallel subprocesses: 7.5 sec on 9, 9.9 sec on 8, 13.6 sec on 4, 27.8 sec on 2, 45.9 sec on 1
+        # time taken by parallel directly: 2.99 sec on 9, 4.0 sec on 8, 5.23 sec on 4, 11.3 sec on 2, 19.3 sec on 1
+
         result = dict()
         result["times"] = []
-        for _ in range(self.iterations):
-            value = self.run_kernel(func, args, threads, grid)
-
-            #I would like to replace the following with actually capturing
-            #stderr and detecting the error directly in Python, it proved
-            #however that capturing stderr for non-Python functions from Python
-            #is a rather difficult thing to do
-            #
-            #The current, less than ideal, scheme uses the convention that a
-            #negative time indicates a 'too many resources requested for launch'
-            #which Kernel Tuner can silently ignore
-            if value < 0.0:
-                raise Exception("too many resources requested for launch")
-
-            result["times"].append(value)
-        result["time"] = numpy.mean(result["times"])
+        min_valid_iterations = ceil(self.iterations * 0.8)
+        iterator = range(self.iterations) if not self.show_progressbar or self.parallel_mode else progressbar.progressbar(
+            range(self.iterations), min_value=0, max_value=self.iterations, redirect_stdout=True)
+
+        # new implementation
+        start_time = perf_counter()
+        if self.parallel_mode and cpu_count() > 1:
+            num_procs = max(min(cpu_count() - 2, self.iterations), 1)
+            logging.debug(f"Running benchmark in parallel on {num_procs} processors")
+            manager = Manager()
+            invalid_flag = manager.Value('i', int(False))
+            MNE_values = manager.list()
+            runtimes = manager.list()
+            warnings_dicts = manager.list()
+            with get_context('spawn').Pool(num_procs) as pool:    # spawn alternative is forkserver, creates a reusable server
+                args = func, args, self.params, invalid_flag
+                MNE_values, runtimes, warnings_dicts = zip(*pool.starmap(run_kernel_and_observers, zip(iterator, repeat(args))))
+                MNE_values, runtimes, warnings_dicts = list(MNE_values), list(runtimes), list(warnings_dicts)
+            result["strategy_time"] = np.mean(runtimes)
+            warning_dict = warnings_dicts[0]
+            for key in warning_dict.keys():
+                warning_dict[key] = np.mean(list(warnings_dict[key] for warnings_dict in warnings_dicts))
+            result["warnings"] = warning_dict
+        else:
+            raise NotImplementedError("Sequential mode has not been implemented yet")
+
+        benchmark_time = perf_counter() - start_time
+        self.benchmark_times.append(benchmark_time)
+        print(f"Time taken: {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}")
+
+        grandmean, times = get_grandmedian_and_times(MNE_values, invalid_value, min_valid_iterations)
+        result["times"] = times
+        result["time"] = grandmean
+        print(f"Grandmean over kernels: {grandmean}, mean MNE per iteration: {np.mean(times)}, std MNE per iteration: {np.std(times)}")
+        return result
+
+        start_time = perf_counter()
+        if self.parallel_mode:
+            num_procs = max(cpu_count() - 1, 1)
+            logging.debug(f"Running benchmark in parallel on {num_procs} processors")
+            manager = Manager()
+            MRE_values = manager.list()
+            runtimes = manager.list()
+            with get_context('spawn').Pool(num_procs) as pool:    # spawn alternative is forkserver, creates a reusable server
+                args = func, args, self.params
+                MRE_values, runtimes = zip(*pool.starmap(run_kernel_and_observers, zip(iterator, repeat(args))))
+                MRE_values, runtimes = list(MRE_values), list(runtimes)
+                print(MRE_values)
+            result["times"] = values
+            result["strategy_time"] = np.mean(runtimes)
+            np_results = np.array(values)
+        else:
+            # sequential implementation
+            np_results = np.array([])
+            for iter in iterator:
+                for obs in self.observers:
+                    obs.before_start()
+                value = self.run_kernel(func, args)
+                for obs in self.observers:
+                    obs.after_finish()
+
+                if value < 0.0:
+                    raise ValueError("Invalid benchmark result")
+
+                result["times"].append(value)
+                np_results = np.append(np_results, value)
+                if value >= invalid_value and iter >= min_valid_iterations and len(np_results[np_results < invalid_value]) < min_valid_iterations:
+                    break
+
+            # fill up the remaining iters with invalid in case of a break
+            result["times"] += [invalid_value] * (self.iterations - len(result["times"]))
+
+            # finish by instrumenting the results with the observers
+            for obs in self.observers:
+                result.update(obs.get_results())
+
+        benchmark_time = perf_counter() - start_time
+        self.benchmark_times.append(benchmark_time)
+        print(f"Time taken: {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}")
+
+        # calculate the mean of the means of the Mean Relative Error over the valid results
+        valid_results = np_results[np_results < invalid_value]
+        mean_mean_MRE = np.mean(valid_results) if len(valid_results) > 0 else np.nan
+
+        # write the 'time' to the results and return
+        if np.isnan(mean_mean_MRE) or len(valid_results) < min_valid_iterations:
+            mean_mean_MRE = invalid_value
+        result["time"] = mean_mean_MRE
         return result
 
-    def run_kernel(self, func, args, threads, grid):
+    def run_kernel(self, func, args):
         """runs the kernel once, returns whatever the kernel returns
 
         :param func: A C function compiled for this specific configuration
@@ -140,8 +246,99 @@ def run_kernel(self, func, args, threads, grid):
         logging.debug("run_kernel")
         logging.debug("arguments=" + str([str(arg) for arg in args]))
 
-        time = func(**self.params)
+        time = func(*args, **self.params)
 
         return time
 
     units = {}
+
+
+def run_kernel_and_observers(iter, args) -> Tuple[list, float, dict]:
+    """ Run a kernel function once, directly in the calling worker process, and time the call. `args` is the tuple (func, funcargs, params, invalid_flag); returns (values, runtime in seconds, warning_dict) where values and warning_dict are whatever func returns. Must be outside the class to avoid pickling issues due to large scope. """
+    PID = getpid()
+    print(f"Iter {iter+1}, PID {PID}", flush=True)
+    func, funcargs, params, invalid_flag = args
+    logging.debug(f"run_kernel as subprocess {iter} (PID {PID})")
+    logging.debug("arguments=" + str([str(arg) for arg in funcargs]))
+
+    # run the kernel and measure how long the call takes
+    starttime = perf_counter()
+    # cProfile.runctx('func(invalid_flag, *funcargs, **params)', globals(), locals(), 'profile-%s.out' % str(iter + 1))
+    # values, warning_dict = None, None
+    values, warning_dict = func(invalid_flag, *funcargs, **params)
+    runtime = perf_counter() - starttime
+    return values, runtime, warning_dict
+
+
+def run_kernel_as_subprocess(iter, args):
+    """ Function to run a kernel as a subprocess for parallel processing: executes the script `<func.__name__>.py` with the arguments plus `key=value` strings built from params, and returns the float found after 'result_value=' in its stdout. Must be outside the class to avoid pickling issues due to large scope. Significantly slower than run_kernel, but guaranteed to be a different process. Observers are not implemented."""
+    func, args, params = args
+    PID = getpid()
+    # print(f"Iter {iter}, PID {PID}", flush=True)
+    logging.debug(f"run_kernel as subprocess {iter} (PID {PID})")
+    logging.debug("arguments=" + str([str(arg) for arg in args]))
+
+    def make_kwargstrings(**kwargs) -> list:
+        return list(f"{key}={value}" for key, value in kwargs.items())
+
+    # Subprocess: append the key=value strings to the arguments (assumes args is a list of strings - TODO confirm) and capture the output
+    args += make_kwargstrings(**params)
+    proc = subprocess.run([sys.executable or 'python', str(func.__name__ + '.py')] + args, shell=False, capture_output=True)
+    stderr = f"subprocess {iter} with PID {PID} errors: {proc.stderr.decode('utf-8')}" if len(proc.stderr.decode('utf-8')) > 0 else ""
+    stdout = f"subprocess {iter} with PID {PID} output: {proc.stdout.decode('utf-8')}" if len(proc.stdout.decode('utf-8')) > 0 else ""
+
+    if stderr != "":
+        logging.debug(stderr)
+        print(stderr)
+    if stdout != "":
+        logging.debug(stdout)
+        # print(stdout)
+
+    time = float(stdout.split("result_value=")[1])
+    return time
+
+
+def get_grandmedian_and_times(MNE_values, invalid_value, min_valid_iterations=1):
+    """ Get the grandmean (mean over kernels of the outlier-filtered median MNE per kernel, or invalid_value if any kernel is invalid) and the list of mean valid MNE per iteration. MNE_values is indexed as [iteration, kernel]; a value is valid if 0 <= value < invalid_value. """
+    MNE_values = np.array(MNE_values)
+    median_MNEs = np.array([])
+    valid_MNE_times = list()
+    # get the median MNE per kernel (a kernel with fewer than min_valid_iterations valid values is marked invalid)
+    for i in range(len(MNE_values[0])):
+        MNE_kernel_values = MNE_values[:, i]
+        valid_MNE_mask = (MNE_kernel_values < invalid_value) & (MNE_kernel_values >= 0)
+        valid_MNE_kernel_values = MNE_kernel_values[valid_MNE_mask]
+        if len(valid_MNE_kernel_values) >= min_valid_iterations:
+            # # filter outliers by keeping only values that are within two times the Median Absolute Deviation
+            # AD = np.abs(valid_MNE_kernel_values - np.median(valid_MNE_kernel_values))
+            # MAD = np.median(AD)
+            # selected_MNE_kernel_values = valid_MNE_kernel_values[AD < MAD * 3]
+            # print(f"Removed {len(valid_MNE_kernel_values) - len(selected_MNE_kernel_values)}")
+            # median_MNEs = np.append(median_MNEs, np.median(selected_MNE_kernel_values))
+            # median_MNEs = np.append(median_MNEs, np.mean(valid_MNE_kernel_values))
+
+            # filter outliers by keeping only values that are within three times the Median Absolute Deviation (a MAD of 0 gives a score of 0.0, so nothing is filtered out)
+            AD = np.abs(valid_MNE_kernel_values - np.median(valid_MNE_kernel_values))
+            MAD = np.median(AD)
+            MAD_score = AD / MAD if MAD else 0.0
+            selected_MNE_kernel_values = valid_MNE_kernel_values[MAD_score < 3]
+            median_MNEs = np.append(median_MNEs, np.median(selected_MNE_kernel_values))
+        else:
+            median_MNEs = np.append(median_MNEs, invalid_value)
+
+    # get the mean MNE per iteration over the valid values only (invalid_value if an iteration has no valid values)
+    for i in range(len(MNE_values)):
+        MNE_iteration_values = MNE_values[i]
+        valid_MNE_mask = (MNE_iteration_values < invalid_value) & (MNE_iteration_values >= 0)
+        valid_MNE_iteration_values = MNE_iteration_values[valid_MNE_mask]
+        if len(valid_MNE_iteration_values) > 0:
+            valid_MNE_times.append(np.mean(valid_MNE_iteration_values))
+        else:
+            valid_MNE_times.append(invalid_value)
+
+    # get the grandmean by taking the mean over the median MNE per kernel, invalid if one of the kernels is invalid
+    print(median_MNEs)
+    grandmean_MNE = np.mean(median_MNEs)
+    if np.isnan(grandmean_MNE) or len(median_MNEs[median_MNEs >= invalid_value]) > 0:
+        grandmean_MNE = invalid_value
+    return grandmean_MNE, valid_MNE_times
diff --git a/kernel_tuner/runners/__init__.py b/kernel_tuner/runners/__init__.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/runners/sequential.py b/kernel_tuner/runners/sequential.py
old mode 100644
new mode 100755
index 05b94121b..20fbfaa7b
--- a/kernel_tuner/runners/sequential.py
+++ b/kernel_tuner/runners/sequential.py
@@ -11,7 +11,7 @@
 class SequentialRunner(object):
     """ SequentialRunner is used for tuning with a single process/thread """
 
-    def __init__(self, kernel_source, kernel_options, device_options, iterations, observers):
+    def __init__(self, kernel_source, kernel_options, device_options, iterations, observers, parallel_mode=False):
         """ Instantiate the SequentialRunner
 
         :param kernel_source: The kernel source
@@ -30,14 +30,12 @@ def __init__(self, kernel_source, kernel_options, device_options, iterations, ob
         """
 
         #detect language and create high-level device interface
-        self.dev = DeviceInterface(kernel_source, iterations=iterations, observers=observers, **device_options).__enter__()
+        self.dev = DeviceInterface(kernel_source, iterations=iterations, observers=observers, parallel_mode=parallel_mode, **device_options).__enter__()
 
         self.units = self.dev.units
         self.quiet = device_options.quiet
         self.kernel_source = kernel_source
-
-        self.warmed_up = False
-
+        self.warmed_up = True if kernel_source.lang == 'Python' else False
         self.simulation_mode = False
 
         #move data to the GPU
@@ -80,10 +78,9 @@ def run(self, parameter_space, kernel_options, tuning_options):
 
             #check if element is in the cache
             x_int = ",".join([str(i) for i in element])
-            if tuning_options.cache:
-                if x_int in tuning_options.cache:
-                    results.append(tuning_options.cache[x_int])
-                    continue
+            if tuning_options.cache and x_int in tuning_options.cache:
+                results.append(tuning_options.cache[x_int])
+                continue
 
             result = self.dev.compile_and_benchmark(self.kernel_source, self.gpu_args, params, kernel_options, tuning_options)
             if result is None:
@@ -106,7 +103,7 @@ def run(self, parameter_space, kernel_options, tuning_options):
             if tuning_options.metrics:
                 params = process_metrics(params, tuning_options.metrics)
 
-            print_config_output(tuning_options.tune_params, params, self.quiet, tuning_options.metrics, self.units)
+            # print_config_output(tuning_options.tune_params, params, self.quiet, tuning_options.metrics, self.units) # TODO uncomment
 
             store_cache(x_int, params, tuning_options)
             results.append(params)
diff --git a/kernel_tuner/runners/simulation.py b/kernel_tuner/runners/simulation.py
old mode 100644
new mode 100755
index aba6dc88c..9e58634a5
--- a/kernel_tuner/runners/simulation.py
+++ b/kernel_tuner/runners/simulation.py
@@ -173,7 +173,7 @@ def __exit__(self, *exc):
 class SimulationRunner(object):
     """ SimulationRunner is used for tuning with a single process/thread """
 
-    def __init__(self, kernel_source, kernel_options, device_options, iterations, observers):
+    def __init__(self, kernel_source, kernel_options, device_options, iterations, observers, parallel_mode=False):
         """ Instantiate the SimulationRunner
 
         :param kernel_source: The kernel source
diff --git a/kernel_tuner/strategies/__init__.py b/kernel_tuner/strategies/__init__.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/basinhopping.py b/kernel_tuner/strategies/basinhopping.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch.py b/kernel_tuner/strategies/bayes_opt_GPyTorch.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
old mode 100644
new mode 100755
index 594f4aa23..e4809be7f
--- a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
+++ b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
@@ -1,22 +1,42 @@
 """ Lean implementation of Bayesian Optimization with GPyTorch """
+# python
 from copy import deepcopy
-from typing import Any, Tuple
+from typing import Tuple
 from random import randint, shuffle, choice
 from math import ceil
+import warnings
+import ast    # for casting strings to dict
+
+# external
 import numpy as np
-from numpy.lib.arraysetops import unique
 from numpy.random import default_rng
 import torch
 import gpytorch
+import arviz as az
 
-from kernel_tuner.util import get_valid_configs, config_valid
+# internal
+from kernel_tuner.util import get_valid_configs
 from kernel_tuner.strategies import minimize
 
+# set supported hyperparameter values
+supported_precisions = ['float', 'double']
 supported_initial_sample_methods = ['lhs', 'index', 'random']
 supported_methods = ['ei', 'poi', 'random']
 supported_cov_kernels = ['matern', 'matern_scalekernel']
 supported_likelihoods = ['Gaussian', 'GaussianPrior', 'FixedNoise']
-supported_optimizers = ['LBFGS', 'Adam']
+supported_optimizers = ['LBFGS', 'Adam', 'AdamW', 'Adagrad', 'ASGD']
+
+
+# set complex hyperparameter defaults: look up the default learning rate by optimizer name (raises KeyError for a name not listed below)
+def default_optimizer_learningrates(key):
+    defaults = {
+        'LBFGS': 1,
+        'Adam': 0.001,
+        'AdamW': 0.001,
+        'ASGD': 0.01,
+        'Adagrad': 0.01
+    }
+    return defaults[key]
 
 
 def tune(runner, kernel_options, device_options, tuning_options):
@@ -50,18 +70,15 @@ def tune(runner, kernel_options, device_options, tuning_options):
     if cuda_available:
         print(f"CUDA is available, device: {torch.cuda.get_device_name(device)}")
 
-
     # retrieve options with defaults
     options = tuning_options.strategy_options
     optimization_direction = options.get("optimization_direction", 'min')
-    num_initial_samples = options.get("popsize", 20)
-    max_fevals = options.get("max_fevals", 100)
+    num_initial_samples = int(options.get("popsize", 20))
+    max_fevals = int(options.get("max_fevals", 220))
     max_threads = runner.dev.max_threads
-    if max_fevals < num_initial_samples:
-        raise ValueError(f"Maximum number of function evaluations ({max_fevals}) can not be lower than the number of initial samples ({num_initial_samples}) ")
 
-    # enabling scaling will unscale and snap inputs on evaluation, more efficient to keep unscale values in a lookup table
-    tuning_options["snap"] = True
+    # enabling scaling will unscale and snap inputs on evaluation, more efficient to scale all at once and keep unscaled values
+    tuning_options["snap"] = False
     tuning_options["scaling"] = False
 
     # prune the search space using restrictions
@@ -69,12 +86,14 @@ def tune(runner, kernel_options, device_options, tuning_options):
 
     # limit max_fevals to max size of the parameter space
     max_fevals = min(len(parameter_space), max_fevals)
+    if max_fevals < num_initial_samples:
+        raise ValueError(
+            f"Maximum number of function evaluations ({max_fevals}) can not be lower than or equal to the number of initial samples ({num_initial_samples}), you might as well brute-force."
+        )
 
     # execute Bayesian Optimization
     BO = BayesianOptimization(parameter_space, kernel_options, tuning_options, runner, num_initial_samples, optimization_direction, device)
-    # BO.visualize()
     all_results = BO.optimize(max_fevals)
-    # BO.visualize()
 
     return all_results, runner.dev.get_environment()
 
@@ -97,7 +116,8 @@ def forward(self, x):
 
 class BayesianOptimization:
 
-    def __init__(self, parameter_space: list, kernel_options, tuning_options, runner, num_initial_samples: int, optimization_direction: str, device: torch.device) -> None:
+    def __init__(self, parameter_space: list, kernel_options, tuning_options, runner, num_initial_samples: int, optimization_direction: str,
+                 device: torch.device) -> None:
         self.animate = False    # TODO remove
 
         # set defaults
@@ -113,37 +133,40 @@ def __init__(self, parameter_space: list, kernel_options, tuning_options, runner
         self.runner = runner
         self.max_threads = runner.dev.max_threads
 
+        # get precision options
+        self.dtype = torch.float if self.get_hyperparam("precision", "float", supported_precisions) == "float" else torch.double
+        self.min_std = self.get_hyperparam("minimum_std", 1e-6, type=float)
+
         # get tuning options
         self.initial_sample_method = self.get_hyperparam("initialsamplemethod", "lhs", supported_initial_sample_methods)
-        self.initial_sample_random_offset_factor = self.get_hyperparam("initialsamplerandomoffsetfactor", 0.1)
-        self.initial_training_iter = self.get_hyperparam("initialtrainingiter", 50)
-        self.training_iter = self.get_hyperparam("trainingiter", 3)
+        self.initial_sample_random_offset_factor = self.get_hyperparam("initialsamplerandomoffsetfactor", 0.1, type=float)
+        self.initial_training_iter = self.get_hyperparam("initialtrainingiter", 5, type=int)
+        self.training_iter = self.get_hyperparam("trainingiter", 1, type=int)
         self.cov_kernel_name = self.get_hyperparam("covariancekernel", "matern_scalekernel", supported_cov_kernels)
-        self.cov_kernel_lengthscale = self.get_hyperparam("covariancelengthscale", 1.5)
+        self.cov_kernel_lengthscale = self.get_hyperparam("covariancelengthscale", 0.5, type=float)
         self.likelihood_name = self.get_hyperparam("likelihood", "Gaussian", supported_likelihoods)
-        self.optimizer_name = self.get_hyperparam("optimizer", "Adam", supported_optimizers)
-        self.optimizer_learningrate = self.get_hyperparam("optimizer_learningrate", 0.1)
+        self.optimizer_name = self.get_hyperparam("optimizer", "LBFGS", supported_optimizers)
+        self.optimizer_learningrate = self.get_hyperparam("optimizer_learningrate", self.optimizer_name, type=float, cast=default_optimizer_learningrates)
         acquisition_function_name = self.get_hyperparam("method", "ei", supported_methods)
-        af_params = self.get_hyperparam("methodparams", {})
+        af_params = self.get_hyperparam("methodparams", {}, type=dict, cast=ast.literal_eval)
 
         # set acquisition function options
         self.set_acquisition_function(acquisition_function_name)
         if 'explorationfactor' not in af_params:
-            af_params['explorationfactor'] = 'CV'
+            af_params['explorationfactor'] = 0.1
         self.af_params = af_params
 
         # set Tensors
-        # the unvisited_configs and valid_configs are to be used as boolean masks on the other tensors, more efficient than adding to / removing from tensors
         self.device = device
         self.out_device = torch.device("cpu")
-        self.dtype = torch.double
         self.size = len(parameter_space)
-        self.unvisited_configs = torch.ones(self.size, dtype=torch.bool).to(device)
         self.index_counter = torch.arange(self.size)
+        # the unvisited_configs and valid_configs are to be used as boolean masks on the other tensors, more efficient than adding to / removing from tensors
+        self.unvisited_configs = torch.ones(self.size, dtype=torch.bool).to(device)
         self.valid_configs = torch.zeros(self.size, dtype=torch.bool).to(device)
         self.inital_sample_configs = torch.zeros(self.size, dtype=torch.bool).to(device)
-        self.results = torch.zeros(self.size, dtype=self.dtype).to(device) * np.nan             # x (param configs) and y (results) must be the same type
-        self.results_std = torch.ones(self.size, dtype=self.dtype).to(device) * 1e-3
+        self.results = torch.zeros(self.size, dtype=self.dtype).to(device) * np.nan    # x (param configs) and y (results) must be the same type
+        self.results_std = torch.ones(self.size, dtype=self.dtype).to(device)    # only a valid assumption if outputs are normalized
 
         # transform non-numerical parameters to numerical, keep true_param_configs for evaluation function
         self.param_configs, self.tune_params = self.transform_nonnumerical_params(parameter_space)
@@ -180,6 +203,11 @@ def __init__(self, parameter_space: list, kernel_options, tuning_options, runner
             'lengthscale': np.nan,
             'noise': np.nan,
         }
+        self.hyperparams_means = {
+            'loss': np.array([]),
+            'lengthscale': np.array([]),
+            'noise': np.array([]),
+        }
         self.initialize_model()
 
     @property
@@ -232,15 +260,19 @@ def true_param_config_indices(self, target_indices: torch.Tensor) -> torch.Tenso
         masked_counter = self.index_counter[self.unvisited_configs]
         return masked_counter.index_select(0, target_indices)
 
-    def initialize_model(self):
+    def initialize_model(self, take_initial_sample=True, train_hyperparams=True):
         """ Initialize the surrogate model """
-        self.initial_sample()
+        if not self.runner.simulation_mode:
+            self.import_cached_evaluations()
+        self.initial_sample_std = self.min_std
+        if take_initial_sample:
+            self.initial_sample()
 
         # create the model
         if self.likelihood_name == 'Gaussian':
             self.likelihood = gpytorch.likelihoods.GaussianLikelihood()
         elif self.likelihood_name == 'FixedNoise':
-            self.likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(noise=self.train_y_err.clamp(min=1.0e-4), learn_additional_noise=False)
+            self.likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(noise=self.train_y_err.clamp(min=self.min_std), learn_additional_noise=False)
         self.likelihood = self.likelihood.to(self.device)
         self.model = ExactGPModel(self.train_x, self.train_y, self.likelihood, self.cov_kernel_name, self.cov_kernel_lengthscale)
 
@@ -249,18 +281,46 @@ def initialize_model(self):
         self.likelihood.train()
         model_parameters = filter(lambda p: p.requires_grad, self.model.parameters())
 
-        # LBFGS is probably better as Adam is only first-order
+        # set the optimizer
+        # LBFGS is probably better as Adam is first-order
         if self.optimizer_name == 'LBFGS':
             self.optimizer = torch.optim.LBFGS(model_parameters, lr=self.optimizer_learningrate)
         elif self.optimizer_name == 'Adam':
             self.optimizer = torch.optim.Adam(model_parameters, lr=self.optimizer_learningrate)
+        elif self.optimizer_name == 'AdamW':
+            self.optimizer = torch.optim.AdamW(model_parameters, lr=self.optimizer_learningrate)
+        elif self.optimizer_name == 'ASGD':
+            self.optimizer = torch.optim.ASGD(model_parameters, lr=self.optimizer_learningrate)
+        elif self.optimizer_name == 'Adagrad':
+            self.optimizer = torch.optim.Adagrad(model_parameters, lr=self.optimizer_learningrate)
 
         self.mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model).to(self.device)
-        self.train_hyperparams(self.initial_training_iter)
+        if train_hyperparams:
+            self.train_hyperparams(self.initial_training_iter)
+        else:
+            self.train_hyperparams(0)
+
+    def import_cached_evaluations(self):
+        """ Import the previously evaluated configurations into this run by passing every configuration in the tuning cache through evaluate_config """
+        # make strings of all the parameter configurations in the search space, in the same comma-separated form as the cache keys
+        param_config_strings = list()
+        for param_config in self.true_param_configs:
+            param_config_strings.append(",".join([str(v) for v in param_config]))
+
+        # load the results from the cache into the run
+        cache = self.tuning_options.cache
+        if len(cache.keys()) > 0:
+            print("Previous cachefile found while not in simulation mode, importing previous evaluations.")
+        for param_config_string, result in cache.items():
+            # get the index of the string in the search space (list.index raises ValueError if the cached configuration is not in the search space)
+            param_config_index = param_config_strings.index(param_config_string)
+            time = self.evaluate_config(param_config_index)
+            assert time == result['time']
+        print(f"Imported {len(self.all_results)} previously evaluated configurations.")
 
     def initial_sample(self):
         """ Take an initial sample of the parameter space """
-        list_param_config_indices = list()
+        list_param_config_indices = list(self.index_counter[~self.unvisited_configs])
 
         # generate a random offset from a normal distribution to add to the sample indices
         rng = default_rng()
@@ -270,9 +330,11 @@ def initial_sample(self):
         random_offsets = np.round(rng.standard_normal(self.num_initial_samples) * random_offset_size)
 
         # first apply the initial sampling method
-        if self.initial_sample_method == 'lhs':
+        if self.initial_sample_method == 'lhs' and self.num_initial_samples - self.fevals > 1:
             indices = self.get_lhs_samples(random_offsets)
             for param_config_index in indices.tolist():
+                if param_config_index in list_param_config_indices:
+                    continue
                 list_param_config_indices.append(param_config_index)
                 self.evaluate_config(param_config_index)
         elif self.initial_sample_method == 'random':
@@ -286,21 +348,27 @@ def initial_sample(self):
         # then take index-spaced samples until all samples are valid
         while self.fevals < self.num_initial_samples:
             least_evaluated_region_index = self.get_middle_index_of_least_evaluated_region()
-            param_config_index = min(max(int(least_evaluated_region_index + random_offsets[self.fevals].item()), 0), self.size-1)
+            param_config_index = min(max(int(least_evaluated_region_index + random_offsets[self.fevals].item()), 0), self.size - 1)
+            if param_config_index in list_param_config_indices:
+                warnings.warn(
+                    f"An already evaluated configuration ({param_config_index}) was selected for index-spaced sampling. " +
+                    "If this happens regularly, reduce the initial sample random offset factor.", AlreadyEvaluatedConflict)
+                param_config_index = least_evaluated_region_index
             list_param_config_indices.append(param_config_index)
             self.evaluate_config(param_config_index)
 
         # set the current optimum, initial sample mean and initial sample std
         self.current_optimum = self.opt(self.train_y).item()
         self.initial_sample_mean = self.train_y.mean().item()
-        self.initial_sample_std = None
+        # self.initial_sample_std = self.train_y.std().item()
+        self.initial_sample_std = self.min_std    # temporary until the predictive posterior has been taken
 
         # save a boolean mask of the initial samples
         self.inital_sample_configs = self.valid_configs.detach().clone()
 
     def get_lhs_samples(self, random_offsets: np.ndarray) -> torch.Tensor:
         """ Get a centered Latin Hypercube Sample with a random offset """
-        n_samples = self.num_initial_samples
+        n_samples = self.num_initial_samples - self.fevals
 
         # first get the seperate parameter values to make possibly fictional distributed parameter configurations
         temp_param_configs = [[] for _ in range(n_samples)]
@@ -321,13 +389,14 @@ def get_lhs_samples(self, random_offsets: np.ndarray) -> torch.Tensor:
 
         # create a tensor of the possibly fictional parameter configurations
         param_configs = torch.tensor(list(tuple(param_config) for param_config in temp_param_configs), dtype=self.dtype).to(self.device)
-        param_configs = param_configs.unique(dim=0) # remove duplicates
+        param_configs = param_configs.unique(dim=0)    # remove duplicates
         n_samples_unique = len(param_configs)
 
         # get the indices of the parameter configurations
         num_params = len(self.param_configs[0])
-        minimum_required_num_matching_params = round(num_params * 0.75)  # set the number of parameter matches allowed to be dropped before the search is stopped
-        param_configs_indices = torch.full((n_samples_unique,), -1, dtype=torch.int)
+        minimum_required_num_matching_params = round(num_params *
+                                                     0.75)    # set the number of parameter matches allowed to be dropped before the search is stopped
+        param_configs_indices = torch.full((n_samples_unique, ), -1, dtype=torch.int)
         for selected_index, selected_param_config in enumerate(param_configs):
             # for each parameter configuration, count the number of matching parameters
             required_num_matching_params = num_params
@@ -350,15 +419,16 @@ def get_lhs_samples(self, random_offsets: np.ndarray) -> torch.Tensor:
                 continue
 
             # set the selected index
-            param_configs_indices[selected_index] = min(max(int(index + random_offsets[selected_index].item()), 0), self.size-1)
+            param_configs_indices[selected_index] = min(max(int(index + random_offsets[selected_index].item()), 0), self.size - 1)
 
         # filter -1 indices and duplicates that occurred because of the random offset
         param_configs_indices = param_configs_indices[param_configs_indices >= 0]
         param_configs_indices = param_configs_indices.unique().type(torch.int)
         if len(param_configs_indices) < n_samples / 2:
-            print(f"{n_samples - len(param_configs_indices)} out of the {n_samples} LHS samples were duplicates or -1.",
-                  f"This might be because you have few initial samples ({n_samples}) relative to the number of parameters ({num_params}).",
-                  "Perhaps try something other than LHS.")
+            warnings.warn(
+                str(f"{n_samples - len(param_configs_indices)} out of the {n_samples} LHS samples were duplicates or -1." +
+                    f"This might be because you have few initial samples ({n_samples}) relative to the number of parameters ({num_params})." +
+                    "Perhaps try something other than LHS."))
         return param_configs_indices
 
     def get_middle_index_of_least_evaluated_region(self) -> int:
@@ -379,11 +449,6 @@ def get_middle_index_of_least_evaluated_region(self) -> int:
         # print(f"Max distance {biggest_distance}, index: {middle_index}, between: {biggest_distance_index-biggest_distance}-{biggest_distance_index}")
         return middle_index
 
-    def find_nearest(self, value, array: torch.Tensor):
-        """ Find the value nearest to the given value in the array """
-        index = (torch.abs(array - value)).argmin()
-        return array[index]
-
     def train_hyperparams(self, training_iter: int):
         """ Optimize the surrogate model hyperparameters iteratively """
         self.model.train()
@@ -395,37 +460,55 @@ def closure():
             try:
                 loss = -self.mll(output, self.train_y)    # calculate loss and backprop gradients
                 loss.backward()
+                # large sudden increase in loss signals numerical instability
+                with warnings.catch_warnings():
+                    warnings.simplefilter("ignore", category=RuntimeWarning)
+                    no_nan_losses = self.hyperparams_means['loss'][~np.isnan(self.hyperparams_means['loss'])]
+                    if len(no_nan_losses) > 1 and loss.item() > np.mean(no_nan_losses) * 2:
+                        warnings.warn("Avoiding loss surge, aborting training", AvoidedLossSurgeWarning)
+                        return np.nan
                 return loss
             except gpytorch.utils.errors.NotPSDError:
-                print(f"WARNING - matrix not positive definite during training")
+                warnings.warn("Matrix not positive definite during training", NotPSDTrainingWarning)
+                return np.nan
 
         loss = None
         for _ in range(training_iter):
-            _loss = self.optimizer.step(closure)
-            if _loss is not None:
+            try:
+                _loss = self.optimizer.step(closure)
+                if _loss is np.nan:
+                    break
                 loss = _loss
+            except gpytorch.utils.errors.NanError:
+                warnings.warn("PSD_safe_Cholesky failed due to too many NaN", NaNTrainingWarning)
+                break
 
         # set the hyperparams to the new values
         try:
-            lengthscale = self.model.covar_module.lengthscale.item()
+            lengthscale = float(self.model.covar_module.lengthscale.item())
         except AttributeError:
-            lengthscale = self.model.covar_module.base_kernel.lengthscale.item()
+            lengthscale = float(self.model.covar_module.base_kernel.lengthscale.item())
+        loss = float(loss.item()) if loss is not None else np.nan
+        noise = float(self.model.likelihood.noise.mean().detach())
         self.hyperparams = {
-            'loss': float(loss.item()) if loss is not None else np.nan,
-            'lengthscale': float(lengthscale),
-            'noise': float(self.model.likelihood.noise.mean().detach()),
+            'loss': loss,
+            'lengthscale': lengthscale,
+            'noise': noise,
         }
+        self.hyperparams_means['loss'] = np.append(self.hyperparams_means['loss'], loss)
+        self.hyperparams_means['lengthscale'] = np.append(self.hyperparams_means['lengthscale'], lengthscale)
+        self.hyperparams_means['noise'] = np.append(self.hyperparams_means['noise'], noise)
 
         # get into evaluation (predictive posterior) mode
         self.model.eval()
         self.likelihood.eval()
 
-    def optimize(self, max_fevals: int) -> Tuple[tuple, float]:
+    def optimize(self, max_fevals: int) -> Tuple[tuple, float]:    #NOSONAR
         """ Optimize the objective """
         predictions_tuple = None
         short_param_config_index = None
         last_invalid = False
-        report_multiple_minima = round(self.size / 10)    # if more than 10% of the space is minima, print a warning
+        report_multiple_minima = ceil(round(self.size / 10))    # if more than 10% of the space is minima, print a warning
         use_contextual_variance = self.af_params['explorationfactor'] == 'CV'
         while self.fevals < max_fevals:
             if last_invalid:
@@ -435,37 +518,57 @@ def optimize(self, max_fevals: int) -> Tuple[tuple, float]:
                 predictions_tuple = self.remove_from_predict_list(predictions_tuple, short_param_config_index)
             else:
                 predictions_tuple = self.predict_list()
-                if self.initial_sample_std is None:
-                    self.initial_sample_std = predictions_tuple[1].mean().item()
-            hyperparam = self.contextual_variance(predictions_tuple[0], predictions_tuple[1]) if use_contextual_variance else None
-            acquisition_values = self.acquisition_function(predictions_tuple, hyperparam)
-            short_param_config_index = self.argopt(acquisition_values)
-            param_config_index = self.true_param_config_index(short_param_config_index)
-
-            # if there are multiple minima in the acquisition function values, we want to take one from the least evaluated region
-            min_acquisition_function_value = acquisition_values[short_param_config_index]
-            indices_where_min = (acquisition_values <= min_acquisition_function_value).nonzero(as_tuple=True)[0]
-            if len(indices_where_min) > 1:
-                # first get the true index for the minima
-                true_indices_where_min = self.true_param_config_indices(indices_where_min)
-                # then get the index of the least evaluated region
+                if self.initial_sample_std <= self.min_std:
+                    self.initial_sample_std = min(max(predictions_tuple[1].mean().item(), self.min_std), 10.0)
+            # if there are NaN or all of the predicted std are the same, take from the least evaluated region
+            mean_has_NaN = bool(torch.any(torch.isnan(predictions_tuple[0])).item())
+            std_has_NaN = bool(torch.any(torch.isnan(predictions_tuple[1])).item())
+            if mean_has_NaN or std_has_NaN or torch.all(predictions_tuple[1] == predictions_tuple[1][0]):
                 least_evaluated_region_index = self.get_middle_index_of_least_evaluated_region()
-                # now find the minima closest to the least evaluated region
-                param_config_index = self.find_nearest(least_evaluated_region_index, true_indices_where_min)
-                short_param_config_index = -1    # invalidate the short_param_config_index because we bypassed it
-                if len(indices_where_min) > report_multiple_minima:
-                    print(
-                        f"WARNING - after {self.fevals}/{max_fevals} fevals, there were multiple minima in the acquisition values ({len(indices_where_min)}), picking one based on the least evaluated region"
-                    )
+                param_config_index = least_evaluated_region_index
+                short_param_config_index = -1
+                if mean_has_NaN:
+                    warning_reason = f"there were NaN in the predicted mean"
+                elif std_has_NaN:
+                    warning_reason = f"there were NaN in the predicted std"
+                else:
+                    warning_reason = "all STDs were the same"
+                warnings.warn(
+                    f"After {self.fevals}/{max_fevals} fevals, {warning_reason}, picking one from the least evaluated region and resetting the surrogate model",
+                    ResetModelWarning)
+                self.initialize_model(take_initial_sample=False, train_hyperparams=False)
+            else:
+                # otherwise, optimize the acquisition function to find the next candidate
+                hyperparam = self.contextual_variance(predictions_tuple[0], predictions_tuple[1]) if use_contextual_variance else None
+                acquisition_values = self.acquisition_function(predictions_tuple, hyperparam)
+                short_param_config_index = self.argopt(acquisition_values)
+                param_config_index = self.true_param_config_index(short_param_config_index)
+
+                # if there are multiple minima in the acquisition function values, we want to take one from the least evaluated region
+                min_acquisition_function_value = acquisition_values[short_param_config_index]
+                indices_where_min = (acquisition_values <= min_acquisition_function_value).nonzero(as_tuple=True)[0]
+                if len(indices_where_min) > 1:
+                    # first get the true index for the minima
+                    true_indices_where_min = self.true_param_config_indices(indices_where_min)
+                    # then get the index of the least evaluated region
+                    least_evaluated_region_index = self.get_middle_index_of_least_evaluated_region()
+                    # now find the minima closest to the least evaluated region
+                    param_config_index = self.find_nearest(least_evaluated_region_index, true_indices_where_min)
+                    short_param_config_index = -1    # invalidate the short_param_config_index because we bypassed it
+                    if len(indices_where_min) > report_multiple_minima:
+                        warnings.warn(
+                            f"After {self.fevals}/{max_fevals} fevals, there were multiple minima in the acquisition values ({len(indices_where_min)}), picking one based on the least evaluated region",
+                            MultipleMinimaWarning)
 
             # evaluate and register the result
             result = self.evaluate_config(param_config_index)
             if result == self.invalid_value and short_param_config_index > -1:
-                # can't use last_invalid if there were multiple minima in the acquisition function values, because short_param_config_index will not be set
+                # can't use last_invalid if short_param_config_index is not set
                 last_invalid = True
             else:
                 last_invalid = False
                 self.model.set_train_data(self.train_x, self.train_y, strict=False)
+                # do not train if there are multiple minima, because it introduces numerical instability or insolvability
                 if self.training_iter > 0:
                     self.train_hyperparams(training_iter=self.training_iter)
                 # set the current optimum
@@ -477,7 +580,7 @@ def optimize(self, max_fevals: int) -> Tuple[tuple, float]:
         return self.all_results
 
     def objective_function(self, param_config: tuple) -> float:
-        return minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.all_results)
+        return minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.all_results, check_restrictions=False)
 
     def evaluate_config(self, param_config_index: int) -> float:
         """ Evaluates a parameter configuration, returns the time """
@@ -501,14 +604,15 @@ def register_result(self, result: float, param_config_index: int):
             self.valid_configs[param_config_index] = True
             self.results[param_config_index] = result
             assert last_result['time'] == result
-            self.results_std[param_config_index] = np.std(last_result['times'])
+            self.results_std[param_config_index] = max(np.std(last_result['times']), self.min_std)
 
-        # add the current model parameters to the results dict
+        # add the current model parameters to the last entry of the results dict
         if len(self.all_results) < 1:
             return
         for key, value in self.hyperparams.items():
-            last_result[key] = value
+            last_result["hyperparam_" + key] = value
         self.all_results[-1] = last_result
+        # TODO check if it is possible to write the results with hyperparameters to the cache if not in simulation mode, maybe with observer?
 
     def update_unique_results(self):
         """ Updates the unique results dictionary """
@@ -519,25 +623,22 @@ def update_unique_results(self):
     def predict_list(self) -> Tuple[torch.Tensor, torch.Tensor]:
         """ Returns the means and standard deviations predicted by the surrogate model for the unvisited parameter configurations """
         with torch.no_grad(), gpytorch.settings.fast_pred_samples(), gpytorch.settings.fast_pred_var():
-            observed_pred = self.likelihood(self.model(self.test_x))
-            mu = observed_pred.mean
-            std = observed_pred.variance.clamp(min=1e-9)    # TODO .sqrt() or not? looks like without is better
-            return mu, std
-
-    def remove_from_predict_list(self, p: Tuple[torch.Tensor, torch.Tensor], i: int) -> Tuple[torch.Tensor, torch.Tensor]:
-        """ Remove an index from a tuple of predictions """
-        return torch.cat([p[0][:i], p[0][i + 1:]]), torch.cat([p[1][:i], p[1][i + 1:]])
-
-    def af_random(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function returning a randomly shuffled list for comparison """
-        list_random = list(range(len(self.unvisited_param_configs)))
-        shuffle(list_random)
-        return list_random
+            try:
+                observed_pred = self.likelihood(self.model(self.test_x))
+                mu = observed_pred.mean
+                std = observed_pred.variance.clamp(min=self.min_std)    # TODO .sqrt() or not? looks like without is better
+                return mu, std
+            except gpytorch.utils.errors.NanError:
+                warnings.warn("NaN error during predictions", NaNPredictionWarning)
+                return torch.ones_like(self.test_x), torch.zeros_like(self.test_x)
+            except gpytorch.utils.errors.NotPSDError:
+                warnings.warn("NotPSD error during predictions", NotPSDPredictionWarning)
+                return torch.ones_like(self.test_x), torch.zeros_like(self.test_x)
 
     def get_diff_improvement(self, y_mu, y_std, fplus) -> torch.Tensor:
         """ compute probability of improvement by assuming normality on the difference in improvement """
         diff_improvement = (y_mu - fplus) / y_std    # y_std can be very small, causing diff_improvement to be very large
-        diff_improvement = (diff_improvement - diff_improvement.mean()) / diff_improvement.std()    # force to N(0,1) with z-score
+        diff_improvement = (diff_improvement - diff_improvement.mean()) / max(diff_improvement.std(), self.min_std)    # force to N(0,1) with z-score
         if self.optimization_direction == 'max':
             diff_improvement = -diff_improvement
         return diff_improvement
@@ -556,11 +657,23 @@ def contextual_variance(self, mean: torch.Tensor, std: torch.Tensor):
             improvement_diff = improvement_over_current_sample - improvement_over_initial_sample
             # the closer the improvement over the current sample is to the improvement over the initial sample, the greater the exploration
             x = 1 - min(max(1 - improvement_diff, 0.2), 0.0)
-            cv = np.log10(x) + 0.1    # at x=0.0, y=0.1; at x=0.2057, y=0.0.
+            # x = 1 - min(max(improvement_diff, 1) * 0.2, 0.0)
+            # the smaller the difference between the initial sample error and current sample error, the greater the exploration
+            # x = 1 - min(max(self.initial_sample_std - std.mean().item(), 1.0), 0.8)
+            # print(self.initial_sample_std, std.mean().item())
+            # print(x)
+            cv = np.log10(x) + 0.1    # at x=0.0, y=0.1; at x=0.2, y=0.003; at x=0.2057, y=0.0.
+            # print(cv)
             return cv
         else:
             raise NotImplementedError("Contextual Variance has not yet been implemented for non-scaled outputs")
 
+    def af_random(self, predictions=None, hyperparam=None) -> list:
+        """ Baseline acquisition function for comparison: the positions of the unvisited parameter configurations in random order """
+        shuffled_indices = list(range(len(self.unvisited_param_configs)))
+        shuffle(shuffled_indices)
+        return shuffled_indices
+
     def af_probability_of_improvement_tensor(self, predictions: Tuple[torch.Tensor, torch.Tensor], hyperparam=None) -> torch.Tensor:
         """ Acquisition function Probability of Improvement (PoI) tensor-based """
 
@@ -574,9 +687,9 @@ def af_probability_of_improvement_tensor(self, predictions: Tuple[torch.Tensor,
         normal = torch.distributions.Normal(torch.zeros_like(diff_improvement), torch.ones_like(diff_improvement))
         cdf = normal.cdf(diff_improvement)
 
-        # sanity check
-        if torch.all(cdf == cdf[0]):
-            raise ValueError("You need to scale the diff_improvement-values!")
+        # # sanity check
+        # if torch.all(cdf == cdf[0]):
+        #     raise FloatingPointError("You need to scale the diff_improvement-values!")
         return cdf
 
     def af_expected_improvement_tensor(self, predictions: Tuple[torch.Tensor, torch.Tensor], hyperparam=None) -> torch.Tensor:
@@ -593,9 +706,9 @@ def af_expected_improvement_tensor(self, predictions: Tuple[torch.Tensor, torch.
         cdf = normal.cdf(diff_improvement)
         pdf = torch.exp(normal.log_prob(diff_improvement))
 
-        # sanity check
-        if torch.all(cdf == cdf[0]) or torch.all(pdf == pdf[0]):
-            raise ValueError("You need to scale the diff_improvement-values!")
+        # # sanity check
+        # if torch.all(cdf == cdf[0]) and torch.all(pdf == pdf[0]):
+        #     raise FloatingPointError("You need to scale the diff_improvement-values!")
 
         # compute expected improvement in bulk
         exp_improvement = (pdf + diff_improvement + y_std * cdf)
@@ -607,25 +720,6 @@ def af_expected_improvement_tensor(self, predictions: Tuple[torch.Tensor, torch.
     """ Helper functions """
     """                  """
 
-    def get_hyperparam(self, name: str, default, supported_values=list()):
-        """ Retrieve the value of a hyperparameter based on the name """
-        value = self.tuning_options.strategy_options.get(name, default)
-        if len(supported_values) > 0 and value not in supported_values:
-            raise ValueError(f"'{name}' is set to {value}, but must be one of {supported_values}")
-        return value
-
-    def set_acquisition_function(self, acquisition_function: str):
-        """ Set the acquisition function based on the name """
-        if acquisition_function not in supported_methods:
-            raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
-
-        if acquisition_function == 'poi':
-            self.acquisition_function = self.af_probability_of_improvement_tensor
-        elif acquisition_function == 'ei':
-            self.acquisition_function = self.af_expected_improvement_tensor
-        elif acquisition_function == 'random':
-            self.acquisition_function = self.af_random
-
     def apply_scaling_to_inputs(self):
         """ Scale the inputs using min-max normalization (0-1) and remove constant parameters """
         param_configs_scaled = torch.zeros_like(self.param_configs)
@@ -645,16 +739,57 @@ def apply_scaling_to_inputs(self):
         for param_index in range(len(self.param_configs[0])):
             v_min = v_min_list[param_index]
             v_diff = v_diff_list[param_index]
-            param_configs_scaled[:,param_index] = torch.sub(self.param_configs[:,param_index], v_min).div(v_diff)
+            param_configs_scaled[:, param_index] = torch.sub(self.param_configs[:, param_index], v_min).div(v_diff)
 
         # finally remove parameters that are constant by applying a mask
         unchanging_params_tensor = ~torch.tensor(unchanging_params_list, dtype=torch.bool)
-        if torch.all(unchanging_params_tensor == False):
-            raise ValueError(f"All of the parameter configurations ({self.size}) are the same: {self.param_configs[0]}, nothing to optimize")
+        # if torch.all(unchanging_params_tensor == False):
+        # raise ValueError(f"All of the parameter configurations ({self.size}) are the same: {self.param_configs[0]}, nothing to optimize")
         nonstatic_param_count = torch.count_nonzero(unchanging_params_tensor)
         self.param_configs_scaled = torch.zeros([len(param_configs_scaled), nonstatic_param_count], dtype=self.dtype)
         for param_config_index, param_config in enumerate(param_configs_scaled):
             self.param_configs_scaled[param_config_index] = param_config[unchanging_params_tensor]
+        self.nonstatic_params = unchanging_params_tensor
+
+    def find_nearest(self, value, array: torch.Tensor):
+        """ Return the element of the array with the smallest absolute difference to the given value """
+        distances = torch.abs(array - value)
+        return array[distances.argmin()]
+
+    def get_hyperparam(self, name: str, default, supported_values=list(), type=None, cast=None):
+        """ Retrieve the value of a hyperparameter based on the name - beware that cast can be a reference to any function """
+        value = self.tuning_options.strategy_options.get(name, default)
+
+        # check with predefined value list
+        if len(supported_values) > 0 and value not in supported_values:
+            raise ValueError(f"'{name}' is set to {value}, but must be one of {supported_values}")
+        # cast to type if provided
+        if type and not isinstance(value, type):
+            if cast:
+                value = cast(value)
+            else:
+                value = type(value)
+
+        # exceptions with more complex types: an exploration factor other than 'CV' in the method parameters is cast to float
+        if name == 'methodparams' and isinstance(value, dict) and value.get('explorationfactor', 'CV') != 'CV':
+            value['explorationfactor'] = float(value['explorationfactor'])
+        return value
+
+    def remove_from_predict_list(self, p: Tuple[torch.Tensor, torch.Tensor], i: int) -> Tuple[torch.Tensor, torch.Tensor]:
+        """ Return the prediction tuple with the element at index i left out of both of its tensors """
+        return tuple(torch.cat([p[k][:i], p[k][i + 1:]]) for k in (0, 1))
+
+    def set_acquisition_function(self, acquisition_function: str):
+        """ Bind self.acquisition_function to the method that belongs to the given name """
+        if acquisition_function not in supported_methods:
+            raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
+
+        # the methods are looked up by name, so that only the selected one is resolved on the instance
+        method_names = {'poi': 'af_probability_of_improvement_tensor', 'ei': 'af_expected_improvement_tensor', 'random': 'af_random'}
+        method_name = method_names.get(acquisition_function)
+        # a supported name without an entry above leaves the acquisition function as it was
+        if method_name is not None:
+            self.acquisition_function = getattr(self, method_name)
 
     def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[torch.Tensor, dict]:
         """ transform non-numerical or mixed-type parameters to numerical Tensor, also return new tune_params """
@@ -664,7 +799,7 @@ def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[torch.Te
         # find out which parameters have nonnumerical or mixed types, and create a range of integers instead
         nonnumericals_exist = False
         nonnumerical_type = torch.zeros(number_of_params, dtype=torch.bool)
-        nonnumerical_values = [ [] for _ in range(number_of_params) ]
+        nonnumerical_values = [[] for _ in range(number_of_params)]
         tune_params = deepcopy(self.tuning_options.tune_params)
         for param_index, (param_key, param_values) in enumerate(self.tuning_options.tune_params.items()):
             if not all(isinstance(v, (int, float, complex)) for v in param_values):
@@ -675,7 +810,7 @@ def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[torch.Te
 
         # overwrite the nonnumerical parameters with numerical parameters
         if nonnumericals_exist:
-            self.tuning_options["snap"] = False     # snapping is only possible with numerical values
+            self.tuning_options["snap"] = False    # snapping is only possible with numerical values
             for param_config_index, param_config in enumerate(parameter_space):
                 parameter_space[param_config_index] = list(param_config)
                 for param_index, param_value in enumerate(param_config):
@@ -686,22 +821,73 @@ def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[torch.Te
 
         return torch.tensor(parameter_space, dtype=self.dtype).to(self.device), tune_params
 
+    def to_xarray(self):
+        """ Experimental: print and plot the surrogate model posterior and predictive posterior via `az` (presumably ArviZ), then exit """
+        # NOTE(review): nothing is returned; exit(0) below ends the whole process, so this looks like unfinished debugging code
+        with torch.no_grad(), gpytorch.settings.fast_pred_samples(), gpytorch.settings.fast_pred_var():
+            posterior = self.model(self.param_configs_scaled)
+            predictive_posterior = self.likelihood(posterior)
+            # print(posterior.variance)
+            # print(az.convert_to_inference_data(posterior.to_data_independent_dist()))
+            # print(len(posterior.covariance_matrix))
+            # print(len(posterior.covariance_matrix[0]))
+            # exit(0)
+
+            # data = az.load_arviz_data('centered_eight')
+            # az.plot_posterior(data, show=True)
+
+            param_configs = list(tuple(pc) for pc in self.param_configs.tolist())    # not used further in this method
+            # posterior_dict = dict(zip(param_configs, posterior.get_base_samples()))
+            posterior_dict = {
+                'mu': posterior.mean,
+                'var': posterior.variance
+            }
+            predictive_posterior_dict = {
+                'mu': predictive_posterior.mean,
+                'var': predictive_posterior.variance
+            }
+            print(posterior_dict)
+            # predictive_posterior_dict = dict(zip(str(self.param_configs_scaled.numpy()), predictive_posterior.get_base_samples()))
+            # log_prob_dict = dict(zip(self.param_configs_scaled, predictive_posterior.log_prob()))
+            tune_param_keys = np.array(list(self.tune_params.keys()))[self.nonstatic_params]
+            tune_param_values = np.array(list(self.tune_params.values()), dtype=object)[self.nonstatic_params]
+            coordinates = dict(zip(tune_param_keys, tune_param_values))
+            dimensions = dict(zip(tune_param_keys, ([k] for k in tune_param_keys)))
+            print(coordinates)
+            print(dimensions)
+            data = az.from_dict(posterior_dict, posterior_predictive=predictive_posterior_dict)
+            print(az.summary(data))
+            print(data.posterior)
+            print(data.posterior_predictive)
+            az.plot_trace(data, show=True)
+            exit(0)    # ends the process here; the statements below are never reached
+            print(data.posterior_predictive)
+
+            # print(az.convert_to_inference_data(posterior.get_base_samples()))
+        # TODO create InferenceData
+        # print(predictive_posterior.sample())
+        # print(az.from_dict())
+        # print(az.convert_to_inference_data(predictive_posterior))
+        exit(0)    # not reached because of the exit(0) inside the with-block
 
     def visualize(self):
         """ Visualize the surrogate model and observations in a plot """
         from matplotlib import pyplot as plt
         with torch.no_grad(), gpytorch.settings.fast_pred_var():
             # Initialize plot
-            f, ax = plt.subplots(1, 1, figsize=(10, 5))
+            f = plt.figure(constrained_layout=True, figsize=(10, 8))
+            subfigures = f.subfigures(2, 1)
+            ax = subfigures[0].subplots(1, 1)
+            axes2 = subfigures[1].subplots(1, 3)
             ax.set_ylabel('Value')
             ax.set_xlabel('Parameter')
 
-            param_configs = self.param_configs.to(self.out_device)
+            param_configs = self.true_param_configs
 
             # get true function
             objective_results = np.array([])
             for param_config in param_configs:
-                result = self.objective_function(tuple(param_config.tolist()))
+                result = self.objective_function(tuple(param_config))
                 if result == self.invalid_value:
                     result = np.nan
                 objective_results = np.append(objective_results, result)
@@ -748,8 +934,85 @@ def visualize(self):
             # ax.set_ylim(min(objective_results), max(filter(lambda x: x != self.invalid_value, objective_results)))
             ax.legend(['Objective Function', 'Initial Sample', 'Observed Data', 'Mean', 'Confidence'])
 
+            # draw the hyperparameter plots
+            # loss
+            axes2[0].plot(self.hyperparams_means['loss'])
+            axes2[0].set_ylabel('Loss')
+            axes2[0].set_xlabel('Number of evaluations')
+            # lengthscale
+            axes2[1].plot(self.hyperparams_means['lengthscale'])
+            axes2[1].set_ylabel('Lengthscale')
+            axes2[1].set_xlabel('Number of evaluations')
+            # noise
+            axes2[2].plot(self.hyperparams_means['noise'])
+            axes2[2].set_ylabel('Noise')
+            axes2[2].set_xlabel('Number of evaluations')
+
             if self.animate:
-                f.canvas.draw()
-                plt.pause(0.1)
+                # f.canvas.draw()
+                plt.savefig('animation_last_graph')
+                # plt.pause(0.1)
+
+            # plt.show()
+
+
+class CustomWarning(Warning):
+
+    def __init__(self, message: str, category: str) -> None:
+        # super().__init__()
+        self.message = message
+        self.category = category
+
+    def __str__(self):
+        return repr(self.message)
+
+    def category(self):
+        return self.category.__name__
+
+
+class AvoidedLossSurgeWarning(CustomWarning):
+
+    def __init__(self, message: str) -> None:
+        super().__init__(message, "AvoidedLossSurgeWarning")
+
+
+class NotPSDTrainingWarning(CustomWarning):
+
+    def __init__(self, message: str) -> None:
+        super().__init__(message, "NotPSDTrainingWarning")
+
+
+class NaNTrainingWarning(CustomWarning):
+
+    def __init__(self, message: str) -> None:
+        super().__init__(message, "NaNTrainingWarning")
+
+
+class NaNPredictionWarning(CustomWarning):
+
+    def __init__(self, message: str) -> None:
+        super().__init__(message, "NaNPredictionWarning")
+
+
+class NotPSDPredictionWarning(CustomWarning):
+
+    def __init__(self, message: str) -> None:
+        super().__init__(message, "NotPSDPredictionWarning")
+
+
+class ResetModelWarning(CustomWarning):
+
+    def __init__(self, message: str) -> None:
+        super().__init__(message, "ResetModelWarning")
+
+
+class MultipleMinimaWarning(CustomWarning):
+
+    def __init__(self, message: str) -> None:
+        super().__init__(message, "MultipleMinimaWarning")
+
+
+class AlreadyEvaluatedConflict(CustomWarning):
 
-            plt.show()
+    def __init__(self, message: str) -> None:
+        super().__init__(message, "AlreadyEvaluatedConflict")
diff --git a/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/bayes_opt_alt_BayesOpt.py b/kernel_tuner/strategies/bayes_opt_alt_BayesOpt.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/bayes_opt_alt_HyperOpt.py b/kernel_tuner/strategies/bayes_opt_alt_HyperOpt.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/bayes_opt_alt_ScikitOpt.py b/kernel_tuner/strategies/bayes_opt_alt_ScikitOpt.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/bayes_opt_old.py b/kernel_tuner/strategies/bayes_opt_old.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/brute_force.py b/kernel_tuner/strategies/brute_force.py
old mode 100644
new mode 100755
index d3364f7d3..d72713908
--- a/kernel_tuner/strategies/brute_force.py
+++ b/kernel_tuner/strategies/brute_force.py
@@ -39,8 +39,7 @@ def tune(runner, kernel_options, device_options, tuning_options):
 
     # check for search space restrictions
     if restrictions is not None:
-        parameter_space = filter(lambda p: util.check_restrictions(restrictions, p, tune_params.keys(), verbose),
-                                 parameter_space)
+        parameter_space = filter(lambda p: util.check_restrictions(restrictions, p, tune_params.keys(), verbose), parameter_space)
 
     results, env = runner.run(parameter_space, kernel_options, tuning_options)
 
diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/dual_annealing.py b/kernel_tuner/strategies/dual_annealing.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/firefly_algorithm.py b/kernel_tuner/strategies/firefly_algorithm.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/greedy_ils.py b/kernel_tuner/strategies/greedy_ils.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/greedy_mls.py b/kernel_tuner/strategies/greedy_mls.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/hillclimbers.py b/kernel_tuner/strategies/hillclimbers.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/minimize.py b/kernel_tuner/strategies/minimize.py
old mode 100644
new mode 100755
index 14a33559e..eb9b1b81b
--- a/kernel_tuner/strategies/minimize.py
+++ b/kernel_tuner/strategies/minimize.py
@@ -56,7 +56,7 @@ def tune(runner, kernel_options, device_options, tuning_options):
     return results, runner.dev.get_environment()
 
 
-def _cost_func(x, kernel_options, tuning_options, runner, results):
+def _cost_func(x, kernel_options, tuning_options, runner, results, check_restrictions=True):
     """ Cost function used by minimize """
 
     error_time = 1e20
@@ -80,7 +80,7 @@ def _cost_func(x, kernel_options, tuning_options, runner, results):
         return tuning_options.cache[x_int]["time"]
 
     # check if this is a legal (non-restricted) parameter instance
-    if tuning_options.restrictions:
+    if check_restrictions and tuning_options.restrictions:
         legal = util.check_restrictions(tuning_options.restrictions, params, tuning_options.tune_params.keys(), tuning_options.verbose)
         if not legal:
             error_result = OrderedDict(zip(tuning_options.tune_params.keys(), params))
diff --git a/kernel_tuner/strategies/mls.py b/kernel_tuner/strategies/mls.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/ordered_greedy_mls.py b/kernel_tuner/strategies/ordered_greedy_mls.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/pso.py b/kernel_tuner/strategies/pso.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/random_sample.py b/kernel_tuner/strategies/random_sample.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py
old mode 100644
new mode 100755
diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
old mode 100644
new mode 100755
index 838a54b97..332ac6750
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -606,7 +606,7 @@ def process_cache(cache, kernel_options, tuning_options, runner):
             if filestr[-1] == ",":
                 filestr = filestr[:-1]
             filestr = filestr + "}\n}"
-        else:
+        elif not tuning_options.simulation_mode:    # don't do this in simulation mode because the cache must have no race conditions in case of parallel execution
             # if it was properly closed, open it for appending new entries
             with open(cache, "w") as cachefile:
                 cachefile.write(filestr[:-3] + ",")
@@ -672,52 +672,22 @@ def dump_cache(obj: str, tuning_options):
             cachefile.write(obj)
 
 
-def parse_restrictions(restrictions: str):
+def parse_restrictions(restrictions: list):
     """" parses restrictions from a list of strings into a callable function """
-    operators = [ '+', '-', '*', '/', '%', '==', '!=', '(', ')', '[', ']' ]
 
+    regex_match_variable = r"([a-zA-Z_$][a-zA-Z_$0-9]*)"
     suffix = ' and '
     parsed_restrictions = ""
     for restriction in restrictions:
-        new = ""
-
-        # first make sure everything that should be space-seperated is
-        for index in range(len(restriction)):
-            if restriction[index] in operators and index > 0 and restriction[index-1] != ' ':
-                new += ' '
-            new += restriction[index]
-            if restriction[index] in operators and index < len(restriction) - 1 and restriction[index+1] != ' ':
-                new += ' '
-
-        restriction = new
-
-        # then parse each part
-        new = ""
-        words = restriction.split(" ")
-        for word in words:
-
-            # filter spaces and empty words
-            if word == ' ' or word == '':
-                continue
-
-            # filter the operators
-            if word in operators:
-                new += word + ' '
-                continue
-
-            # filter numbers
-            if np.char.isnumeric(word):
-                new += word + ' '
-                continue
-
-            # make variables a dictionary 'p' lookup
-            word = f"params['{word}']"
-            new += word
-            new += ' '
+        parsed_restrictions += re.sub(regex_match_variable, r'params["\1"]', restriction) + suffix
 
-        parsed_restrictions += (new + suffix)
+    # tidy up the code by removing the last suffix and unecessary spaces
+    parsed_restrictions = parsed_restrictions[:-len(suffix)]
+    parsed_restrictions = parsed_restrictions.strip()
+    parsed_restrictions = " ".join(parsed_restrictions.split())
 
-    parsed_restrictions = "def restrictions(params): \n return " + parsed_restrictions[:-len(suffix)]
+    # compile into a function
+    parsed_restrictions = f"def restrictions(params): return {parsed_restrictions} \n"
     code_object = compile(parsed_restrictions, '<string>', 'exec')
     func = FunctionType(code_object.co_consts[0], globals())
     return func
diff --git a/kernel_tuner/wrappers.py b/kernel_tuner/wrappers.py
old mode 100644
new mode 100755
diff --git a/roadmap.md b/roadmap.md
old mode 100644
new mode 100755
diff --git a/setup.cfg b/setup.cfg
old mode 100644
new mode 100755
diff --git a/setup.py b/setup.py
old mode 100644
new mode 100755
diff --git a/test/__init__.py b/test/__init__.py
old mode 100644
new mode 100755
diff --git a/test/context.py b/test/context.py
old mode 100644
new mode 100755
diff --git a/test/strategies/test_bayesian_optimization.py b/test/strategies/test_bayesian_optimization.py
old mode 100644
new mode 100755
diff --git a/test/strategies/test_genetic_algorithm.py b/test/strategies/test_genetic_algorithm.py
old mode 100644
new mode 100755
diff --git a/test/strategies/test_minimize.py b/test/strategies/test_minimize.py
old mode 100644
new mode 100755
diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
old mode 100644
new mode 100755
diff --git a/test/test_c_functions.py b/test/test_c_functions.py
old mode 100644
new mode 100755
diff --git a/test/test_cache_file.json b/test/test_cache_file.json
old mode 100644
new mode 100755
diff --git a/test/test_core.py b/test/test_core.py
old mode 100644
new mode 100755
diff --git a/test/test_cuda_functions.py b/test/test_cuda_functions.py
old mode 100644
new mode 100755
diff --git a/test/test_cuda_mocked.py b/test/test_cuda_mocked.py
old mode 100644
new mode 100755
diff --git a/test/test_cupy_functions.py b/test/test_cupy_functions.py
old mode 100644
new mode 100755
diff --git a/test/test_hyper.py b/test/test_hyper.py
old mode 100644
new mode 100755
diff --git a/test/test_integration.py b/test/test_integration.py
old mode 100644
new mode 100755
diff --git a/test/test_interface.py b/test/test_interface.py
old mode 100644
new mode 100755
diff --git a/test/test_kernelbuilder.py b/test/test_kernelbuilder.py
old mode 100644
new mode 100755
diff --git a/test/test_minimize.py b/test/test_minimize.py
old mode 100644
new mode 100755
diff --git a/test/test_observers.py b/test/test_observers.py
old mode 100644
new mode 100755
diff --git a/test/test_opencl_functions.py b/test/test_opencl_functions.py
old mode 100644
new mode 100755
diff --git a/test/test_runners.py b/test/test_runners.py
old mode 100644
new mode 100755
diff --git a/test/test_util_functions.py b/test/test_util_functions.py
old mode 100644
new mode 100755
diff --git a/tutorial/README.md b/tutorial/README.md
old mode 100644
new mode 100755
diff --git a/tutorial/convolution.ipynb b/tutorial/convolution.ipynb
old mode 100644
new mode 100755
diff --git a/tutorial/diffusion.ipynb b/tutorial/diffusion.ipynb
old mode 100644
new mode 100755
diff --git a/tutorial/diffusion_opencl.ipynb b/tutorial/diffusion_opencl.ipynb
old mode 100644
new mode 100755
diff --git a/tutorial/diffusion_use_optparam.ipynb b/tutorial/diffusion_use_optparam.ipynb
old mode 100644
new mode 100755
diff --git a/tutorial/grid3d.ipynb b/tutorial/grid3d.ipynb
old mode 100644
new mode 100755
diff --git a/tutorial/matmul/matmul.cu b/tutorial/matmul/matmul.cu
old mode 100644
new mode 100755
diff --git a/tutorial/matmul/matmul.png b/tutorial/matmul/matmul.png
old mode 100644
new mode 100755
diff --git a/tutorial/matmul/matmul_naive.cu b/tutorial/matmul/matmul_naive.cu
old mode 100644
new mode 100755
diff --git a/tutorial/matmul/matmul_naive.png b/tutorial/matmul/matmul_naive.png
old mode 100644
new mode 100755
diff --git a/tutorial/matmul/matmul_shared.cu b/tutorial/matmul/matmul_shared.cu
old mode 100644
new mode 100755
diff --git a/tutorial/matmul/matmul_shared.png b/tutorial/matmul/matmul_shared.png
old mode 100644
new mode 100755
diff --git a/tutorial/matrix_multiplication.ipynb b/tutorial/matrix_multiplication.ipynb
old mode 100644
new mode 100755

From cf1d4e4a14bb94ba0f7a9b252a4d1811842a637e Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 16 Feb 2022 09:59:45 +0100
Subject: [PATCH 005/168] Reverted file permissions

---
 .gitattributes                                     |   0
 .github/workflows/docs.yml                         |   0
 .github/workflows/python-app.yml                   |   0
 .gitignore                                         |   0
 .zenodo.json                                       |   0
 CHANGELOG.md                                       |   0
 CITATION.cff                                       |   0
 CONTRIBUTING.rst                                   |   0
 INSTALL.rst                                        |   0
 LICENSE                                            |   0
 MANIFEST.in                                        |   0
 README.rst                                         |   0
 doc/Makefile                                       |   0
 doc/deploy.sh                                      |   0
 doc/gemm-amd-summary.png                           | Bin
 doc/gh_pages-deploy_key.enc                        | Bin
 doc/source/conf.py                                 |   0
 doc/source/contributing.rst                        |   0
 doc/source/correctness.rst                         |   0
 doc/source/design.png                              | Bin
 doc/source/design.rst                              |   0
 doc/source/examples.rst                            |   0
 doc/source/hostcode.rst                            |   0
 doc/source/index.rst                               |   0
 doc/source/install.rst                             |   0
 doc/source/templates.rst                           |   0
 doc/source/user-api.rst                            |   0
 doc/source/vocabulary.rst                          |   0
 examples/README.rst                                |   0
 examples/c/matrix_multiply.cpp                     |   0
 examples/c/matrix_multiply.py                      |   0
 examples/c/vector_add.py                           |   0
 examples/cuda-c++/vector_add.py                    |   0
 examples/cuda-c++/vector_add_blocksize.py          |   0
 examples/cuda-c++/vector_add_cupy.py               |   0
 examples/cuda/convolution.cu                       |   0
 examples/cuda/convolution.py                       |   0
 examples/cuda/convolution_correct.py               |   0
 examples/cuda/convolution_streams.cu               |   0
 examples/cuda/convolution_streams.py               |   0
 examples/cuda/expdist.cu                           |   0
 examples/cuda/expdist.py                           |   0
 examples/cuda/matmul.cu                            |   0
 examples/cuda/matmul.py                            |   0
 examples/cuda/pnpoly.cu                            |   0
 examples/cuda/pnpoly.py                            |   0
 examples/cuda/pnpoly_host.cu                       |   0
 examples/cuda/python_kernel.py                     |   0
 examples/cuda/reduction.cu                         |   0
 examples/cuda/reduction.py                         |   0
 examples/cuda/sepconv.py                           |   0
 examples/cuda/spmv.cu                              |   0
 examples/cuda/spmv.py                              |   0
 examples/cuda/stencil.cu                           |   0
 examples/cuda/stencil.py                           |   0
 examples/cuda/test_vector_add.py                   |   0
 examples/cuda/test_vector_add_parameterized.py     |   0
 examples/cuda/texture.py                           |   0
 examples/cuda/vector_add.py                        |   0
 examples/cuda/vector_add_codegen.py                |   0
 examples/cuda/vector_add_cupy.py                   |   0
 examples/cuda/vector_add_jinja.cu                  |   0
 examples/cuda/vector_add_jinja.py                  |   0
 examples/cuda/vector_add_jinja2.py                 |   0
 examples/cuda/vector_add_metric.py                 |   0
 examples/cuda/vector_add_observers.py              |   0
 examples/cuda/zeromeanfilter.cu                    |   0
 examples/cuda/zeromeanfilter.py                    |   0
 examples/fortran/test_vector_add.py                |   0
 examples/fortran/vector_add.F90                    |   0
 examples/fortran/vector_add.py                     |   0
 examples/fortran/vector_add_acc.F90                |   0
 examples/fortran/vector_add_acc.py                 |   0
 examples/opencl/convolution.cl                     |   0
 examples/opencl/convolution.py                     |   0
 examples/opencl/convolution_correct.py             |   0
 examples/opencl/matmul.cl                          |   0
 examples/opencl/matmul.py                          |   0
 examples/opencl/reduction.cl                       |   0
 examples/opencl/reduction.py                       |   0
 examples/opencl/sepconv.py                         |   0
 examples/opencl/stencil.cl                         |   0
 examples/opencl/stencil.py                         |   0
 examples/opencl/vector_add.py                      |   0
 examples/opencl/vector_add_codegen.py              |   0
 examples/opencl/vector_add_observers.py            |   0
 kernel_tuner/__init__.py                           |   0
 kernel_tuner/c.py                                  |   0
 kernel_tuner/core.py                               |   0
 kernel_tuner/cuda.py                               |   0
 kernel_tuner/cupy.py                               |   0
 kernel_tuner/hyper.py                              |   0
 kernel_tuner/integration.py                        |   0
 kernel_tuner/interface.py                          |   0
 kernel_tuner/kernelbuilder.py                      |   0
 kernel_tuner/nvml.py                               |   0
 kernel_tuner/observers.py                          |   0
 kernel_tuner/opencl.py                             |   0
 kernel_tuner/python.py                             |   0
 kernel_tuner/runners/__init__.py                   |   0
 kernel_tuner/runners/sequential.py                 |   0
 kernel_tuner/runners/simulation.py                 |   0
 kernel_tuner/strategies/__init__.py                |   0
 kernel_tuner/strategies/basinhopping.py            |   0
 kernel_tuner/strategies/bayes_opt.py               |   0
 kernel_tuner/strategies/bayes_opt_GPyTorch.py      |   0
 kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py |   0
 kernel_tuner/strategies/bayes_opt_alt_BOTorch.py   |   0
 kernel_tuner/strategies/bayes_opt_alt_BayesOpt.py  |   0
 kernel_tuner/strategies/bayes_opt_alt_HyperOpt.py  |   0
 kernel_tuner/strategies/bayes_opt_alt_ScikitOpt.py |   0
 kernel_tuner/strategies/bayes_opt_old.py           |   0
 kernel_tuner/strategies/brute_force.py             |   0
 kernel_tuner/strategies/diff_evo.py                |   0
 kernel_tuner/strategies/dual_annealing.py          |   0
 kernel_tuner/strategies/firefly_algorithm.py       |   0
 kernel_tuner/strategies/genetic_algorithm.py       |   0
 kernel_tuner/strategies/greedy_ils.py              |   0
 kernel_tuner/strategies/greedy_mls.py              |   0
 kernel_tuner/strategies/hillclimbers.py            |   0
 kernel_tuner/strategies/minimize.py                |   0
 kernel_tuner/strategies/mls.py                     |   0
 kernel_tuner/strategies/ordered_greedy_mls.py      |   0
 kernel_tuner/strategies/pso.py                     |   0
 kernel_tuner/strategies/random_sample.py           |   0
 kernel_tuner/strategies/simulated_annealing.py     |   0
 kernel_tuner/util.py                               |   0
 kernel_tuner/wrappers.py                           |   0
 roadmap.md                                         |   0
 setup.cfg                                          |   0
 setup.py                                           |   0
 test/__init__.py                                   |   0
 test/context.py                                    |   0
 test/strategies/test_bayesian_optimization.py      |   0
 test/strategies/test_genetic_algorithm.py          |   0
 test/strategies/test_minimize.py                   |   0
 test/strategies/test_strategies.py                 |   0
 test/test_c_functions.py                           |   0
 test/test_cache_file.json                          |   0
 test/test_core.py                                  |   0
 test/test_cuda_functions.py                        |   0
 test/test_cuda_mocked.py                           |   0
 test/test_cupy_functions.py                        |   0
 test/test_hyper.py                                 |   0
 test/test_integration.py                           |   0
 test/test_interface.py                             |   0
 test/test_kernelbuilder.py                         |   0
 test/test_minimize.py                              |   0
 test/test_observers.py                             |   0
 test/test_opencl_functions.py                      |   0
 test/test_runners.py                               |   0
 test/test_util_functions.py                        |   0
 tutorial/README.md                                 |   0
 tutorial/convolution.ipynb                         |   0
 tutorial/diffusion.ipynb                           |   0
 tutorial/diffusion_opencl.ipynb                    |   0
 tutorial/diffusion_use_optparam.ipynb              |   0
 tutorial/grid3d.ipynb                              |   0
 tutorial/matmul/matmul.cu                          |   0
 tutorial/matmul/matmul.png                         | Bin
 tutorial/matmul/matmul.py                          |   0
 tutorial/matmul/matmul_naive.cu                    |   0
 tutorial/matmul/matmul_naive.png                   | Bin
 tutorial/matmul/matmul_naive.py                    |   0
 tutorial/matmul/matmul_shared.cu                   |   0
 tutorial/matmul/matmul_shared.png                  | Bin
 tutorial/matmul/matmul_shared.py                   |   0
 tutorial/matrix_multiplication.ipynb               |   0
 168 files changed, 0 insertions(+), 0 deletions(-)
 mode change 100755 => 100644 .gitattributes
 mode change 100755 => 100644 .github/workflows/docs.yml
 mode change 100755 => 100644 .github/workflows/python-app.yml
 mode change 100755 => 100644 .gitignore
 mode change 100755 => 100644 .zenodo.json
 mode change 100755 => 100644 CHANGELOG.md
 mode change 100755 => 100644 CITATION.cff
 mode change 100755 => 100644 CONTRIBUTING.rst
 mode change 100755 => 100644 INSTALL.rst
 mode change 100755 => 100644 LICENSE
 mode change 100755 => 100644 MANIFEST.in
 mode change 100755 => 100644 README.rst
 mode change 100755 => 100644 doc/Makefile
 mode change 100755 => 100644 doc/deploy.sh
 mode change 100755 => 100644 doc/gemm-amd-summary.png
 mode change 100755 => 100644 doc/gh_pages-deploy_key.enc
 mode change 100755 => 100644 doc/source/conf.py
 mode change 100755 => 100644 doc/source/contributing.rst
 mode change 100755 => 100644 doc/source/correctness.rst
 mode change 100755 => 100644 doc/source/design.png
 mode change 100755 => 100644 doc/source/design.rst
 mode change 100755 => 100644 doc/source/examples.rst
 mode change 100755 => 100644 doc/source/hostcode.rst
 mode change 100755 => 100644 doc/source/index.rst
 mode change 100755 => 100644 doc/source/install.rst
 mode change 100755 => 100644 doc/source/templates.rst
 mode change 100755 => 100644 doc/source/user-api.rst
 mode change 100755 => 100644 doc/source/vocabulary.rst
 mode change 100755 => 100644 examples/README.rst
 mode change 100755 => 100644 examples/c/matrix_multiply.cpp
 mode change 100755 => 100644 examples/c/matrix_multiply.py
 mode change 100755 => 100644 examples/c/vector_add.py
 mode change 100755 => 100644 examples/cuda-c++/vector_add.py
 mode change 100755 => 100644 examples/cuda-c++/vector_add_blocksize.py
 mode change 100755 => 100644 examples/cuda-c++/vector_add_cupy.py
 mode change 100755 => 100644 examples/cuda/convolution.cu
 mode change 100755 => 100644 examples/cuda/convolution.py
 mode change 100755 => 100644 examples/cuda/convolution_correct.py
 mode change 100755 => 100644 examples/cuda/convolution_streams.cu
 mode change 100755 => 100644 examples/cuda/convolution_streams.py
 mode change 100755 => 100644 examples/cuda/expdist.cu
 mode change 100755 => 100644 examples/cuda/expdist.py
 mode change 100755 => 100644 examples/cuda/matmul.cu
 mode change 100755 => 100644 examples/cuda/matmul.py
 mode change 100755 => 100644 examples/cuda/pnpoly.cu
 mode change 100755 => 100644 examples/cuda/pnpoly.py
 mode change 100755 => 100644 examples/cuda/pnpoly_host.cu
 mode change 100755 => 100644 examples/cuda/python_kernel.py
 mode change 100755 => 100644 examples/cuda/reduction.cu
 mode change 100755 => 100644 examples/cuda/reduction.py
 mode change 100755 => 100644 examples/cuda/sepconv.py
 mode change 100755 => 100644 examples/cuda/spmv.cu
 mode change 100755 => 100644 examples/cuda/spmv.py
 mode change 100755 => 100644 examples/cuda/stencil.cu
 mode change 100755 => 100644 examples/cuda/stencil.py
 mode change 100755 => 100644 examples/cuda/test_vector_add.py
 mode change 100755 => 100644 examples/cuda/test_vector_add_parameterized.py
 mode change 100755 => 100644 examples/cuda/texture.py
 mode change 100755 => 100644 examples/cuda/vector_add.py
 mode change 100755 => 100644 examples/cuda/vector_add_codegen.py
 mode change 100755 => 100644 examples/cuda/vector_add_cupy.py
 mode change 100755 => 100644 examples/cuda/vector_add_jinja.cu
 mode change 100755 => 100644 examples/cuda/vector_add_jinja.py
 mode change 100755 => 100644 examples/cuda/vector_add_jinja2.py
 mode change 100755 => 100644 examples/cuda/vector_add_metric.py
 mode change 100755 => 100644 examples/cuda/vector_add_observers.py
 mode change 100755 => 100644 examples/cuda/zeromeanfilter.cu
 mode change 100755 => 100644 examples/cuda/zeromeanfilter.py
 mode change 100755 => 100644 examples/fortran/test_vector_add.py
 mode change 100755 => 100644 examples/fortran/vector_add.F90
 mode change 100755 => 100644 examples/fortran/vector_add.py
 mode change 100755 => 100644 examples/fortran/vector_add_acc.F90
 mode change 100755 => 100644 examples/fortran/vector_add_acc.py
 mode change 100755 => 100644 examples/opencl/convolution.cl
 mode change 100755 => 100644 examples/opencl/convolution.py
 mode change 100755 => 100644 examples/opencl/convolution_correct.py
 mode change 100755 => 100644 examples/opencl/matmul.cl
 mode change 100755 => 100644 examples/opencl/matmul.py
 mode change 100755 => 100644 examples/opencl/reduction.cl
 mode change 100755 => 100644 examples/opencl/reduction.py
 mode change 100755 => 100644 examples/opencl/sepconv.py
 mode change 100755 => 100644 examples/opencl/stencil.cl
 mode change 100755 => 100644 examples/opencl/stencil.py
 mode change 100755 => 100644 examples/opencl/vector_add.py
 mode change 100755 => 100644 examples/opencl/vector_add_codegen.py
 mode change 100755 => 100644 examples/opencl/vector_add_observers.py
 mode change 100755 => 100644 kernel_tuner/__init__.py
 mode change 100755 => 100644 kernel_tuner/c.py
 mode change 100755 => 100644 kernel_tuner/core.py
 mode change 100755 => 100644 kernel_tuner/cuda.py
 mode change 100755 => 100644 kernel_tuner/cupy.py
 mode change 100755 => 100644 kernel_tuner/hyper.py
 mode change 100755 => 100644 kernel_tuner/integration.py
 mode change 100755 => 100644 kernel_tuner/interface.py
 mode change 100755 => 100644 kernel_tuner/kernelbuilder.py
 mode change 100755 => 100644 kernel_tuner/nvml.py
 mode change 100755 => 100644 kernel_tuner/observers.py
 mode change 100755 => 100644 kernel_tuner/opencl.py
 mode change 100755 => 100644 kernel_tuner/python.py
 mode change 100755 => 100644 kernel_tuner/runners/__init__.py
 mode change 100755 => 100644 kernel_tuner/runners/sequential.py
 mode change 100755 => 100644 kernel_tuner/runners/simulation.py
 mode change 100755 => 100644 kernel_tuner/strategies/__init__.py
 mode change 100755 => 100644 kernel_tuner/strategies/basinhopping.py
 mode change 100755 => 100644 kernel_tuner/strategies/bayes_opt.py
 mode change 100755 => 100644 kernel_tuner/strategies/bayes_opt_GPyTorch.py
 mode change 100755 => 100644 kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
 mode change 100755 => 100644 kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
 mode change 100755 => 100644 kernel_tuner/strategies/bayes_opt_alt_BayesOpt.py
 mode change 100755 => 100644 kernel_tuner/strategies/bayes_opt_alt_HyperOpt.py
 mode change 100755 => 100644 kernel_tuner/strategies/bayes_opt_alt_ScikitOpt.py
 mode change 100755 => 100644 kernel_tuner/strategies/bayes_opt_old.py
 mode change 100755 => 100644 kernel_tuner/strategies/brute_force.py
 mode change 100755 => 100644 kernel_tuner/strategies/diff_evo.py
 mode change 100755 => 100644 kernel_tuner/strategies/dual_annealing.py
 mode change 100755 => 100644 kernel_tuner/strategies/firefly_algorithm.py
 mode change 100755 => 100644 kernel_tuner/strategies/genetic_algorithm.py
 mode change 100755 => 100644 kernel_tuner/strategies/greedy_ils.py
 mode change 100755 => 100644 kernel_tuner/strategies/greedy_mls.py
 mode change 100755 => 100644 kernel_tuner/strategies/hillclimbers.py
 mode change 100755 => 100644 kernel_tuner/strategies/minimize.py
 mode change 100755 => 100644 kernel_tuner/strategies/mls.py
 mode change 100755 => 100644 kernel_tuner/strategies/ordered_greedy_mls.py
 mode change 100755 => 100644 kernel_tuner/strategies/pso.py
 mode change 100755 => 100644 kernel_tuner/strategies/random_sample.py
 mode change 100755 => 100644 kernel_tuner/strategies/simulated_annealing.py
 mode change 100755 => 100644 kernel_tuner/util.py
 mode change 100755 => 100644 kernel_tuner/wrappers.py
 mode change 100755 => 100644 roadmap.md
 mode change 100755 => 100644 setup.cfg
 mode change 100755 => 100644 setup.py
 mode change 100755 => 100644 test/__init__.py
 mode change 100755 => 100644 test/context.py
 mode change 100755 => 100644 test/strategies/test_bayesian_optimization.py
 mode change 100755 => 100644 test/strategies/test_genetic_algorithm.py
 mode change 100755 => 100644 test/strategies/test_minimize.py
 mode change 100755 => 100644 test/strategies/test_strategies.py
 mode change 100755 => 100644 test/test_c_functions.py
 mode change 100755 => 100644 test/test_cache_file.json
 mode change 100755 => 100644 test/test_core.py
 mode change 100755 => 100644 test/test_cuda_functions.py
 mode change 100755 => 100644 test/test_cuda_mocked.py
 mode change 100755 => 100644 test/test_cupy_functions.py
 mode change 100755 => 100644 test/test_hyper.py
 mode change 100755 => 100644 test/test_integration.py
 mode change 100755 => 100644 test/test_interface.py
 mode change 100755 => 100644 test/test_kernelbuilder.py
 mode change 100755 => 100644 test/test_minimize.py
 mode change 100755 => 100644 test/test_observers.py
 mode change 100755 => 100644 test/test_opencl_functions.py
 mode change 100755 => 100644 test/test_runners.py
 mode change 100755 => 100644 test/test_util_functions.py
 mode change 100755 => 100644 tutorial/README.md
 mode change 100755 => 100644 tutorial/convolution.ipynb
 mode change 100755 => 100644 tutorial/diffusion.ipynb
 mode change 100755 => 100644 tutorial/diffusion_opencl.ipynb
 mode change 100755 => 100644 tutorial/diffusion_use_optparam.ipynb
 mode change 100755 => 100644 tutorial/grid3d.ipynb
 mode change 100755 => 100644 tutorial/matmul/matmul.cu
 mode change 100755 => 100644 tutorial/matmul/matmul.png
 mode change 100755 => 100644 tutorial/matmul/matmul.py
 mode change 100755 => 100644 tutorial/matmul/matmul_naive.cu
 mode change 100755 => 100644 tutorial/matmul/matmul_naive.png
 mode change 100755 => 100644 tutorial/matmul/matmul_naive.py
 mode change 100755 => 100644 tutorial/matmul/matmul_shared.cu
 mode change 100755 => 100644 tutorial/matmul/matmul_shared.png
 mode change 100755 => 100644 tutorial/matmul/matmul_shared.py
 mode change 100755 => 100644 tutorial/matrix_multiplication.ipynb

diff --git a/.gitattributes b/.gitattributes
old mode 100755
new mode 100644
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
old mode 100755
new mode 100644
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
old mode 100755
new mode 100644
diff --git a/.gitignore b/.gitignore
old mode 100755
new mode 100644
diff --git a/.zenodo.json b/.zenodo.json
old mode 100755
new mode 100644
diff --git a/CHANGELOG.md b/CHANGELOG.md
old mode 100755
new mode 100644
diff --git a/CITATION.cff b/CITATION.cff
old mode 100755
new mode 100644
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
old mode 100755
new mode 100644
diff --git a/INSTALL.rst b/INSTALL.rst
old mode 100755
new mode 100644
diff --git a/LICENSE b/LICENSE
old mode 100755
new mode 100644
diff --git a/MANIFEST.in b/MANIFEST.in
old mode 100755
new mode 100644
diff --git a/README.rst b/README.rst
old mode 100755
new mode 100644
diff --git a/doc/Makefile b/doc/Makefile
old mode 100755
new mode 100644
diff --git a/doc/deploy.sh b/doc/deploy.sh
old mode 100755
new mode 100644
diff --git a/doc/gemm-amd-summary.png b/doc/gemm-amd-summary.png
old mode 100755
new mode 100644
diff --git a/doc/gh_pages-deploy_key.enc b/doc/gh_pages-deploy_key.enc
old mode 100755
new mode 100644
diff --git a/doc/source/conf.py b/doc/source/conf.py
old mode 100755
new mode 100644
diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst
old mode 100755
new mode 100644
diff --git a/doc/source/correctness.rst b/doc/source/correctness.rst
old mode 100755
new mode 100644
diff --git a/doc/source/design.png b/doc/source/design.png
old mode 100755
new mode 100644
diff --git a/doc/source/design.rst b/doc/source/design.rst
old mode 100755
new mode 100644
diff --git a/doc/source/examples.rst b/doc/source/examples.rst
old mode 100755
new mode 100644
diff --git a/doc/source/hostcode.rst b/doc/source/hostcode.rst
old mode 100755
new mode 100644
diff --git a/doc/source/index.rst b/doc/source/index.rst
old mode 100755
new mode 100644
diff --git a/doc/source/install.rst b/doc/source/install.rst
old mode 100755
new mode 100644
diff --git a/doc/source/templates.rst b/doc/source/templates.rst
old mode 100755
new mode 100644
diff --git a/doc/source/user-api.rst b/doc/source/user-api.rst
old mode 100755
new mode 100644
diff --git a/doc/source/vocabulary.rst b/doc/source/vocabulary.rst
old mode 100755
new mode 100644
diff --git a/examples/README.rst b/examples/README.rst
old mode 100755
new mode 100644
diff --git a/examples/c/matrix_multiply.cpp b/examples/c/matrix_multiply.cpp
old mode 100755
new mode 100644
diff --git a/examples/c/matrix_multiply.py b/examples/c/matrix_multiply.py
old mode 100755
new mode 100644
diff --git a/examples/c/vector_add.py b/examples/c/vector_add.py
old mode 100755
new mode 100644
diff --git a/examples/cuda-c++/vector_add.py b/examples/cuda-c++/vector_add.py
old mode 100755
new mode 100644
diff --git a/examples/cuda-c++/vector_add_blocksize.py b/examples/cuda-c++/vector_add_blocksize.py
old mode 100755
new mode 100644
diff --git a/examples/cuda-c++/vector_add_cupy.py b/examples/cuda-c++/vector_add_cupy.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/convolution.cu b/examples/cuda/convolution.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/convolution.py b/examples/cuda/convolution.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/convolution_correct.py b/examples/cuda/convolution_correct.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/convolution_streams.cu b/examples/cuda/convolution_streams.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/convolution_streams.py b/examples/cuda/convolution_streams.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/expdist.cu b/examples/cuda/expdist.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/expdist.py b/examples/cuda/expdist.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/matmul.cu b/examples/cuda/matmul.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/matmul.py b/examples/cuda/matmul.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/pnpoly.cu b/examples/cuda/pnpoly.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/pnpoly.py b/examples/cuda/pnpoly.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/pnpoly_host.cu b/examples/cuda/pnpoly_host.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/python_kernel.py b/examples/cuda/python_kernel.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/reduction.cu b/examples/cuda/reduction.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/reduction.py b/examples/cuda/reduction.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/sepconv.py b/examples/cuda/sepconv.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/spmv.cu b/examples/cuda/spmv.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/spmv.py b/examples/cuda/spmv.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/stencil.cu b/examples/cuda/stencil.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/stencil.py b/examples/cuda/stencil.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/test_vector_add.py b/examples/cuda/test_vector_add.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/test_vector_add_parameterized.py b/examples/cuda/test_vector_add_parameterized.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/texture.py b/examples/cuda/texture.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/vector_add.py b/examples/cuda/vector_add.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/vector_add_codegen.py b/examples/cuda/vector_add_codegen.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/vector_add_cupy.py b/examples/cuda/vector_add_cupy.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/vector_add_jinja.cu b/examples/cuda/vector_add_jinja.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/vector_add_jinja.py b/examples/cuda/vector_add_jinja.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/vector_add_jinja2.py b/examples/cuda/vector_add_jinja2.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/vector_add_metric.py b/examples/cuda/vector_add_metric.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/vector_add_observers.py b/examples/cuda/vector_add_observers.py
old mode 100755
new mode 100644
diff --git a/examples/cuda/zeromeanfilter.cu b/examples/cuda/zeromeanfilter.cu
old mode 100755
new mode 100644
diff --git a/examples/cuda/zeromeanfilter.py b/examples/cuda/zeromeanfilter.py
old mode 100755
new mode 100644
diff --git a/examples/fortran/test_vector_add.py b/examples/fortran/test_vector_add.py
old mode 100755
new mode 100644
diff --git a/examples/fortran/vector_add.F90 b/examples/fortran/vector_add.F90
old mode 100755
new mode 100644
diff --git a/examples/fortran/vector_add.py b/examples/fortran/vector_add.py
old mode 100755
new mode 100644
diff --git a/examples/fortran/vector_add_acc.F90 b/examples/fortran/vector_add_acc.F90
old mode 100755
new mode 100644
diff --git a/examples/fortran/vector_add_acc.py b/examples/fortran/vector_add_acc.py
old mode 100755
new mode 100644
diff --git a/examples/opencl/convolution.cl b/examples/opencl/convolution.cl
old mode 100755
new mode 100644
diff --git a/examples/opencl/convolution.py b/examples/opencl/convolution.py
old mode 100755
new mode 100644
diff --git a/examples/opencl/convolution_correct.py b/examples/opencl/convolution_correct.py
old mode 100755
new mode 100644
diff --git a/examples/opencl/matmul.cl b/examples/opencl/matmul.cl
old mode 100755
new mode 100644
diff --git a/examples/opencl/matmul.py b/examples/opencl/matmul.py
old mode 100755
new mode 100644
diff --git a/examples/opencl/reduction.cl b/examples/opencl/reduction.cl
old mode 100755
new mode 100644
diff --git a/examples/opencl/reduction.py b/examples/opencl/reduction.py
old mode 100755
new mode 100644
diff --git a/examples/opencl/sepconv.py b/examples/opencl/sepconv.py
old mode 100755
new mode 100644
diff --git a/examples/opencl/stencil.cl b/examples/opencl/stencil.cl
old mode 100755
new mode 100644
diff --git a/examples/opencl/stencil.py b/examples/opencl/stencil.py
old mode 100755
new mode 100644
diff --git a/examples/opencl/vector_add.py b/examples/opencl/vector_add.py
old mode 100755
new mode 100644
diff --git a/examples/opencl/vector_add_codegen.py b/examples/opencl/vector_add_codegen.py
old mode 100755
new mode 100644
diff --git a/examples/opencl/vector_add_observers.py b/examples/opencl/vector_add_observers.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/__init__.py b/kernel_tuner/__init__.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/c.py b/kernel_tuner/c.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/cuda.py b/kernel_tuner/cuda.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/cupy.py b/kernel_tuner/cupy.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/integration.py b/kernel_tuner/integration.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/kernelbuilder.py b/kernel_tuner/kernelbuilder.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/nvml.py b/kernel_tuner/nvml.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/observers.py b/kernel_tuner/observers.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/opencl.py b/kernel_tuner/opencl.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/python.py b/kernel_tuner/python.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/runners/__init__.py b/kernel_tuner/runners/__init__.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/runners/sequential.py b/kernel_tuner/runners/sequential.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/runners/simulation.py b/kernel_tuner/runners/simulation.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/__init__.py b/kernel_tuner/strategies/__init__.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/basinhopping.py b/kernel_tuner/strategies/basinhopping.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch.py b/kernel_tuner/strategies/bayes_opt_GPyTorch.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/bayes_opt_alt_BayesOpt.py b/kernel_tuner/strategies/bayes_opt_alt_BayesOpt.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/bayes_opt_alt_HyperOpt.py b/kernel_tuner/strategies/bayes_opt_alt_HyperOpt.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/bayes_opt_alt_ScikitOpt.py b/kernel_tuner/strategies/bayes_opt_alt_ScikitOpt.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/bayes_opt_old.py b/kernel_tuner/strategies/bayes_opt_old.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/brute_force.py b/kernel_tuner/strategies/brute_force.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/dual_annealing.py b/kernel_tuner/strategies/dual_annealing.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/firefly_algorithm.py b/kernel_tuner/strategies/firefly_algorithm.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/greedy_ils.py b/kernel_tuner/strategies/greedy_ils.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/greedy_mls.py b/kernel_tuner/strategies/greedy_mls.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/hillclimbers.py b/kernel_tuner/strategies/hillclimbers.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/minimize.py b/kernel_tuner/strategies/minimize.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/mls.py b/kernel_tuner/strategies/mls.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/ordered_greedy_mls.py b/kernel_tuner/strategies/ordered_greedy_mls.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/pso.py b/kernel_tuner/strategies/pso.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/random_sample.py b/kernel_tuner/strategies/random_sample.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
old mode 100755
new mode 100644
diff --git a/kernel_tuner/wrappers.py b/kernel_tuner/wrappers.py
old mode 100755
new mode 100644
diff --git a/roadmap.md b/roadmap.md
old mode 100755
new mode 100644
diff --git a/setup.cfg b/setup.cfg
old mode 100755
new mode 100644
diff --git a/setup.py b/setup.py
old mode 100755
new mode 100644
diff --git a/test/__init__.py b/test/__init__.py
old mode 100755
new mode 100644
diff --git a/test/context.py b/test/context.py
old mode 100755
new mode 100644
diff --git a/test/strategies/test_bayesian_optimization.py b/test/strategies/test_bayesian_optimization.py
old mode 100755
new mode 100644
diff --git a/test/strategies/test_genetic_algorithm.py b/test/strategies/test_genetic_algorithm.py
old mode 100755
new mode 100644
diff --git a/test/strategies/test_minimize.py b/test/strategies/test_minimize.py
old mode 100755
new mode 100644
diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
old mode 100755
new mode 100644
diff --git a/test/test_c_functions.py b/test/test_c_functions.py
old mode 100755
new mode 100644
diff --git a/test/test_cache_file.json b/test/test_cache_file.json
old mode 100755
new mode 100644
diff --git a/test/test_core.py b/test/test_core.py
old mode 100755
new mode 100644
diff --git a/test/test_cuda_functions.py b/test/test_cuda_functions.py
old mode 100755
new mode 100644
diff --git a/test/test_cuda_mocked.py b/test/test_cuda_mocked.py
old mode 100755
new mode 100644
diff --git a/test/test_cupy_functions.py b/test/test_cupy_functions.py
old mode 100755
new mode 100644
diff --git a/test/test_hyper.py b/test/test_hyper.py
old mode 100755
new mode 100644
diff --git a/test/test_integration.py b/test/test_integration.py
old mode 100755
new mode 100644
diff --git a/test/test_interface.py b/test/test_interface.py
old mode 100755
new mode 100644
diff --git a/test/test_kernelbuilder.py b/test/test_kernelbuilder.py
old mode 100755
new mode 100644
diff --git a/test/test_minimize.py b/test/test_minimize.py
old mode 100755
new mode 100644
diff --git a/test/test_observers.py b/test/test_observers.py
old mode 100755
new mode 100644
diff --git a/test/test_opencl_functions.py b/test/test_opencl_functions.py
old mode 100755
new mode 100644
diff --git a/test/test_runners.py b/test/test_runners.py
old mode 100755
new mode 100644
diff --git a/test/test_util_functions.py b/test/test_util_functions.py
old mode 100755
new mode 100644
diff --git a/tutorial/README.md b/tutorial/README.md
old mode 100755
new mode 100644
diff --git a/tutorial/convolution.ipynb b/tutorial/convolution.ipynb
old mode 100755
new mode 100644
diff --git a/tutorial/diffusion.ipynb b/tutorial/diffusion.ipynb
old mode 100755
new mode 100644
diff --git a/tutorial/diffusion_opencl.ipynb b/tutorial/diffusion_opencl.ipynb
old mode 100755
new mode 100644
diff --git a/tutorial/diffusion_use_optparam.ipynb b/tutorial/diffusion_use_optparam.ipynb
old mode 100755
new mode 100644
diff --git a/tutorial/grid3d.ipynb b/tutorial/grid3d.ipynb
old mode 100755
new mode 100644
diff --git a/tutorial/matmul/matmul.cu b/tutorial/matmul/matmul.cu
old mode 100755
new mode 100644
diff --git a/tutorial/matmul/matmul.png b/tutorial/matmul/matmul.png
old mode 100755
new mode 100644
diff --git a/tutorial/matmul/matmul.py b/tutorial/matmul/matmul.py
old mode 100755
new mode 100644
diff --git a/tutorial/matmul/matmul_naive.cu b/tutorial/matmul/matmul_naive.cu
old mode 100755
new mode 100644
diff --git a/tutorial/matmul/matmul_naive.png b/tutorial/matmul/matmul_naive.png
old mode 100755
new mode 100644
diff --git a/tutorial/matmul/matmul_naive.py b/tutorial/matmul/matmul_naive.py
old mode 100755
new mode 100644
diff --git a/tutorial/matmul/matmul_shared.cu b/tutorial/matmul/matmul_shared.cu
old mode 100755
new mode 100644
diff --git a/tutorial/matmul/matmul_shared.png b/tutorial/matmul/matmul_shared.png
old mode 100755
new mode 100644
diff --git a/tutorial/matmul/matmul_shared.py b/tutorial/matmul/matmul_shared.py
old mode 100755
new mode 100644
diff --git a/tutorial/matrix_multiplication.ipynb b/tutorial/matrix_multiplication.ipynb
old mode 100755
new mode 100644

From 531627ac5649b7e2e9b91441c4a85a93b117c19a Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 24 Mar 2022 21:02:41 +0100
Subject: [PATCH 006/168] Search spaces are now generated much more efficiently
 using python-constraint, also added general Python runner

---
 kernel_tuner/core.py                          |   7 +-
 kernel_tuner/interface.py                     |   9 +-
 kernel_tuner/python.py                        | 195 +++++++++++-------
 kernel_tuner/runners/sequential.py            |   5 +-
 kernel_tuner/runners/simulation.py            |   6 +-
 .../strategies/bayes_opt_GPyTorch_lean.py     | 113 ++++------
 kernel_tuner/strategies/brute_force.py        |  13 +-
 kernel_tuner/strategies/random_sample.py      |  10 +-
 kernel_tuner/util.py                          | 114 +++++++++-
 setup.py                                      |   2 +-
 10 files changed, 297 insertions(+), 177 deletions(-)

diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py
index 1faf5deb0..765d0ee21 100644
--- a/kernel_tuner/core.py
+++ b/kernel_tuner/core.py
@@ -195,7 +195,7 @@ class DeviceInterface(object):
     """Class that offers a High-Level Device Interface to the rest of the Kernel Tuner"""
 
     def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=None, compiler_options=None, iterations=7, observers=None,
-                 parallel_mode=False):
+                 parallel_mode=False, hyperparam_mode=False):
         """ Instantiate the DeviceInterface, based on language in kernel source
 
         :param kernel_source The kernel sources
@@ -212,7 +212,7 @@ def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=No
         :type device: int
 
         :param lang: Specifies the language used for GPU kernels.
-            Currently supported: "CUDA", "OpenCL", or "C"
+            Currently supported: "CUDA", "OpenCL", "C" or "Python"
         :type lang: string
 
         :param compiler_options: The compiler options to use when compiling kernels for this device.
@@ -241,7 +241,8 @@ def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=No
         elif lang == "C":
             dev = CFunctions(compiler=compiler, compiler_options=compiler_options, iterations=iterations)
         elif lang == "Python":
-            dev = PythonFunctions(iterations=iterations, observers=observers, parallel_mode=parallel_mode, show_progressbar=True)
+            dev = PythonFunctions(iterations=iterations, observers=observers, parallel_mode=parallel_mode, hyperparam_mode=hyperparam_mode,
+                                  show_progressbar=True)
         else:
             raise ValueError("Sorry, support for languages other than CUDA, OpenCL, or C is not implemented yet")
 
diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 14f5dfd71..d28c41348 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -33,6 +33,7 @@
 import logging
 import sys
 import numpy
+from constraint import Constraint
 
 import kernel_tuner.util as util
 import kernel_tuner.core as core
@@ -403,7 +404,7 @@ def _get_docstring(opts):
 def tune_kernel(kernel_name, kernel_source, problem_size, arguments, tune_params, grid_div_x=None, grid_div_y=None, grid_div_z=None, restrictions=None,
                 answer=None, atol=1e-6, verify=None, verbose=False, lang=None, device=0, platform=0, smem_args=None, cmem_args=None, texmem_args=None,
                 compiler=None, compiler_options=None, log=None, iterations=7, block_size_names=None, quiet=False, strategy=None, strategy_options=None,
-                cache=None, metrics=None, simulation_mode=False, parallel_mode=False, observers=None):
+                cache=None, metrics=None, simulation_mode=False, parallel_mode=False, hyperparam_mode=False, observers=None):
 
     if log:
         logging.basicConfig(filename=kernel_name + datetime.now().strftime('%Y%m%d-%H:%M:%S') + '.log', level=log)
@@ -418,8 +419,8 @@ def tune_kernel(kernel_name, kernel_source, problem_size, arguments, tune_params
     # check whether block_size_names are used as expected
     util.check_block_size_params_names_list(block_size_names, tune_params)
 
-    # if the restrictions are not callable, make them (increases restrictions check performance significantly)
-    if restrictions is not None and not callable(restrictions):
+    # if the restrictions are not constraints or a callable, the restrictions are strings, so parse them to functions (increases restrictions check performance significantly)
+    if restrictions is not None and not callable(restrictions) and not any(isinstance(r, Constraint) for r in restrictions):
         restrictions = util.parse_restrictions(restrictions)
 
     if iterations < 1:
@@ -469,7 +470,7 @@ def tune_kernel(kernel_name, kernel_source, problem_size, arguments, tune_params
 
     # select the runner for this job based on input
     selected_runner = SimulationRunner if simulation_mode is True else SequentialRunner
-    with selected_runner(kernelsource, kernel_options, device_options, iterations, observers, parallel_mode) as runner:
+    with selected_runner(kernelsource, kernel_options, device_options, iterations, observers, parallel_mode, hyperparam_mode) as runner:
 
         #the user-specified function may or may not have an optional atol argument;
         #we normalize it so that it always accepts atol.
diff --git a/kernel_tuner/python.py b/kernel_tuner/python.py
index 69c6ac33c..00f2b24c1 100644
--- a/kernel_tuner/python.py
+++ b/kernel_tuner/python.py
@@ -33,7 +33,7 @@
 class PythonFunctions(object):
     """Class that groups the code for running and compiling C functions"""
 
-    def __init__(self, iterations=7, observers=None, parallel_mode=False, show_progressbar=False):
+    def __init__(self, iterations=7, observers=None, parallel_mode=False, hyperparam_mode=False, show_progressbar=False):
         """instantiate PythonFunctions object used for interacting with Python code
 
         :param iterations: Number of iterations used while benchmarking a kernel, 7 by default.
@@ -49,7 +49,12 @@ def __init__(self, iterations=7, observers=None, parallel_mode=False, show_progr
         self.env = env
         self.name = platform.processor()
         self.observers = observers or []
-        self.parallel_mode = parallel_mode
+        self.num_unused_cores = 1    # do not use all cores to do other work
+        self.num_cores = max(min(cpu_count() - self.num_unused_cores, self.iterations), 1)    # assumes cpu_count does not change during the life of this class!
+        self.parallel_mode = parallel_mode and self.num_cores > 1
+        self.hyperparam_mode = hyperparam_mode
+
+        self.benchmark = self.benchmark_normal if not self.hyperparam_mode else self.benchmark_hyperparams
 
         self.benchmark_times = []
 
@@ -87,22 +92,67 @@ def compile(self, kernel_instance):
         delete_temp_file(source_file)
         return func
 
-    def benchmark(self, func, args, threads, grid):
-        """runs the kernel repeatedly, returns averaged returned value
+    def benchmark_normal(self, func, args, threads, grid):
+        """runs the kernel repeatedly, returns times
+
+        :param func: A Python function for this specific configuration
+        :type func: ctypes._FuncPtr
+
+        :param args: A list of arguments to the function, order should match the
+            order in the code. The list should be prepared using
+            ready_argument_list().
+        :type args: list(Argument)
+
+        :param threads: Ignored, but left as argument for now to have the same
+            interface as CudaFunctions and OpenCLFunctions.
+        :type threads: any
+
+        :param grid: Ignored, but left as argument for now to have the same
+            interface as CudaFunctions and OpenCLFunctions.
+        :type grid: any
+
+        :returns: All times.
+        :rtype: dict()
+        """
+
+        result = dict()
+        result["times"] = []
+        iterator = range(self.iterations) if not self.show_progressbar or self.parallel_mode else progressbar.progressbar(
+            range(self.iterations), min_value=0, max_value=self.iterations, redirect_stdout=True)
+
+        # new implementation
+        start_time = perf_counter()
+        if self.parallel_mode:
+            logging.debug(f"Running benchmark in parallel on {self.num_cores} processors")
+            manager = Manager()
+            invalid_flag = manager.Value('i', int(False))
+            values = manager.list()
+            runtimes = manager.list()
+            with get_context('spawn').Pool(self.num_cores) as pool:    # spawn alternative is forkserver, creates a reusable server
+                args = func, args, self.params, invalid_flag
+                values, runtimes = zip(*pool.starmap(run_kernel_and_observers, zip(iterator, repeat(args))))
+                values, runtimes = list(values), list(runtimes)
+            result["strategy_time"] = np.mean(runtimes)
+        else:
+            values = list()
+            for _ in range(self.iterations):
+                value = self.run_kernel(func, args, threads, grid)
+                if value < 0.0:
+                    raise Exception("too many resources requested for launch")
+                values.append(value)
+
+        benchmark_time = perf_counter() - start_time
+        self.benchmark_times.append(benchmark_time)
 
-        The C function tuning is a little bit more flexible than direct CUDA
-        or OpenCL kernel tuning. The C function needs to measure time, or some
-        other quality metric you wish to tune on, on its own and should
-        therefore return a single floating-point value.
+        result["times"] = values
+        result["time"] = np.mean(values)
+        # print(f"Mean: {np.mean(values)}, std: {np.std(values)} in {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}\n")
+        return result
 
-        Benchmark runs the C function repeatedly and returns the average of the
-        values returned by the C function. The number of iterations is set
-        during the creation of the CFunctions object. For all measurements the
-        lowest and highest values are discarded and the rest is included in the
-        average. The reason for this is to be robust against initialization
-        artifacts and other exceptional cases.
+    def benchmark_hyperparams(self, func, args, threads, grid):
+        """runs the kernel repeatedly, returns grandmedian for hyperparameter tuning
 
-        :param func: A C function compiled for this specific configuration
+        :param func: A Python function for this specific configuration
         :type func: ctypes._FuncPtr
 
         :param args: A list of arguments to the function, order should match the
@@ -118,7 +168,7 @@ def benchmark(self, func, args, threads, grid):
             interface as CudaFunctions and OpenCLFunctions.
         :type grid: any
 
-        :returns: All execution times.
+        :returns: All execution hyperparameter scores in the same format as times.
         :rtype: dict()
         """
 
@@ -137,18 +187,17 @@ def benchmark(self, func, args, threads, grid):
 
         # new implementation
         start_time = perf_counter()
-        if self.parallel_mode and cpu_count() > 1:
-            num_procs = max(min(cpu_count() - 2, self.iterations), 1)
-            logging.debug(f"Running benchmark in parallel on {num_procs} processors")
+        if self.parallel_mode:
+            logging.debug(f"Running hyperparameter benchmark in parallel on {self.num_cores} processors")
             manager = Manager()
             invalid_flag = manager.Value('i', int(False))
-            MNE_values = manager.list()
+            MWP_values = manager.list()
             runtimes = manager.list()
             warnings_dicts = manager.list()
-            with get_context('spawn').Pool(num_procs) as pool:    # spawn alternative is forkserver, creates a reusable server
+            with get_context('spawn').Pool(self.num_cores) as pool:    # spawn alternative is forkserver, creates a reusable server
                 args = func, args, self.params, invalid_flag
-                MNE_values, runtimes, warnings_dicts = zip(*pool.starmap(run_kernel_and_observers, zip(iterator, repeat(args))))
-                MNE_values, runtimes, warnings_dicts = list(MNE_values), list(runtimes), list(warnings_dicts)
+                MWP_values, runtimes, warnings_dicts = zip(*pool.starmap(run_kernel_and_observers, zip(iterator, repeat(args))))
+                MWP_values, runtimes, warnings_dicts = list(MWP_values), list(runtimes), list(warnings_dicts)
             result["strategy_time"] = np.mean(runtimes)
             warning_dict = warnings_dicts[0]
             for key in warning_dict.keys():
@@ -159,12 +208,13 @@ def benchmark(self, func, args, threads, grid):
 
         benchmark_time = perf_counter() - start_time
         self.benchmark_times.append(benchmark_time)
-        print(f"Time taken: {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}")
 
-        grandmean, times = get_grandmedian_and_times(MNE_values, invalid_value, min_valid_iterations)
+        grandmean, times = get_hyperparam_grandmedian_and_times(MWP_values, invalid_value, min_valid_iterations)
         result["times"] = times
         result["time"] = grandmean
-        print(f"Grandmean over kernels: {grandmean}, mean MNE per iteration: {np.mean(times)}, std MNE per iteration: {np.std(times)}")
+        print(f"Grandmean: {grandmean} in {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}\n")
+        # print(f"Grandmean: {grandmean}, mean MWP per iteration: {np.mean(times)}, std MWP per iteration: {np.std(times)}")
+        # print(f"In {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}")
         return result
 
         start_time = perf_counter()
@@ -221,10 +271,10 @@ def benchmark(self, func, args, threads, grid):
         result["time"] = mean_mean_MRE
         return result
 
-    def run_kernel(self, func, args):
+    def run_kernel(self, func, args, threads, grid):
         """runs the kernel once, returns whatever the kernel returns
 
-        :param func: A C function compiled for this specific configuration
+        :param func: A Python function for this specific configuration
         :type func: ctypes._FuncPtr
 
         :param args: A list of arguments to the function, order should match the
@@ -253,12 +303,12 @@ def run_kernel(self, func, args):
     units = {}
 
 
-def run_kernel_and_observers(iter, args) -> Tuple[list, float, dict]:
-    """ Function to run a kernel directly for parallel processing. Must be outside the class to avoid pickling issues due to large scope. """
+def run_hyperparam_kernel_and_observers(iter, args) -> Tuple[list, float, dict]:
+    """ Function to run a hyperparam kernel directly for parallel processing. Must be outside the class to avoid pickling issues due to large scope. """
     PID = getpid()
-    print(f"Iter {iter+1}, PID {PID}", flush=True)
+    # print(f"Iter {iter+1}, PID {PID}", flush=True)
     func, funcargs, params, invalid_flag = args
-    logging.debug(f"run_kernel as subprocess {iter} (PID {PID})")
+    logging.debug(f"run_kernel iter {iter} (PID {PID})")
     logging.debug("arguments=" + str([str(arg) for arg in funcargs]))
 
     # run the kernel
@@ -270,8 +320,8 @@ def run_kernel_and_observers(iter, args) -> Tuple[list, float, dict]:
     return values, runtime, warning_dict
 
 
-def run_kernel_as_subprocess(iter, args):
-    """ Function to run a kernel as a subprocess for parallel processing. Must be outside the class to avoid pickling issues due to large scope. Significantly slower than run_kernel, but guaranteed to be a different process. Observers are not implemented."""
+def run_hyperparam_kernel_as_subprocess(iter, args):
+    """ Function to run a hyperparam kernel as a subprocess for parallel processing. Must be outside the class to avoid pickling issues due to large scope. Significantly slower than run_kernel, but guaranteed to be a different process. Observers are not implemented."""
     func, args, params = args
     PID = getpid()
     # print(f"Iter {iter}, PID {PID}", flush=True)
@@ -298,47 +348,52 @@ def make_kwargstrings(**kwargs) -> list:
     return time
 
 
-def get_grandmedian_and_times(MNE_values, invalid_value, min_valid_iterations=1):
-    """ Get the grandmean (mean of median MNE per kernel) and mean MNE per iteration """
-    MNE_values = np.array(MNE_values)
-    median_MNEs = np.array([])
-    valid_MNE_times = list()
-    # get the mean MNE per kernel
-    for i in range(len(MNE_values[0])):
-        MNE_kernel_values = MNE_values[:, i]
-        valid_MNE_mask = (MNE_kernel_values < invalid_value) & (MNE_kernel_values >= 0)
-        valid_MNE_kernel_values = MNE_kernel_values[valid_MNE_mask]
-        if len(valid_MNE_kernel_values) >= min_valid_iterations:
+def get_hyperparam_grandmedian_and_times(MWP_values, invalid_value, min_valid_iterations=1):
+    """ Get the grandmean (average of the median MWP per kernel, weighted by the inverse of each kernel's standard deviation) and the mean MWP per iteration """
+    MWP_values = np.array(MWP_values)
+    median_MWPs = np.array([])
+    median_MWPs_vars = np.array([])
+    valid_MWP_times = list()
+    # get the median MWP and its standard deviation per kernel, after filtering outliers
+    for i in range(len(MWP_values[0])):
+        MWP_kernel_values = MWP_values[:, i]
+        valid_MWP_mask = (MWP_kernel_values < invalid_value) & (MWP_kernel_values >= 0)
+        valid_MWP_kernel_values = MWP_kernel_values[valid_MWP_mask]
+        if len(valid_MWP_kernel_values) >= min_valid_iterations:
             # # filter outliers by keeping only values that are within two times the Median Absolute Deviation
-            # AD = np.abs(valid_MNE_kernel_values - np.median(valid_MNE_kernel_values))
+            # AD = np.abs(valid_MWP_kernel_values - np.median(valid_MWP_kernel_values))
             # MAD = np.median(AD)
-            # selected_MNE_kernel_values = valid_MNE_kernel_values[AD < MAD * 3]
-            # print(f"Removed {len(valid_MNE_kernel_values) - len(selected_MNE_kernel_values)}")
-            # median_MNEs = np.append(median_MNEs, np.median(selected_MNE_kernel_values))
-            # median_MNEs = np.append(median_MNEs, np.mean(valid_MNE_kernel_values))
+            # selected_MWP_kernel_values = valid_MWP_kernel_values[AD < MAD * 3]
+            # print(f"Removed {len(valid_MWP_kernel_values) - len(selected_MWP_kernel_values)}")
+            # median_MWPs = np.append(median_MWPs, np.median(selected_MWP_kernel_values))
+            # median_MWPs = np.append(median_MWPs, np.mean(valid_MWP_kernel_values))
 
             # filter outliers by keeping only values that are within three times the Median Absolute Deviation
-            AD = np.abs(valid_MNE_kernel_values - np.median(valid_MNE_kernel_values))
+            AD = np.abs(valid_MWP_kernel_values - np.median(valid_MWP_kernel_values))
             MAD = np.median(AD)
             MAD_score = AD / MAD if MAD else 0.0
-            selected_MNE_kernel_values = valid_MNE_kernel_values[MAD_score < 3]
-            median_MNEs = np.append(median_MNEs, np.median(selected_MNE_kernel_values))
+            selected_MWP_kernel_values = valid_MWP_kernel_values[MAD_score < 3]
+            median_MWPs = np.append(median_MWPs, np.median(selected_MWP_kernel_values))
+            median_MWPs_vars = np.append(median_MWPs_vars, np.std(selected_MWP_kernel_values))
         else:
-            median_MNEs = np.append(median_MNEs, invalid_value)
-
-    # get the mean MNE per iteration
-    for i in range(len(MNE_values)):
-        MNE_iteration_values = MNE_values[i]
-        valid_MNE_mask = (MNE_iteration_values < invalid_value) & (MNE_iteration_values >= 0)
-        valid_MNE_iteration_values = MNE_iteration_values[valid_MNE_mask]
-        if len(valid_MNE_iteration_values) > 0:
-            valid_MNE_times.append(np.mean(valid_MNE_iteration_values))
+            median_MWPs = np.append(median_MWPs, invalid_value)
+            median_MWPs_vars = np.append(median_MWPs_vars, 1)
+
+    # get the mean MWP per iteration
+    for i in range(len(MWP_values)):
+        MWP_iteration_values = MWP_values[i]
+        valid_MWP_mask = (MWP_iteration_values < invalid_value) & (MWP_iteration_values >= 0)
+        valid_MWP_iteration_values = MWP_iteration_values[valid_MWP_mask]
+        if len(valid_MWP_iteration_values) > 0:
+            valid_MWP_times.append(np.mean(valid_MWP_iteration_values))
         else:
-            valid_MNE_times.append(invalid_value)
-
-    # get the grandmean by taking the mean over the median MNE per iteration, invalid if one of the kernels is invalid
-    print(median_MNEs)
-    grandmean_MNE = np.mean(median_MNEs)
-    if np.isnan(grandmean_MNE) or len(median_MNEs[median_MNEs >= invalid_value]) > 0:
-        grandmean_MNE = invalid_value
-    return grandmean_MNE, valid_MNE_times
+            valid_MWP_times.append(invalid_value)
+
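+    # get the grandmean by taking the weighted average over the median MWP per kernel, where each weight is the inverse of that kernel's standard deviation (not its variance); invalid if one of the kernels is invalid or the result is NaN (e.g. when a standard deviation is 0)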
+    print(median_MWPs)
+    print(median_MWPs / median_MWPs_vars, np.sum(1 / median_MWPs_vars), np.std(median_MWPs / median_MWPs_vars))
+    inverse_variance_weighted_average = np.sum(median_MWPs / median_MWPs_vars) / np.sum(1 / median_MWPs_vars)
+    grandmean_MWP = inverse_variance_weighted_average
+    if np.isnan(grandmean_MWP) or len(median_MWPs[median_MWPs >= invalid_value]) > 0:
+        grandmean_MWP = invalid_value
+    return grandmean_MWP, valid_MWP_times
diff --git a/kernel_tuner/runners/sequential.py b/kernel_tuner/runners/sequential.py
index 20fbfaa7b..65fd13ae7 100644
--- a/kernel_tuner/runners/sequential.py
+++ b/kernel_tuner/runners/sequential.py
@@ -11,7 +11,7 @@
 class SequentialRunner(object):
     """ SequentialRunner is used for tuning with a single process/thread """
 
-    def __init__(self, kernel_source, kernel_options, device_options, iterations, observers, parallel_mode=False):
+    def __init__(self, kernel_source, kernel_options, device_options, iterations, observers, parallel_mode=False, hyperparam_mode=False):
         """ Instantiate the SequentialRunner
 
         :param kernel_source: The kernel source
@@ -30,7 +30,8 @@ def __init__(self, kernel_source, kernel_options, device_options, iterations, ob
         """
 
         #detect language and create high-level device interface
-        self.dev = DeviceInterface(kernel_source, iterations=iterations, observers=observers, parallel_mode=parallel_mode, **device_options).__enter__()
+        self.dev = DeviceInterface(kernel_source, iterations=iterations, observers=observers, parallel_mode=parallel_mode, hyperparam_mode=hyperparam_mode,
+                                   **device_options).__enter__()
 
         self.units = self.dev.units
         self.quiet = device_options.quiet
diff --git a/kernel_tuner/runners/simulation.py b/kernel_tuner/runners/simulation.py
index 9e58634a5..e0317ed4f 100644
--- a/kernel_tuner/runners/simulation.py
+++ b/kernel_tuner/runners/simulation.py
@@ -88,8 +88,8 @@ def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=No
 
         logging.debug('DeviceInterface instantiated, lang=%s', lang)
 
-        if lang not in ('CUDA', 'OpenCL', 'C'):
-            raise ValueError("Sorry, support for languages other than CUDA, OpenCL, or C is not implemented yet")
+        if lang not in ('CUDA', 'OpenCL', 'C', 'Python'):
+            raise ValueError("Sorry, support for languages other than CUDA, OpenCL, C or Python is not implemented yet")
         self.lang = lang
         self.dev = SimulationLangFunction(self.lang, device, iterations, compiler_options)
         self.max_threads = 1024
@@ -173,7 +173,7 @@ def __exit__(self, *exc):
 class SimulationRunner(object):
     """ SimulationRunner is used for tuning with a single process/thread """
 
-    def __init__(self, kernel_source, kernel_options, device_options, iterations, observers, parallel_mode=False):
+    def __init__(self, kernel_source, kernel_options, device_options, iterations, observers, parallel_mode=False, hyperparam_mode=False):
         """ Instantiate the SimulationRunner
 
         :param kernel_source: The kernel source
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
index e4809be7f..f91463a72 100644
--- a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
+++ b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
@@ -82,7 +82,7 @@ def tune(runner, kernel_options, device_options, tuning_options):
     tuning_options["scaling"] = False
 
     # prune the search space using restrictions
-    parameter_space = get_valid_configs(tuning_options, max_threads)
+    parameter_space = util.get_valid_configs(tuning_options, max_threads)
 
     # limit max_fevals to max size of the parameter space
     max_fevals = min(len(parameter_space), max_fevals)
@@ -139,11 +139,11 @@ def __init__(self, parameter_space: list, kernel_options, tuning_options, runner
 
         # get tuning options
         self.initial_sample_method = self.get_hyperparam("initialsamplemethod", "lhs", supported_initial_sample_methods)
-        self.initial_sample_random_offset_factor = self.get_hyperparam("initialsamplerandomoffsetfactor", 0.1, type=float)
-        self.initial_training_iter = self.get_hyperparam("initialtrainingiter", 5, type=int)
-        self.training_iter = self.get_hyperparam("trainingiter", 1, type=int)
+        self.initial_sample_random_offset_factor = self.get_hyperparam("initialsamplerandomoffsetfactor", 0.1, type=float)    # 0.1
+        self.initial_training_iter = self.get_hyperparam("initialtrainingiter", 5, type=int)    # 5
+        self.training_after_iter = self.get_hyperparam("trainingafteriter", 1, type=int)    # 1
         self.cov_kernel_name = self.get_hyperparam("covariancekernel", "matern_scalekernel", supported_cov_kernels)
-        self.cov_kernel_lengthscale = self.get_hyperparam("covariancelengthscale", 0.5, type=float)
+        self.cov_kernel_lengthscale = self.get_hyperparam("covariancelengthscale", 1.5, type=float)
         self.likelihood_name = self.get_hyperparam("likelihood", "Gaussian", supported_likelihoods)
         self.optimizer_name = self.get_hyperparam("optimizer", "LBFGS", supported_optimizers)
         self.optimizer_learningrate = self.get_hyperparam("optimizer_learningrate", self.optimizer_name, type=float, cast=default_optimizer_learningrates)
@@ -153,7 +153,7 @@ def __init__(self, parameter_space: list, kernel_options, tuning_options, runner
         # set acquisition function options
         self.set_acquisition_function(acquisition_function_name)
         if 'explorationfactor' not in af_params:
-            af_params['explorationfactor'] = 0.1
+            af_params['explorationfactor'] = 0.1    # 0.1
         self.af_params = af_params
 
         # set Tensors
@@ -208,6 +208,10 @@ def __init__(self, parameter_space: list, kernel_options, tuning_options, runner
             'lengthscale': np.array([]),
             'noise': np.array([]),
         }
+
+        # initialize the model
+        if not self.runner.simulation_mode:
+            self.import_cached_evaluations()
         self.initialize_model()
 
     @property
@@ -230,7 +234,7 @@ def train_y_err(self):
         """ Get the error on the valid results """
         std = self.results_std[self.valid_configs]
         if self.scaled_output and std.std() > 0.0:
-            std = (std - std.mean()) / std.std()
+            std = (std - std.mean()) / std.std()    # use z-score to get normalized variability
         return std
 
     @property
@@ -243,6 +247,12 @@ def test_x_unscaled(self):
         """ Get the unscaled, not yet visited parameter configurations """
         return self.param_configs[self.unvisited_configs]
 
+    @property
+    def test_y_err(self):
+        """ Get the expected error on the test set, estimated as the mean of the error on the valid results """
+        train_y_err = self.train_y_err
+        return torch.full((self.size - len(train_y_err), ), torch.mean(train_y_err))
+
     @property
     def invalid_x(self):
         """ Get the invalid parameter configurations by checking which visited configs are not valid (equivalent to checking which unvisited configs are valid) """
@@ -262,17 +272,17 @@ def true_param_config_indices(self, target_indices: torch.Tensor) -> torch.Tenso
 
     def initialize_model(self, take_initial_sample=True, train_hyperparams=True):
         """ Initialize the surrogate model """
-        if not self.runner.simulation_mode:
-            self.import_cached_evaluations()
-        self.initial_sample_std = self.min_std
+        # self.initial_sample_std = self.min_std
         if take_initial_sample:
             self.initial_sample()
 
         # create the model
         if self.likelihood_name == 'Gaussian':
             self.likelihood = gpytorch.likelihoods.GaussianLikelihood()
+        elif self.likelihood_name == 'GaussianPrior':
+            raise NotImplementedError("Gaussian Prior likelihood has not been implemented yet")
         elif self.likelihood_name == 'FixedNoise':
-            self.likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(noise=self.train_y_err.clamp(min=self.min_std), learn_additional_noise=False)
+            self.likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(noise=self.train_y_err.clamp(min=self.min_std), learn_additional_noise=True)
         self.likelihood = self.likelihood.to(self.device)
         self.model = ExactGPModel(self.train_x, self.train_y, self.likelihood, self.cov_kernel_name, self.cov_kernel_lengthscale)
 
@@ -360,8 +370,8 @@ def initial_sample(self):
         # set the current optimum, initial sample mean and initial sample std
         self.current_optimum = self.opt(self.train_y).item()
         self.initial_sample_mean = self.train_y.mean().item()
-        # self.initial_sample_std = self.train_y.std().item()
-        self.initial_sample_std = self.min_std    # temporary until the predictive posterior has been taken
+        self.initial_sample_std = self.train_y.std().item()
+        # self.initial_sample_std = self.min_std    # temporary until the predictive posterior has been taken
 
         # save a boolean mask of the initial samples
         self.inital_sample_configs = self.valid_configs.detach().clone()
@@ -471,6 +481,8 @@ def closure():
             except gpytorch.utils.errors.NotPSDError:
                 warnings.warn("Matrix not positive definite during training", NotPSDTrainingWarning)
                 return np.nan
+            except RuntimeError as e:
+                warnings.warn(str(e), RuntimeWarning)
 
         loss = None
         for _ in range(training_iter):
@@ -482,6 +494,9 @@ def closure():
             except gpytorch.utils.errors.NanError:
                 warnings.warn("PSD_safe_Cholesky failed due to too many NaN", NaNTrainingWarning)
                 break
+            except TypeError as e:
+                warnings.warn(str(e), RuntimeWarning)
+                break
 
         # set the hyperparams to the new values
         try:
@@ -518,8 +533,8 @@ def optimize(self, max_fevals: int) -> Tuple[tuple, float]:    #NOSONAR
                 predictions_tuple = self.remove_from_predict_list(predictions_tuple, short_param_config_index)
             else:
                 predictions_tuple = self.predict_list()
-                if self.initial_sample_std <= self.min_std:
-                    self.initial_sample_std = min(max(predictions_tuple[1].mean().item(), self.min_std), 10.0)
+                # if self.initial_sample_std <= self.min_std:
+                # self.initial_sample_std = min(max(predictions_tuple[1].mean().item(), self.min_std), 10.0)
             # if there are NaN or all of the predicted std are the same, take from the least evaluated region
             mean_has_NaN = bool(torch.any(torch.isnan(predictions_tuple[0])).item())
             std_has_NaN = bool(torch.any(torch.isnan(predictions_tuple[1])).item())
@@ -536,7 +551,7 @@ def optimize(self, max_fevals: int) -> Tuple[tuple, float]:    #NOSONAR
                 warnings.warn(
                     f"After {self.fevals}/{max_fevals} fevals, {warning_reason}, picking one from the least evaluated region and resetting the surrogate model",
                     ResetModelWarning)
-                self.initialize_model(take_initial_sample=False, train_hyperparams=False)
+                self.initialize_model(take_initial_sample=False, train_hyperparams=True)
             else:
                 # otherwise, optimize the acquisition function to find the next candidate
                 hyperparam = self.contextual_variance(predictions_tuple[0], predictions_tuple[1]) if use_contextual_variance else None
@@ -569,8 +584,8 @@ def optimize(self, max_fevals: int) -> Tuple[tuple, float]:    #NOSONAR
                 last_invalid = False
                 self.model.set_train_data(self.train_x, self.train_y, strict=False)
                 # do not train if there are multiple minima, because it introduces numerical instability or insolvability
-                if self.training_iter > 0:
-                    self.train_hyperparams(training_iter=self.training_iter)
+                if self.training_after_iter > 0 and (self.fevals % self.training_after_iter == 0):
+                    self.train_hyperparams(training_iter=1)    # TODO experiment with other training iter
                 # set the current optimum
                 self.current_optimum = self.opt(self.train_y).item()
             # print(f"Valid: {len(self.train_x)}, unvisited: {len(self.test_x)}, invalid: {len(self.invalid_x)}, last invalid: {last_invalid}")
@@ -603,7 +618,7 @@ def register_result(self, result: float, param_config_index: int):
         if result != self.invalid_value:
             self.valid_configs[param_config_index] = True
             self.results[param_config_index] = result
-            assert last_result['time'] == result
+            # assert last_result['time'] == result TODO remove
             self.results_std[param_config_index] = max(np.std(last_result['times']), self.min_std)
 
         # add the current model parameters to the last entry of the results dict
@@ -634,6 +649,9 @@ def predict_list(self) -> Tuple[torch.Tensor, torch.Tensor]:
             except gpytorch.utils.errors.NotPSDError:
                 warnings.warn("NotPSD error during predictions", NotPSDPredictionWarning)
                 return torch.ones_like(self.test_x), torch.zeros_like(self.test_x)
+            except RuntimeError as e:
+                warnings.warn(str(e), RuntimeWarning)
+                return torch.ones_like(self.test_x), torch.zeros_like(self.test_x)
 
     def get_diff_improvement(self, y_mu, y_std, fplus) -> torch.Tensor:
         """ compute probability of improvement by assuming normality on the difference in improvement """
@@ -656,14 +674,12 @@ def contextual_variance(self, mean: torch.Tensor, std: torch.Tensor):
             improvement_over_current_sample = (abs(self.current_optimum) - self.train_y.mean().item()) / std.mean().item()
             improvement_diff = improvement_over_current_sample - improvement_over_initial_sample
             # the closer the improvement over the current sample is to the improvement over the initial sample, the greater the exploration
-            x = 1 - min(max(1 - improvement_diff, 0.2), 0.0)
-            # x = 1 - min(max(improvement_diff, 1) * 0.2, 0.0)
+            # x = 1 - max(max(1 - improvement_diff, 0.2), 0.0)
+            x = 1 - max(min(improvement_diff, 1) * 0.2, 0.0)
             # the smaller the difference between the initial sample error and current sample error, the greater the exploration
             # x = 1 - min(max(self.initial_sample_std - std.mean().item(), 1.0), 0.8)
             # print(self.initial_sample_std, std.mean().item())
-            # print(x)
             cv = np.log10(x) + 0.1    # at x=0.0, y=0.1; at x=0.2, y=0.003; at x=0.2057, y=0.0.
-            # print(cv)
             return cv
         else:
             raise NotImplementedError("Contextual Variance has not yet been implemented for non-scaled outputs")
@@ -821,57 +837,10 @@ def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[torch.Te
 
         return torch.tensor(parameter_space, dtype=self.dtype).to(self.device), tune_params
 
-    def to_xarray(self):
-        # print(self.tuning_options['tune_params'])
-        # print(az.convert_to_inference_data(self.tuning_options['tune_params']).posterior)
-        with torch.no_grad(), gpytorch.settings.fast_pred_samples(), gpytorch.settings.fast_pred_var():
-            posterior = self.model(self.param_configs_scaled)
-            predictive_posterior = self.likelihood(posterior)
-            # print(posterior.variance)
-            # print(az.convert_to_inference_data(posterior.to_data_independent_dist()))
-            # print(len(posterior.covariance_matrix))
-            # print(len(posterior.covariance_matrix[0]))
-            # exit(0)
-
-            # data = az.load_arviz_data('centered_eight')
-            # az.plot_posterior(data, show=True)
-
-            param_configs = list(tuple(pc) for pc in self.param_configs.tolist())
-            # posterior_dict = dict(zip(param_configs, posterior.get_base_samples()))
-            posterior_dict = {
-                'mu': posterior.mean,
-                'var': posterior.variance
-            }
-            predictive_posterior_dict = {
-                'mu': predictive_posterior.mean,
-                'var': predictive_posterior.variance
-            }
-            print(posterior_dict)
-            # predictive_posterior_dict = dict(zip(str(self.param_configs_scaled.numpy()), predictive_posterior.get_base_samples()))
-            # log_prob_dict = dict(zip(self.param_configs_scaled, predictive_posterior.log_prob()))
-            tune_param_keys = np.array(list(self.tune_params.keys()))[self.nonstatic_params]
-            tune_param_values = np.array(list(self.tune_params.values()), dtype=object)[self.nonstatic_params]
-            coordinates = dict(zip(tune_param_keys, tune_param_values))
-            dimensions = dict(zip(tune_param_keys, ([k] for k in tune_param_keys)))
-            print(coordinates)
-            print(dimensions)
-            data = az.from_dict(posterior_dict, posterior_predictive=predictive_posterior_dict)
-            print(az.summary(data))
-            print(data.posterior)
-            print(data.posterior_predictive)
-            az.plot_trace(data, show=True)
-            exit(0)
-            print(data.posterior_predictive)
-
-            # print(az.convert_to_inference_data(posterior.get_base_samples()))
-        # TODO create InferenceData
-        # print(predictive_posterior.sample())
-        # print(az.from_dict())
-        # print(az.convert_to_inference_data(predictive_posterior))
-        exit(0)
-
     def visualize(self):
         """ Visualize the surrogate model and observations in a plot """
+        if self.fevals < 220:
+            return None
         from matplotlib import pyplot as plt
         with torch.no_grad(), gpytorch.settings.fast_pred_var():
             # Initialize plot
diff --git a/kernel_tuner/strategies/brute_force.py b/kernel_tuner/strategies/brute_force.py
index d72713908..b8c148923 100644
--- a/kernel_tuner/strategies/brute_force.py
+++ b/kernel_tuner/strategies/brute_force.py
@@ -34,12 +34,15 @@ def tune(runner, kernel_options, device_options, tuning_options):
     restrictions = tuning_options.restrictions
     verbose = tuning_options.verbose
 
-    # compute cartesian product of all tunable parameters
-    parameter_space = itertools.product(*tune_params.values())
+    # # compute cartesian product of all tunable parameters
+    # parameter_space = itertools.product(*tune_params.values())
 
-    # check for search space restrictions
-    if restrictions is not None:
-        parameter_space = filter(lambda p: util.check_restrictions(restrictions, p, tune_params.keys(), verbose), parameter_space)
+    # # check for search space restrictions
+    # if restrictions is not None:
+    #     parameter_space = filter(lambda p: util.check_restrictions(restrictions, p, tune_params.keys(), verbose), parameter_space)
+
+    parameter_space = util.get_valid_configs(tuning_options, runner.dev.max_threads)
+    print(f"Parameter space size: {len(parameter_space)}")
 
     results, env = runner.run(parameter_space, kernel_options, tuning_options)
 
diff --git a/kernel_tuner/strategies/random_sample.py b/kernel_tuner/strategies/random_sample.py
index 3b8f20a51..8eef8ded6 100644
--- a/kernel_tuner/strategies/random_sample.py
+++ b/kernel_tuner/strategies/random_sample.py
@@ -5,6 +5,7 @@
 import numpy
 
 from kernel_tuner import util
+from time import perf_counter
 
 
 def tune(runner, kernel_options, device_options, tuning_options):
@@ -35,15 +36,10 @@ def tune(runner, kernel_options, device_options, tuning_options):
 
     fraction = tuning_options.strategy_options.get("fraction", 0.1)
 
-    # compute cartesian product of all tunable parameters
-    parameter_space = itertools.product(*tune_params.values())
-
-    # check for search space restrictions
-    if tuning_options.restrictions is not None:
-        parameter_space = filter(lambda p: util.check_restrictions(tuning_options.restrictions, p, tune_params.keys(), tuning_options.verbose), parameter_space)
+    parameter_space = util.get_valid_configs(tuning_options, runner.dev.max_threads)
 
     # reduce parameter space to a random sample using sample_fraction
-    parameter_space = numpy.array(list(parameter_space))
+    parameter_space = numpy.array(parameter_space)
     size = len(parameter_space)
     fraction = int(numpy.ceil(size * fraction))
     sample_indices = numpy.random.choice(range(size), size=fraction, replace=False)
diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index 332ac6750..0f18c0f0f 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -4,6 +4,7 @@
 from collections import OrderedDict
 import os
 import errno
+from tabnanny import verbose
 import tempfile
 import logging
 import warnings
@@ -11,6 +12,7 @@
 from types import FunctionType
 
 import numpy as np
+from constraint import Problem, Constraint, FunctionConstraint
 try:
     import cupy as cp
 except ImportError:
@@ -122,9 +124,8 @@ def check_block_size_params_names_list(block_size_names, tune_params):
             warnings.warn("None of the tunable parameters specify thread block dimensions!", UserWarning)
 
 
-def check_restrictions(restrictions, element, keys, verbose):
+def check_restrictions(restrictions, params, verbose):
     """ check whether a specific instance meets the search space restrictions """
-    params = OrderedDict(zip(keys, element))
     valid = True
     if callable(restrictions):
         valid = restrictions(params)
@@ -140,14 +141,23 @@ def check_restrictions(restrictions, element, keys, verbose):
     return valid
 
 
+def check_thread_block_dimensions(params, max_threads, block_size_names=None):
+    """ check on maximum thread block dimensions """
+    dims = get_thread_block_dimensions(params, block_size_names)
+    return np.prod(dims) <= max_threads
+
+
 def config_valid(config, tuning_options, max_threads):
     """ combines restrictions and a check on the max thread block dimension to check config validity """
     legal = True
-    if tuning_options.restrictions:
-        legal = check_restrictions(tuning_options.restrictions, config, tuning_options.tune_params.keys(), False)
     params = OrderedDict(zip(tuning_options.tune_params.keys(), config))
-    dims = get_thread_block_dimensions(params, tuning_options.get("block_size_names", None))
-    return legal and np.prod(dims) <= max_threads
+    if tuning_options.restrictions:
+        legal = check_restrictions(tuning_options.restrictions, params, False)
+        if not legal:
+            return False
+    block_size_names = tuning_options.get("block_size_names", None)
+    valid_thread_block_dimensions = check_thread_block_dimensions(params, max_threads, block_size_names)
+    return legal and valid_thread_block_dimensions
 
 
 def delete_temp_file(filename):
@@ -262,10 +272,42 @@ def get_kernel_string(kernel_source, params=None):
 
 def get_valid_configs(tuning_options, max_threads) -> list:
     """ compute valid configurations in a search space based on restrictions and max_threads"""
-    parameter_space = itertools.product(*tuning_options.tune_params.values())
-    if tuning_options.restrictions is not None:
-        parameter_space = filter(lambda p: config_valid(p, tuning_options, max_threads), parameter_space)
-    return list(parameter_space)
+    restrictions = tuning_options.restrictions
+    tune_params = tuning_options.tune_params
+    param_names = list(tune_params.keys())
+
+    # instantiate the parameter space with all the variables
+    parameter_space = Problem()
+    for param_name, param_values in tune_params.items():
+        parameter_space.addVariable(param_name, param_values)
+
+    # add the user-specified restrictions as constraints on the parameter space
+    if isinstance(restrictions, list):
+        for restriction in restrictions:
+            if isinstance(restriction, FunctionConstraint):
+                parameter_space.addConstraint(restriction, param_names)
+            elif isinstance(restriction, Constraint):
+                parameter_space.addConstraint(restriction)
+            else:
+                raise ValueError(f"Unrecognized restriction {restriction}")
+    # if the restrictions are the old monolithic function, apply them directly (only for backwards compatibility, likely slower than well-specified constraints!)
+    elif callable(restrictions):
+        restrictions_wrapper = lambda *args: check_restrictions(restrictions, dict(zip(param_names, args)), False)
+        parameter_space.addConstraint(restrictions_wrapper, param_names)
+
+    # add the default blocksize threads restrictions last, because it is unlikely to reduce the parameter space by much
+    block_size_names = tuning_options.get("block_size_names", default_block_size_names)
+    block_size_names = list(block_size_name for block_size_name in block_size_names if block_size_name in param_names)
+    if len(block_size_names) > 0:
+        parameter_space.addConstraint(MaxProdConstraint(max_threads), block_size_names)
+
+    # construct the parameter space with the constraints applied
+    parameter_space = parameter_space.getSolutions()
+    # form the parameter tuples in the order specified by tune_params.keys()
+    parameter_space_list = list()
+    for params in parameter_space:
+        parameter_space_list.append(tuple(params[param_name] for param_name in param_names))
+    return parameter_space_list
 
 
 def get_number_of_valid_configs(tuning_options, max_threads) -> int:
@@ -691,3 +733,55 @@ def parse_restrictions(restrictions: list):
     code_object = compile(parsed_restrictions, '<string>', 'exec')
     func = FunctionType(code_object.co_consts[0], globals())
     return func
+
+
+class MaxProdConstraint(Constraint):
+    """
+    Constraint enforcing that the product of the values of the given
+    variables is at most a given amount
+    Example:
+    >>> problem = Problem()
+    >>> problem.addVariables(["a", "b"], [1, 2])
+    >>> problem.addConstraint(MaxProdConstraint(3))
+    >>> sorted(sorted(x.items()) for x in problem.getSolutions())
+    [[('a', 1), ('b', 1)], [('a', 1), ('b', 2)], [('a', 2), ('b', 1)]]
+    """
+
+    def __init__(self, maxprod):
+        """
+        @param maxprod: Value to be considered as the maximum product
+        @type  maxprod: number
+        NOTE: the domain pruning in preProcess and in the forward check assumes
+              that every value is >= 1 - verify this before using it elsewhere
+        """
+        self._maxprod = maxprod
+
+    def preProcess(self, variables, domains, constraints, vconstraints):
+        Constraint.preProcess(self, variables, domains, constraints, vconstraints)
+        maxprod = self._maxprod
+        for variable in variables:
+            domain = domains[variable]
+            for value in domain[:]:    # iterate over a copy, because the domain is modified inside the loop
+                if value > maxprod:    # with all values >= 1, a value above the limit can never be in a solution
+                    domain.remove(value)
+
+    def __call__(self, variables, domains, assignments, forwardcheck=False):
+        maxprod = self._maxprod
+        prod = 1
+        for variable in variables:
+            if variable in assignments:
+                prod *= assignments[variable]
+        if isinstance(prod, float):
+            prod = round(prod, 10)    # suppress floating-point noise before comparing against the limit
+        if prod > maxprod:
+            return False
+        if forwardcheck:
+            for variable in variables:
+                if variable not in assignments:
+                    domain = domains[variable]
+                    for value in domain[:]:
+                        if prod * value > maxprod:    # product (not sum) of the partial assignment and the candidate
+                            domain.hideValue(value)
+                    if not domain:
+                        return False
+        return True
diff --git a/setup.py b/setup.py
index 113c18d33..ca7ce56c2 100644
--- a/setup.py
+++ b/setup.py
@@ -39,7 +39,7 @@ def readme():
         'Topic :: System :: Distributed Computing',
         'Development Status :: 5 - Production/Stable',
     ],
-    install_requires=['numpy>=1.13.3', 'scipy>=0.18.1', 'jsonschema'],
+    install_requires=['numpy>=1.13.3', 'scipy>=0.18.1', 'jsonschema', 'python-constraint>=1.4.0'],
     extras_require={
         'doc': ['sphinx', 'sphinx_rtd_theme', 'nbsphinx', 'pytest', 'ipython'],
         'cuda': ['pycuda', pynvml],

From 5983e249aa17b6b41606be638d0aafd63604c163 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 25 Mar 2022 15:05:06 +0100
Subject: [PATCH 007/168] Added backwards compatibility with most
 python-constraint Constraints for on-the-fly restriction-checking algorithms

---
 kernel_tuner/strategies/basinhopping.py |  2 +-
 kernel_tuner/strategies/minimize.py     |  2 +-
 kernel_tuner/util.py                    | 38 +++++++++++++++++++++++--
 3 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/strategies/basinhopping.py b/kernel_tuner/strategies/basinhopping.py
index 4a434434a..46c68ea61 100644
--- a/kernel_tuner/strategies/basinhopping.py
+++ b/kernel_tuner/strategies/basinhopping.py
@@ -37,7 +37,7 @@ def tune(runner, kernel_options, device_options, tuning_options):
     method = tuning_options.strategy_options.get("method", "L-BFGS-B")
     T = tuning_options.strategy_options.get("T", 1.0)
 
-    #s cale variables in x to make 'eps' relevant for multiple variables
+    # scale variables in x to make 'eps' relevant for multiple variables
     tuning_options["scaling"] = True
 
     bounds, x0, eps = get_bounds_x0_eps(tuning_options)
diff --git a/kernel_tuner/strategies/minimize.py b/kernel_tuner/strategies/minimize.py
index eb9b1b81b..e475890a8 100644
--- a/kernel_tuner/strategies/minimize.py
+++ b/kernel_tuner/strategies/minimize.py
@@ -81,7 +81,7 @@ def _cost_func(x, kernel_options, tuning_options, runner, results, check_restric
 
     # check if this is a legal (non-restricted) parameter instance
     if check_restrictions and tuning_options.restrictions:
-        legal = util.check_restrictions(tuning_options.restrictions, params, tuning_options.tune_params.keys(), tuning_options.verbose)
+        legal = util.config_valid(params, tuning_options, runner.dev.max_threads)
         if not legal:
             error_result = OrderedDict(zip(tuning_options.tune_params.keys(), params))
             error_result["time"] = error_time
diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index a5a0f0eca..1c690c1fa 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -1,4 +1,5 @@
 """ Module for kernel tuner utility functions """
+from argparse import ArgumentError
 import itertools
 import json
 from collections import OrderedDict
@@ -12,7 +13,7 @@
 from types import FunctionType
 
 import numpy as np
-from constraint import Problem, Constraint, FunctionConstraint
+from constraint import Problem, Constraint, AllDifferentConstraint, AllEqualConstraint, MaxSumConstraint, ExactSumConstraint, MinSumConstraint, InSetConstraint, NotInSetConstraint, SomeInSetConstraint, SomeNotInSetConstraint, FunctionConstraint
 try:
     import cupy as cp
 except ImportError:
@@ -135,8 +136,16 @@ def check_restrictions(restrictions, params, verbose):
     else:
         for restrict in restrictions:
             try:
-                if not eval(replace_param_occurrences(restrict, params)):
+                # if it's a python-constraint, convert to function and execute
+                if isinstance(restrict, Constraint):
+                    restrict = convert_constraint_restriction(restrict)
+                    if not restrict(params.values()):
+                        valid = False
+                        break
+                # if it's a string, fill in the parameters and evaluate
+                elif not eval(replace_param_occurrences(restrict, params)):
                     valid = False
+                    break
             except ZeroDivisionError:
                 pass
     if not valid and verbose:
@@ -144,6 +153,31 @@ def check_restrictions(restrictions, params, verbose):
     return valid
 
 
+def convert_constraint_restriction(restrict: Constraint):
+    """ Convert a python-constraint Constraint to a function of the parameter values, for backwards compatibility.
+    Returns a callable that takes an iterable of parameter values and returns whether the restriction is met.
+    Raises NotImplementedError for set-membership constraints and TypeError for unrecognized restrictions. """
+    if isinstance(restrict, FunctionConstraint):
+        f_restrict = lambda p: restrict._func(*p)
+    elif isinstance(restrict, AllDifferentConstraint):
+        f_restrict = lambda p: len(set(p)) == len(p)
+    elif isinstance(restrict, AllEqualConstraint):
+        f_restrict = lambda p: len(set(p)) <= 1    # no indexing, so a dict view of values is accepted as well
+    elif isinstance(restrict, MaxProdConstraint):
+        f_restrict = lambda p: np.prod(list(p)) <= restrict._maxprod    # the limit is stored as `_maxprod`
+    elif isinstance(restrict, MaxSumConstraint):
+        f_restrict = lambda p: sum(p) <= restrict._maxsum    # the limit is stored as `_maxsum`
+    elif isinstance(restrict, ExactSumConstraint):
+        f_restrict = lambda p: sum(p) == restrict._exactsum
+    elif isinstance(restrict, MinSumConstraint):
+        f_restrict = lambda p: sum(p) >= restrict._minsum    # the limit is stored as `_minsum`
+    elif isinstance(restrict, (InSetConstraint, NotInSetConstraint, SomeInSetConstraint, SomeNotInSetConstraint)):
+        raise NotImplementedError(f"Restriction of the type {type(restrict)} is explicitely not supported in backwards compatibility mode, because the behaviour is too complex. Please rewrite this constraint to a function to use it with this algorithm.")
+    else:
+        raise TypeError(f"Unrecognized restriction {restrict}")
+    return f_restrict
+
+
 def check_thread_block_dimensions(params, max_threads, block_size_names=None):
     """ check on maximum thread block dimensions """
     dims = get_thread_block_dimensions(params, block_size_names)

From f4c8e0b3117025838d801e48524a96dce0b53b09 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Tue, 5 Apr 2022 18:24:26 +0200
Subject: [PATCH 008/168] Added new minmax initial sampling

---
 .../strategies/bayes_opt_GPyTorch_lean.py     | 84 ++++++++++++++++++-
 kernel_tuner/util.py                          |  6 +-
 2 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
index 2cdc49e08..59e385421 100644
--- a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
+++ b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
@@ -20,7 +20,7 @@
 
 # set supported hyperparameter values
 supported_precisions = ['float', 'double']
-supported_initial_sample_methods = ['lhs', 'index', 'random']
+supported_initial_sample_methods = ['lhs', 'index', 'minmax','random']
 supported_methods = ['ei', 'poi', 'random']
 supported_cov_kernels = ['matern', 'matern_scalekernel']
 supported_likelihoods = ['Gaussian', 'GaussianPrior', 'FixedNoise']
@@ -354,6 +354,8 @@ def initial_sample(self):
                     continue
                 list_param_config_indices.append(param_config_index)
                 self.evaluate_config(param_config_index)
+        elif self.initial_sample_method == 'minmax':
+            list_param_config_indices += self.take_min_max_initial_samples(list_param_config_indices)
 
         # then take index-spaced samples until all samples are valid
         while self.fevals < self.num_initial_samples:
@@ -441,6 +443,86 @@ def get_lhs_samples(self, random_offsets: np.ndarray) -> torch.Tensor:
                     "Perhaps try something other than LHS."))
         return param_configs_indices
 
+    def take_min_max_initial_samples(self, list_param_config_indices: list, samples_per_parameter=1) -> list:
+        """ Take the minimum parameters and the maximum for each parameter to establish the effect of individual parameters """
+        # number of samples required is at least (samples_per_parameter) * (number of parameters) + 1
+
+        # first get the individual parameter values and sort them
+        params_values = list(self.tune_params.values())
+        for param_values in params_values:
+            param_values.sort()
+
+        number_of_params = len(params_values)
+        if self.num_initial_samples - self.fevals < samples_per_parameter * number_of_params + 1:
+            raise ValueError(f"There are not enough initial samples available ({self.num_initial_samples - self.fevals}) to do minmax initial sampling. At least {samples_per_parameter * number_of_params + 1} samples are required.")
+
+        # then take the minimum parameter configuration using BFS, this is used as the base
+        # instead of BFS, you could also search for the minimal sum of indices
+        minimum_index = None
+        param_level = 0
+        param_moving_index = -1
+        while minimum_index is None and self.num_initial_samples - self.fevals:
+            # create the minimum base configuration and find it in the search space
+            selected_param_config = torch.tensor(tuple(param_values[param_level+1] if param_index == param_moving_index else param_values[min(param_level, len(param_values)-1)] for param_index, param_values in enumerate(params_values)), dtype=self.dtype).to(self.device)
+            matching_params = torch.count_nonzero(self.param_configs == selected_param_config, -1)
+            match_mask = (matching_params == number_of_params)
+            found_num_matching_param_configs = match_mask.count_nonzero()
+            temp_index = self.index_counter[match_mask]
+            # check if the configuration exists and is successfully evaluated
+            if found_num_matching_param_configs == 1 and (temp_index.item() in list_param_config_indices or self.evaluate_config(temp_index.item()) < self.invalid_value):
+                minimum_index = temp_index.item()
+                minimum_config = self.param_configs[minimum_index]
+                if minimum_index not in list_param_config_indices:
+                    list_param_config_indices.append(minimum_index)
+            # if it doesn't exist or doesn't evaluate successfully, do a breadth-first search for the minimum configuration
+            else:
+                proceed = False
+                while not proceed:
+                    # first look at the current level
+                    if param_moving_index < len(params_values) - 1:
+                        param_moving_index += 1
+                        # if the param_level + 1 exceeds the number of values of this parameter, try the next parameter
+                        if len(params_values[param_moving_index]) <= param_level + 1:
+                            param_moving_index += 1
+                        else:
+                            proceed = True
+                    # if nothing is found, proceed to the next level
+                    else:
+                        param_level += 1
+                        param_moving_index = -1
+                        proceed = True
+        if minimum_index is None:
+            raise ValueError(f"Could not evaluate the minimum base configuration in {self.num_initial_samples} samples.")
+
+        # next take the maximum for each individual parameter using DFS
+        for param_index, param_values in enumerate(params_values):
+            if len(param_values) <= 1:
+                continue
+            maximum_index = None
+            param_moving_level = len(param_values) - 1
+            while maximum_index is None and self.num_initial_samples - self.fevals > 0:
+                # take the minimum configuration as base
+                selected_param_config = minimum_config.clone()
+                # change only the currently selected parameter and look up the configuration in the search space
+                selected_param_config[param_index] = param_values[param_moving_level]
+                matching_params = torch.count_nonzero(self.param_configs == selected_param_config, -1)
+                match_mask = (matching_params == number_of_params)
+                found_num_matching_param_configs = match_mask.count_nonzero()
+                temp_index = self.index_counter[match_mask]
+                if found_num_matching_param_configs == 1 and (temp_index.item() in list_param_config_indices or self.evaluate_config(temp_index.item()) < self.invalid_value):
+                    maximum_index = temp_index.item()
+                    if maximum_index not in list_param_config_indices:
+                        list_param_config_indices.append(maximum_index)
+                # if it doesn't exist or doesn't evaluate successfully, move one parameter value down
+                else:
+                    param_moving_level -= 1
+                    if param_moving_level < 0:
+                        raise ValueError(f"No instance of parameter {param_index} is present in the search space and succesfully evaluated")
+            if maximum_index is None:
+                raise ValueError(f"Could not evaluate the maximum configuration for {param_index+1} out of {len(params_values)} within {self.num_initial_samples} samples.")
+
+        return list_param_config_indices
+
     def get_middle_index_of_least_evaluated_region(self) -> int:
         """ Get the middle index of the region of parameter configurations that is the least visited """
         # This uses the largest distance between visited parameter configurations. That means it does not properly take the parameters into account, only the index of the parameter configurations, whereas LHS does.
diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index 1c690c1fa..ca433bbfd 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -343,7 +343,11 @@ def get_valid_configs(tuning_options, max_threads) -> list:
     # form the parameter tuples in the order specified by tune_params.keys()
     parameter_space_list = list()
     for params in parameter_space:
-        parameter_space_list.append(tuple(params[param_name] for param_name in param_names))
+        param_config = tuple(params[param_name] for param_name in param_names)
+        if param_config not in parameter_space_list:
+            parameter_space_list.append(param_config)
+        else:
+            print(f"Duplicate {param_config}")
     return parameter_space_list
 
 

From b33c6bd05abc9518c1324b4259f8355310a264a2 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 25 Oct 2024 12:44:32 -0700
Subject: [PATCH 009/168] Skip strategies that don't have their dependencies
 installed

---
 test/context.py                    | 23 +++++++++++++++++++----
 test/strategies/__init__.py        |  0
 test/strategies/test_strategies.py | 14 ++++++++++++--
 3 files changed, 31 insertions(+), 6 deletions(-)
 create mode 100644 test/strategies/__init__.py

diff --git a/test/context.py b/test/context.py
index ba5030430..e99591764 100644
--- a/test/context.py
+++ b/test/context.py
@@ -1,7 +1,7 @@
-import sys
-import subprocess
-import shutil
 import os
+import shutil
+import subprocess
+import sys
 
 import pytest
 
@@ -47,7 +47,6 @@
     cupy_present = False
 
 try:
-    import cuda
 
     cuda_present = True
 except Exception:
@@ -63,6 +62,20 @@
 except ImportError:
     pyhip_present = False
 
+try:
+    import botorch
+    import torch
+    bayes_opt_botorch_present = True
+except ImportError:
+    bayes_opt_botorch_present = False
+
+try:
+    import gpytorch
+    import torch
+    bayes_opt_gpytorch_present = True
+except ImportError:
+    bayes_opt_gpytorch_present = False
+
 try:
     from autotuning_methodology.report_experiments import get_strategy_scores
     methodology_present = True
@@ -89,6 +102,8 @@
 skip_if_no_openmp = pytest.mark.skipif(not openmp_present, reason="No OpenMP found")
 skip_if_no_openacc = pytest.mark.skipif(not openacc_present, reason="No nvc++ on PATH")
 skip_if_no_pyhip = pytest.mark.skipif(not pyhip_present, reason="No PyHIP found")
+skip_if_no_bayesopt_gpytorch = pytest.mark.skipif(not bayes_opt_gpytorch_present, reason="Torch and GPyTorch not installed")
+skip_if_no_bayesopt_botorch = pytest.mark.skipif(not bayes_opt_botorch_present, reason="Torch and BOTorch not installed")
 skip_if_no_methodology = pytest.mark.skipif(not methodology_present, reason="Autotuning Methodology not found")
 
 
diff --git a/test/strategies/__init__.py b/test/strategies/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
index 096be38b0..57c43b4f7 100644
--- a/test/strategies/test_strategies.py
+++ b/test/strategies/test_strategies.py
@@ -7,6 +7,8 @@
 from kernel_tuner import util
 from kernel_tuner.interface import strategy_map
 
+from ..context import skip_if_no_bayesopt_botorch, skip_if_no_bayesopt_gpytorch
+
 cache_filename = os.path.dirname(os.path.realpath(__file__)) + "/../test_cache_file.json"
 
 @pytest.fixture
@@ -32,8 +34,16 @@ def vector_add():
 
     return ["vector_add", kernel_string, size, args, tune_params]
 
-
-@pytest.mark.parametrize('strategy', strategy_map)
+# skip some strategies if their dependencies are not installed
+strategies = []
+for s in strategy_map.keys():
+    if 'gpytorch' in s.lower():
+        strategies.append(pytest.param(s, marks=skip_if_no_bayesopt_gpytorch))
+    elif 'botorch' in s.lower():
+        strategies.append(pytest.param(s, marks=skip_if_no_bayesopt_botorch))
+    else:
+        strategies.append(s)
+@pytest.mark.parametrize('strategy', strategies)
 def test_strategies(vector_add, strategy):
 
     options = dict(popsize=5, neighbor='adjacent')

From 208fe7bad498808e1455d925093a5dd154f72c49 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 25 Oct 2024 16:07:16 -0700
Subject: [PATCH 010/168] Tuning new optimization algorithm

---
 kernel_tuner/hyper.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index b94c58986..37235a26b 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -86,13 +86,20 @@ def put_if_not_present(target_dict, key, value):
     return list(result_unique.values()), env
 
 if __name__ == "__main__":  # TODO remove in production
+    # hyperparams = {
+    #     'popsize': [10, 20, 30],
+    #     'maxiter': [50, 100, 150],
+    #     'w': [0.25, 0.5, 0.75],
+    #     'c1': [1.0, 2.0, 3.0],
+    #     'c2': [0.5, 1.0, 1.5]
+    # }
+    # result, env = tune_hyper_params('pso', hyperparams)
     hyperparams = {
-        'popsize': [10, 20, 30],
-        'maxiter': [50, 100, 150],
-        'w': [0.25, 0.5, 0.75],
-        'c1': [1.0, 2.0, 3.0],
-        'c2': [0.5, 1.0, 1.5]
+        'neighbor': ['Hamming', 'adjacent'],
+        'restart': [True, False],
+        'no_improvement': [1, 10, 25, 33, 50, 66, 75, 100, 200],
+        'random_walk': [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99]
     }
-    result, env = tune_hyper_params('pso', hyperparams)
+    result, env = tune_hyper_params('greedy_ils', hyperparams)
     print(result)
     print(env['best_config'])

From 6281a0c012ff8d71da2a6dd2c032db9722805518 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 25 Oct 2024 16:18:30 -0700
Subject: [PATCH 011/168] Added new BO strategies to interface

---
 kernel_tuner/interface.py | 4 ++++
 test/context.py           | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 06b5058fe..a557ae589 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -50,6 +50,10 @@
 from kernel_tuner.strategies import (
     basinhopping,
     bayes_opt,
+    bayes_opt_alt_BOTorch,
+    bayes_opt_GPyTorch,
+    bayes_opt_GPyTorch_lean,
+    bayes_opt_old,
     brute_force,
     diff_evo,
     dual_annealing,
diff --git a/test/context.py b/test/context.py
index e99591764..e7bb7cbfa 100644
--- a/test/context.py
+++ b/test/context.py
@@ -47,7 +47,8 @@
     cupy_present = False
 
 try:
-
+    import cuda
+    print(cuda)
     cuda_present = True
 except Exception:
     cuda_present = False

From 5ab70df39b7fe3ec368ea03b4dcf0f4c3ffc526c Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 25 Oct 2024 16:19:30 -0700
Subject: [PATCH 012/168] Made BO GPyTorch implementations importable

---
 kernel_tuner/strategies/bayes_opt_GPyTorch.py | 148 +++++++-------
 .../strategies/bayes_opt_GPyTorch_lean.py     | 185 ++++++++++--------
 2 files changed, 175 insertions(+), 158 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch.py b/kernel_tuner/strategies/bayes_opt_GPyTorch.py
index 784c7d6c0..39da1c30d 100644
--- a/kernel_tuner/strategies/bayes_opt_GPyTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_GPyTorch.py
@@ -1,9 +1,8 @@
-""" Bayesian Optimization implementation from the thesis by Willemsen """
-from copy import deepcopy
-from random import randint, shuffle
+"""Bayesian Optimization implementation from the thesis by Willemsen."""
 import itertools
-import warnings
 import time
+from copy import deepcopy
+from random import randint, shuffle
 from typing import Tuple
 
 import numpy as np
@@ -11,23 +10,42 @@
 
 # BO imports
 try:
-    import torch
     import gpytorch
-    from sklearn.gaussian_process.kernels import ConstantKernel, RBF, Matern
+    import torch
     from sklearn.exceptions import ConvergenceWarning
+    from sklearn.gaussian_process.kernels import RBF, ConstantKernel, Matern
     from skopt.sampler import Lhs
     bayes_opt_present = True
+
+    class ExactGPModel(gpytorch.models.ExactGP):
+        """Very simple exact Gaussian Process model with a zero mean and a scaled Matern (nu=1.5) covariance kernel."""
+
+        def __init__(self, train_x, train_y, likelihood):
+            super().__init__(train_x, train_y, likelihood)    # must run ExactGP.__init__ itself, not skip past it in the MRO
+            self.mean_module = gpytorch.means.ZeroMean()    # TODO maybe try ConstantMean or LinearMean
+            self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=1.5))    # TODO maybe try ScaleKernel(MaternKernel)
+
+        def forward(self, x):
+            """Return the multivariate normal distribution at x given by the mean and covariance modules."""
+            mean_x, covar_x = self.mean_module(x), self.covar_module(x)
+            return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
 except ImportError:
     bayes_opt_present = False
 
-from kernel_tuner.strategies import minimize
+    class ExactGPModel():
+        def __init__(self, train_x, train_y, likelihood):
+            raise ImportError("GPyTorch not imported")
+        def forward(self, x):
+            raise ImportError("GPyTorch not imported")
+
 from kernel_tuner import util
+from kernel_tuner.strategies import minimize
 
 supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast"]
 
 
 def generate_normalized_param_dicts(tune_params: dict, eps: float) -> Tuple[dict, dict]:
-    """ Generates normalization and denormalization dictionaries """
+    """Generates normalization and denormalization dictionaries."""
     original_to_normalized = dict()
     normalized_to_original = dict()
     for param_name in tune_params.keys():
@@ -43,14 +61,14 @@ def generate_normalized_param_dicts(tune_params: dict, eps: float) -> Tuple[dict
 
 
 def normalize_parameter_space(param_space: list, tune_params: dict, normalized: dict) -> list:
-    """ Normalize the parameter space given a normalization dictionary """
+    """Normalize the parameter space given a normalization dictionary."""
     keys = list(tune_params.keys())
     param_space_normalized = list(tuple(normalized[keys[i]][v] for i, v in enumerate(params)) for params in param_space)
     return param_space_normalized
 
 
 def prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict: dict, max_threads: int):
-    """ Pruning of the parameter space to remove dimensions that have a constant parameter """
+    """Pruning of the parameter space to remove dimensions that have a constant parameter."""
     pruned_tune_params_mask = list()
     removed_tune_params = list()
     param_names = list(tune_params.keys())
@@ -73,7 +91,7 @@ def prune_parameter_space(parameter_space, tuning_options, tune_params, normaliz
 
 
 def tune(runner, kernel_options, device_options, tuning_options):
-    """ Find the best performing kernel configuration in the parameter space
+    """Find the best performing kernel configuration in the parameter space.
 
     :params runner: A runner from kernel_tuner.runners
     :type runner: kernel_tuner.runner
@@ -95,6 +113,10 @@ def tune(runner, kernel_options, device_options, tuning_options):
     :rtype: list(dict()), dict()
 
     """
+    if not bayes_opt_present:
+        raise ImportError(
+            "Error: optional dependencies for Bayesian Optimization not installed, please install torch and gpytorch"
+        )
 
     max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
     prune_parameterspace = tuning_options.strategy_options.get("pruneparameterspace", True)
@@ -137,20 +159,6 @@ def tune(runner, kernel_options, device_options, tuning_options):
     return results, runner.dev.get_environment()
 
 
-class ExactGPModel(gpytorch.models.ExactGP):
-    """ Very simple exact Gaussian Process model """
-
-    def __init__(self, train_x, train_y, likelihood):
-        super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
-        self.mean_module = gpytorch.means.ZeroMean()    # TODO maybe try ConstantMean or LinearMean
-        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=1.5))    # TODO maybe try ScaleKernel(MaternKernel)
-
-    def forward(self, x):
-        mean_x = self.mean_module(x)
-        covar_x = self.covar_module(x)
-        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
-
-
 class BayesianOptimization():
 
     def __init__(self, searchspace: list, removed_tune_params: list, kernel_options: dict, tuning_options: dict, normalize_dict: dict, denormalize_dict: dict,
@@ -170,8 +178,8 @@ def get_hyperparam(name: str, default, supported_values=list()):
             return value
 
         # get hyperparameters
-        cov_kernel_name = get_hyperparam("covariancekernel", "matern32", self.supported_cov_kernels)
-        cov_kernel_lengthscale = get_hyperparam("covariancelengthscale", 1.5)
+        get_hyperparam("covariancekernel", "matern32", self.supported_cov_kernels)
+        get_hyperparam("covariancelengthscale", 1.5)
         acquisition_function = get_hyperparam("method", "multi-advanced", self.supported_methods)
         acq = acquisition_function
         acq_params = get_hyperparam("methodparams", {})
@@ -276,19 +284,19 @@ def current_optimum(self, value: float):
         self.__current_optimum = value
 
     def is_better_than(self, a: float, b: float) -> bool:
-        """ Determines which one is better depending on optimization direction """
+        """Determines which one is better depending on optimization direction."""
         return a < b if self.opt_direction == 'min' else a > b
 
     def is_not_visited(self, index: int) -> bool:
-        """ Returns whether a searchspace index has not been visited """
+        """Returns whether a searchspace index has not been visited."""
         return not self.__visited_searchspace_indices[index]
 
     def is_valid(self, observation: float) -> bool:
-        """ Returns whether an observation is valid """
-        return not (observation == None or observation == self.invalid_value or observation == np.NaN)
+        """Returns whether an observation is valid, i.e. not None, not the invalid value and not NaN."""
+        return not (observation is None or observation == self.invalid_value or observation != observation)    # NaN is the only value unequal to itself; `== np.NaN` is always False
 
     def get_af_by_name(self, name: str):
-        """ Get the basic acquisition functions by their name """
+        """Get the basic acquisition functions by their name."""
         basic_af_names = ['ei', 'poi', 'lcb']
         if name == 'ei':
             return self.af_expected_improvement
@@ -299,7 +307,7 @@ def get_af_by_name(self, name: str):
         raise ValueError(f"{name} not in {basic_af_names}")
 
     def set_acquisition_function(self, acquisition_function: str):
-        """ Set the acquisition function """
+        """Set the acquisition function."""
         if acquisition_function == 'poi':
             self.__af = self.af_probability_of_improvement
         elif acquisition_function == 'ei':
@@ -320,16 +328,16 @@ def set_acquisition_function(self, acquisition_function: str):
             raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
 
     def set_surrogate_model(self, cov_kernel_name: str, cov_kernel_lengthscale: float):
-        """ Set the surrogate model with a covariance function and lengthscale """
+        """Set the surrogate model with a covariance function and lengthscale."""
         # TODO remove or adapt this
         if cov_kernel_name == "constantrbf":
-            kernel = ConstantKernel(1.0, constant_value_bounds="fixed") * RBF(cov_kernel_lengthscale, length_scale_bounds="fixed")
+            ConstantKernel(1.0, constant_value_bounds="fixed") * RBF(cov_kernel_lengthscale, length_scale_bounds="fixed")
         elif cov_kernel_name == "rbf":
-            kernel = RBF(length_scale=cov_kernel_lengthscale, length_scale_bounds="fixed")
+            RBF(length_scale=cov_kernel_lengthscale, length_scale_bounds="fixed")
         elif cov_kernel_name == "matern32":
-            kernel = Matern(length_scale=cov_kernel_lengthscale, nu=1.5, length_scale_bounds="fixed")
+            Matern(length_scale=cov_kernel_lengthscale, nu=1.5, length_scale_bounds="fixed")
         elif cov_kernel_name == "matern52":
-            kernel = Matern(length_scale=cov_kernel_lengthscale, nu=2.5, length_scale_bounds="fixed")
+            Matern(length_scale=cov_kernel_lengthscale, nu=2.5, length_scale_bounds="fixed")
         else:
             raise ValueError(f"Acquisition function must be one of {self.supported_cov_kernels}, is {cov_kernel_name}")
         likelihood = gpytorch.likelihoods.GaussianLikelihood()
@@ -337,7 +345,7 @@ def set_surrogate_model(self, cov_kernel_name: str, cov_kernel_lengthscale: floa
         # self.__model = GaussianProcessRegressor(kernel=kernel, alpha=1e-10, normalize_y=True)    # maybe change alpha to a higher value such as 1e-5?
 
     def valid_params_observations(self) -> Tuple[list, list]:
-        """ Returns a list of valid observations and their parameter configurations """
+        """Returns a list of valid observations and their parameter configurations."""
         # if you do this every iteration, better keep it as cache and update in update_after_evaluation
         params = list()
         observations = list()
@@ -348,30 +356,30 @@ def valid_params_observations(self) -> Tuple[list, list]:
         return params, observations
 
     def unvisited(self) -> list:
-        """ Returns a list of unvisited parameter configurations - attention: cached version exists! """
+        """Returns a list of unvisited parameter configurations - attention: cached version exists!"""
         params = list(self.searchspace[index] for index, visited in enumerate(self.__visited_searchspace_indices) if visited is False)
         return params
 
     def find_param_config_index(self, param_config: tuple) -> int:
-        """ Find a parameter config index in the search space if it exists """
+        """Find a parameter config index in the search space if it exists."""
         return self.searchspace.index(param_config)
 
     def find_param_config_unvisited_index(self, param_config: tuple) -> int:
-        """ Find a parameter config index in the unvisited cache if it exists """
+        """Find a parameter config index in the unvisited cache if it exists."""
         return self.unvisited_cache.index(param_config)
 
     def normalize_param_config(self, param_config: tuple) -> tuple:
-        """ Normalizes a parameter configuration """
+        """Normalizes a parameter configuration."""
         normalized = tuple(self.normalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
         return normalized
 
     def denormalize_param_config(self, param_config: tuple) -> tuple:
-        """ Denormalizes a parameter configuration """
+        """Denormalizes a parameter configuration."""
         denormalized = tuple(self.denormalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
         return denormalized
 
     def unprune_param_config(self, param_config: tuple) -> tuple:
-        """ In case of pruned dimensions, adds the removed dimensions back in the param config """
+        """In case of pruned dimensions, adds the removed dimensions back in the param config."""
         unpruned = list()
         pruned_count = 0
         for removed in self.removed_tune_params:
@@ -383,7 +391,7 @@ def unprune_param_config(self, param_config: tuple) -> tuple:
         return tuple(unpruned)
 
     def update_after_evaluation(self, observation: float, index: int, param_config: tuple):
-        """ Adjust the visited and valid index records accordingly """
+        """Adjust the visited and valid index records accordingly."""
         validity = self.is_valid(observation)
         self.__visited_num += 1
         self.__observations[index] = observation
@@ -398,11 +406,11 @@ def update_after_evaluation(self, observation: float, index: int, param_config:
                 self.current_optimum = observation
 
     def predict(self, x) -> Tuple[float, float]:
-        """ Returns a mean and standard deviation predicted by the surrogate model for the parameter configuration """
+        """Returns a mean and standard deviation predicted by the surrogate model for the parameter configuration."""
         return self.__model.predict([x], return_std=True)
 
     def predict_list(self, lst: list) -> Tuple[np.ndarray, np.ndarray]:
-        """ Returns a list of means and standard deviations predicted by the surrogate model for the parameter configurations, and separate lists of means and standard deviations """
+        """Returns a list of means and standard deviations predicted by the surrogate model for the parameter configurations, and separate lists of means and standard deviations."""
         with torch.no_grad(), gpytorch.settings.fast_pred_var():
             # TODO use torch.cuda for GPU
             test_x = torch.Tensor(lst)
@@ -412,7 +420,7 @@ def predict_list(self, lst: list) -> Tuple[np.ndarray, np.ndarray]:
             return mu.numpy(), std.numpy()
 
     def evaluate_objective_function(self, param_config: tuple) -> float:
-        """ Evaluates the objective function """
+        """Evaluates the objective function."""
         param_config = self.unprune_param_config(param_config)
         denormalized_param_config = self.denormalize_param_config(param_config)
         if not util.config_valid(denormalized_param_config, self.tuning_options, self.max_threads):
@@ -423,7 +431,7 @@ def evaluate_objective_function(self, param_config: tuple) -> float:
         return val
 
     def add_model_hyperparams_to_result(self, param_config: tuple):
-        """ Add the model parameters (loss and noise) to the results dict at the last result """
+        """Add the model parameters (loss and noise) to the results dict at the last result."""
         # assert that the results index corresponds to the last index
         assert self.find_config_index_in_results(param_config) == len(self.results) - 1
 
@@ -432,7 +440,7 @@ def add_model_hyperparams_to_result(self, param_config: tuple):
             self.results[-1][key] = value
 
     def find_config_index_in_results(self, param_config: tuple):
-        """ Find the index of a parameter configuration in the results. Beware that this can be very slow! """
+        """Find the index of a parameter configuration in the results. Beware that this can be very slow!"""
         found_indices = list()
         for results_index, result_dict in enumerate(self.results):
             keys = list(result_dict.keys())
@@ -446,11 +454,11 @@ def find_config_index_in_results(self, param_config: tuple):
         return found_indices[0]
 
     def dimensions(self) -> list:
-        """ List of parameter values per parameter """
+        """List of parameter values per parameter."""
         return self.tune_params.values()
 
     def draw_random_sample(self) -> Tuple[list, int]:
-        """ Draw a random sample from the unvisited parameter configurations """
+        """Draw a random sample from the unvisited parameter configurations."""
         if len(self.unvisited_cache) < 1:
             raise ValueError("Searchspace exhausted during random sample draw as no valid configurations were found")
         index = randint(0, len(self.unvisited_cache) - 1)    # NOSONAR
@@ -459,7 +467,7 @@ def draw_random_sample(self) -> Tuple[list, int]:
         return param_config, actual_index
 
     def draw_latin_hypercube_samples(self, num_samples: int) -> list:
-        """ Draws an LHS-distributed sample from the search space """
+        """Draws an LHS-distributed sample from the search space."""
         if self.searchspace_size < num_samples:
             raise ValueError("Can't sample more than the size of the search space")
         if self.sampling_crit is None:
@@ -482,7 +490,7 @@ def draw_latin_hypercube_samples(self, num_samples: int) -> list:
         return list(zip(normalized_param_configs, indices))
 
     def train_model_hyperparams(self):
-        """ Train the model and likelihood hyperparameters """
+        """Train the model and likelihood hyperparameters."""
         # set to training modes
         self.__model.train()
         self.__likelihood.train()
@@ -494,8 +502,6 @@ def train_model_hyperparams(self):
         mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.__likelihood, self.__model)
 
         loss = 0
-        lengthscale = 0
-        noise = 0
         for i in range(self.training_iter):
             # Zero gradients from previous iteration
             optimizer.zero_grad()
@@ -521,7 +527,7 @@ def train_model_hyperparams(self):
         # print(f"Loss: {self.hyperparams['loss']}, lengthscale: {self.hyperparams['lengthscale']}, noise: {self.hyperparams['noise']}")
 
     def initial_sample(self):
-        """ Draws an initial sample using random sampling """
+        """Draws an initial sample using random sampling."""
         if self.num_initial_samples <= 0:
             raise ValueError("At least one initial sample is required")
         if self.sampling_method == 'lhs':
@@ -563,7 +569,7 @@ def initial_sample(self):
         self.cv_norm_maximum = self.initial_std
 
     def contextual_variance(self, std: list):
-        """ Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018) """
+        """Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018)."""
         if not self.af_params['explorationfactor'] == 'CV':
             return None
         if self.opt_direction == 'min':
@@ -581,7 +587,7 @@ def contextual_variance(self, std: list):
         return np.mean(std) / self.current_optimum
 
     def __optimize(self, max_fevals):
-        """ Find the next best candidate configuration(s), evaluate those and update the model accordingly """
+        """Find the next best candidate configuration(s), evaluate those and update the model accordingly."""
         while self.fevals < max_fevals:
             if self.__visited_num >= self.searchspace_size:
                 raise ValueError(self.error_message_searchspace_fully_observed)
@@ -598,7 +604,7 @@ def __optimize(self, max_fevals):
         return self.results
 
     def __optimize_multi(self, max_fevals):
-        """ Optimize with a portfolio of multiple acquisition functions. Predictions are always only taken once. Skips AFs if they suggest X/max_evals duplicates in a row, prefers AF with best discounted average. """
+        """Optimize with a portfolio of multiple acquisition functions. Predictions are always only taken once. Skips AFs if they suggest X/max_evals duplicates in a row, prefers AF with best discounted average."""
         if self.opt_direction != 'min':
             raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
         # calculate how many times an AF can suggest a duplicate candidate before the AF is skipped
@@ -709,7 +715,7 @@ def __optimize_multi(self, max_fevals):
         return self.results
 
     def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
-        """ Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once, unless increase_precision is true. Skips AFs if they are consistently worse than the mean of discounted observations, promotes AFs if they are consistently better than this mean. """
+        """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once, unless increase_precision is true. Skips AFs if they are consistently worse than the mean of discounted observations, promotes AFs if they are consistently better than this mean."""
         if self.opt_direction != 'min':
             raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
         aqfs = self.multi_afs
@@ -811,7 +817,7 @@ def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
         return self.results
 
     def __optimize_multi_fast(self, max_fevals):
-        """ Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once. """
+        """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once."""
         while self.fevals < max_fevals:
             aqfs = self.multi_afs
             # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
@@ -834,14 +840,13 @@ def __optimize_multi_fast(self, max_fevals):
         return self.results
 
     def af_random(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function returning a randomly shuffled list for comparison """
+        """Acquisition function returning a randomly shuffled list for comparison."""
         list_random = range(len(self.unvisited_cache))
         shuffle(list_random)
         return list_random
 
     def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function Probability of Improvement (PI) """
-
+        """Acquisition function Probability of Improvement (PI)."""
         # prefetch required data
         x_mu, x_std = predictions
         if hyperparam is None:
@@ -856,8 +861,7 @@ def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> li
         return list_prob_improvement
 
     def af_expected_improvement(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function Expected Improvement (EI) """
-
+        """Acquisition function Expected Improvement (EI)."""
         # prefetch required data
         x_mu, x_std = predictions
         if hyperparam is None:
@@ -874,8 +878,7 @@ def af_expected_improvement(self, predictions=None, hyperparam=None) -> list:
         return list_exp_improvement
 
     def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function Lower Confidence Bound (LCB) """
-
+        """Acquisition function Lower Confidence Bound (LCB)."""
         x_mu, x_std = predictions
         if hyperparam is None:
             hyperparam = self.af_params['explorationfactor']
@@ -886,8 +889,7 @@ def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
         return list_lower_confidence_bound
 
     def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010 """
-
+        """Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010."""
         # prefetch required data
         x_mu, x_std = predictions
         if hyperparam is None:
@@ -905,7 +907,7 @@ def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None)
         return list_lower_confidence_bound
 
     def visualize_after_opt(self):
-        """ Visualize the model after the optimization """
+        """Visualize the model after the optimization."""
         print(self.__model.kernel_.get_params())
         print(self.__model.log_marginal_likelihood())
         import matplotlib.pyplot as plt
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
index 59e385421..cc991dadf 100644
--- a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
+++ b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
@@ -1,22 +1,53 @@
-""" Lean implementation of Bayesian Optimization with GPyTorch """
+"""Lean implementation of Bayesian Optimization with GPyTorch."""
 # python
+import ast  # for casting strings to dict
+import warnings
 from copy import deepcopy
-from typing import Tuple
-from random import randint, shuffle, choice
 from math import ceil
-import warnings
-import ast    # for casting strings to dict
+from random import choice, randint, shuffle
+from typing import Tuple
 
 # external
 import numpy as np
 from numpy.random import default_rng
-import torch
-import gpytorch
-import arviz as az
 
-# internal
-from kernel_tuner.util import get_valid_configs
-from kernel_tuner.strategies import minimize
+from kernel_tuner.runners.runner import Runner
+from kernel_tuner.searchspace import Searchspace
+
+# optional
+try:
+    import gpytorch
+    import torch
+    # import arviz as az
+    bayes_opt_present = True
+
+    from torch import Tensor
+
+    class ExactGPModel(gpytorch.models.ExactGP):
+        """Exact GP with a zero mean and a Matern kernel; `cov_kernel_lengthscale` is passed as the Matern smoothness `nu`."""
+        def __init__(self, train_x, train_y, likelihood, cov_kernel_name: str, cov_kernel_lengthscale: float):
+            super().__init__(train_x, train_y, likelihood)
+            self.mean_module = gpytorch.means.ZeroMean()
+            if cov_kernel_name not in ('matern', 'matern_scalekernel'):    # fail early instead of leaving covar_module unset until forward()
+                raise ValueError(f"Covariance kernel must be 'matern' or 'matern_scalekernel', is {cov_kernel_name}")
+            matern_kernel = gpytorch.kernels.MaternKernel(nu=cov_kernel_lengthscale)
+            self.covar_module = matern_kernel if cov_kernel_name == 'matern' else gpytorch.kernels.ScaleKernel(matern_kernel)
+
+        def forward(self, x):
+            """Return the multivariate normal built from the mean and covariance modules evaluated at x."""
+            return gpytorch.distributions.MultivariateNormal(self.mean_module(x), self.covar_module(x))
+except ImportError:
+    bayes_opt_present = False
+
+    class Tensor():
+        pass
+
+    class ExactGPModel():
+        def __init__(self, *args, **kwargs):    # accept the real model's full signature so callers get ImportError, not TypeError
+            raise ImportError("GPyTorch not imported")
+        def forward(self, x):
+            raise ImportError("GPyTorch not imported")
+
 
 # set supported hyperparameter values
 supported_precisions = ['float', 'double']
@@ -39,8 +70,8 @@ def default_optimizer_learningrates(key):
     return defaults[key]
 
 
-def tune(runner, kernel_options, device_options, tuning_options):
-    """ Find the best performing kernel configuration in the parameter space
+def tune(searchspace: Searchspace, runner: Runner, tuning_options):
+    """Find the best performing kernel configuration in the parameter space.
 
     :params runner: A runner from kernel_tuner.runners
     :type runner: kernel_tuner.runner
@@ -62,6 +93,10 @@ def tune(runner, kernel_options, device_options, tuning_options):
     :rtype: list(dict()), dict()
 
     """
+    if not bayes_opt_present:
+        raise ImportError(
+            "Error: optional dependencies for Bayesian Optimization not installed, please install torch and gpytorch"
+        )
 
     # set CUDA availability
     use_cuda = False
@@ -75,14 +110,13 @@ def tune(runner, kernel_options, device_options, tuning_options):
     optimization_direction = options.get("optimization_direction", 'min')
     num_initial_samples = int(options.get("popsize", 20))
     max_fevals = int(options.get("max_fevals", 220))
-    max_threads = runner.dev.max_threads
 
     # enabling scaling will unscale and snap inputs on evaluation, more efficient to scale all at once and keep unscaled values
     tuning_options["snap"] = False
     tuning_options["scaling"] = False
 
     # prune the search space using restrictions
-    parameter_space = get_valid_configs(tuning_options, max_threads)
+    parameter_space = searchspace.list.copy()
 
     # limit max_fevals to max size of the parameter space
     max_fevals = min(len(parameter_space), max_fevals)
@@ -92,32 +126,16 @@ def tune(runner, kernel_options, device_options, tuning_options):
         )
 
     # execute Bayesian Optimization
-    BO = BayesianOptimization(parameter_space, kernel_options, tuning_options, runner, num_initial_samples, optimization_direction, device)
+    BO = BayesianOptimization(parameter_space, tuning_options, runner, num_initial_samples, optimization_direction, device)
     all_results = BO.optimize(max_fevals)
 
     return all_results, runner.dev.get_environment()
 
 
-class ExactGPModel(gpytorch.models.ExactGP):
-
-    def __init__(self, train_x, train_y, likelihood, cov_kernel_name: str, cov_kernel_lengthscale: float):
-        super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
-        self.mean_module = gpytorch.means.ZeroMean()
-        if cov_kernel_name == 'matern':
-            self.covar_module = gpytorch.kernels.MaternKernel(nu=cov_kernel_lengthscale)
-        elif cov_kernel_name == 'matern_scalekernel':
-            self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=cov_kernel_lengthscale))
-
-    def forward(self, x):
-        mean_x = self.mean_module(x)
-        covar_x = self.covar_module(x)
-        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
-
-
 class BayesianOptimization:
 
-    def __init__(self, parameter_space: list, kernel_options, tuning_options, runner, num_initial_samples: int, optimization_direction: str,
-                 device: torch.device) -> None:
+    def __init__(self, parameter_space: list, tuning_options, runner: Runner, num_initial_samples: int, optimization_direction: str,
+                 device) -> None:
         self.animate = False    # TODO remove
 
         # set defaults
@@ -128,7 +146,6 @@ def __init__(self, parameter_space: list, kernel_options, tuning_options, runner
         self.current_optimal_config = None
 
         # set Kernel Tuner data
-        self.kernel_options = kernel_options
         self.tuning_options = tuning_options
         self.runner = runner
         self.max_threads = runner.dev.max_threads
@@ -157,7 +174,7 @@ def __init__(self, parameter_space: list, kernel_options, tuning_options, runner
         self.af_params = af_params
 
         # set Tensors
-        self.device = device
+        self.device: torch.device = device
         self.out_device = torch.device("cpu")
         self.size = len(parameter_space)
         self.index_counter = torch.arange(self.size)
@@ -216,12 +233,12 @@ def __init__(self, parameter_space: list, kernel_options, tuning_options, runner
 
     @property
     def train_x(self):
-        """ Get the valid parameter configurations """
+        """Get the valid parameter configurations."""
         return self.param_configs_scaled[self.valid_configs].to(self.device)
 
     @property
     def train_y(self):
-        """ Get the valid results """
+        """Get the valid results."""
         outputs = self.results[self.valid_configs]
         if self.scaled_output:
             # z-score, remove mean and make unit variance to scale it to N(0,1)
@@ -231,7 +248,7 @@ def train_y(self):
 
     @property
     def train_y_err(self):
-        """ Get the error on the valid results """
+        """Get the error on the valid results."""
         std = self.results_std[self.valid_configs]
         if self.scaled_output and std.std() > 0.0:
             std = (std - std.mean()) / std.std()    # use z-score to get normalized variability
@@ -239,39 +256,39 @@ def train_y_err(self):
 
     @property
     def test_x(self):
-        """ Get the not yet visited parameter configurations """
+        """Get the not yet visited parameter configurations."""
         return self.param_configs_scaled[self.unvisited_configs].to(self.device)
 
     @property
     def test_x_unscaled(self):
-        """ Get the unscaled, not yet visited parameter configurations """
+        """Get the unscaled, not yet visited parameter configurations."""
         return self.param_configs[self.unvisited_configs]
 
     @property
     def test_y_err(self):
-        """ Get the expected error on the test set """
+        """Get the expected error on the test set."""
         train_y_err = self.train_y_err
         return torch.full((self.size - len(train_y_err), ), torch.mean(train_y_err))
 
     @property
     def invalid_x(self):
-        """ Get the invalid parameter configurations by checking which visited configs are not valid (equivalent to checking which unvisited configs are valid) """
+        """Get the invalid parameter configurations by checking which visited configs are not valid (equivalent to checking which unvisited configs are valid)."""
         invalid_mask = (self.unvisited_configs == self.valid_configs)
         return self.param_configs[invalid_mask]
 
     def true_param_config_index(self, target_index: int) -> int:
-        """ The index required to get the true config param index when dealing with test_x """
+        """The index required to get the true config param index when dealing with test_x."""
         # get the index of the #index-th True (for example the 9th+1 True could be index 13 because there are 4 Falses in between)
         masked_counter = self.index_counter[self.unvisited_configs]
         return masked_counter[target_index]
 
-    def true_param_config_indices(self, target_indices: torch.Tensor) -> torch.Tensor:
-        """ Same as true_param_config_index, but for an array of targets instead. """
+    def true_param_config_indices(self, target_indices: Tensor) -> Tensor:
+        """Same as true_param_config_index, but for an array of targets instead."""
         masked_counter = self.index_counter[self.unvisited_configs]
         return masked_counter.index_select(0, target_indices)
 
     def initialize_model(self, take_initial_sample=True, train_hyperparams=True):
-        """ Initialize the surrogate model """
+        """Initialize the surrogate model."""
         # self.initial_sample_std = self.min_std
         if take_initial_sample:
             self.initial_sample()
@@ -311,7 +328,7 @@ def initialize_model(self, take_initial_sample=True, train_hyperparams=True):
             self.train_hyperparams(0)
 
     def import_cached_evaluations(self):
-        """ Import the previously evaluated configurations into this run """
+        """Import the previously evaluated configurations into this run."""
         # make strings of all the parameter configurations in the search space
         param_config_strings = list()
         for param_config in self.true_param_configs:
@@ -329,7 +346,7 @@ def import_cached_evaluations(self):
         print(f"Imported {len(self.all_results)} previously evaluated configurations.")
 
     def initial_sample(self):
-        """ Take an initial sample of the parameter space """
+        """Take an initial sample of the parameter space."""
         list_param_config_indices = list(self.index_counter[~self.unvisited_configs])
 
         # generate a random offset from a normal distribution to add to the sample indices
@@ -378,8 +395,8 @@ def initial_sample(self):
         # save a boolean mask of the initial samples
         self.inital_sample_configs = self.valid_configs.detach().clone()
 
-    def get_lhs_samples(self, random_offsets: np.ndarray) -> torch.Tensor:
-        """ Get a centered Latin Hypercube Sample with a random offset """
+    def get_lhs_samples(self, random_offsets: np.ndarray) -> Tensor:
+        """Get a centered Latin Hypercube Sample with a random offset."""
         n_samples = self.num_initial_samples - self.fevals
 
         # first get the seperate parameter values to make possibly fictional distributed parameter configurations
@@ -444,7 +461,7 @@ def get_lhs_samples(self, random_offsets: np.ndarray) -> torch.Tensor:
         return param_configs_indices
 
     def take_min_max_initial_samples(self, list_param_config_indices: list, samples_per_parameter=1) -> list:
-        """ Take the minimum parameters and the maximum for each parameter to establish the effect of individual parameters """
+        """Take the minimum parameters and the maximum for each parameter to establish the effect of individual parameters."""
         # number of samples required is at least (samples_per_parameter) * (number of parameters) + 1
 
         # first get the individual parameter values and sort them
@@ -524,7 +541,7 @@ def take_min_max_initial_samples(self, list_param_config_indices: list, samples_
         return list_param_config_indices
 
     def get_middle_index_of_least_evaluated_region(self) -> int:
-        """ Get the middle index of the region of parameter configurations that is the least visited """
+        """Get the middle index of the region of parameter configurations that is the least visited."""
         # This uses the largest distance between visited parameter configurations. That means it does not properly take the parameters into account, only the index of the parameter configurations, whereas LHS does.
         distance_tensor = torch.arange(self.size)
 
@@ -542,7 +559,7 @@ def get_middle_index_of_least_evaluated_region(self) -> int:
         return middle_index
 
     def train_hyperparams(self, training_iter: int):
-        """ Optimize the surrogate model hyperparameters iteratively """
+        """Optimize the surrogate model hyperparameters iteratively."""
         self.model.train()
         self.likelihood.train()
 
@@ -601,7 +618,7 @@ def closure():
         self.likelihood.eval()
 
     def optimize(self, max_fevals: int) -> Tuple[tuple, float]:    #NOSONAR
-        """ Optimize the objective """
+        """Optimize the objective."""
         predictions_tuple = None
         short_param_config_index = None
         last_invalid = False
@@ -625,9 +642,9 @@ def optimize(self, max_fevals: int) -> Tuple[tuple, float]:    #NOSONAR
                 param_config_index = least_evaluated_region_index
                 short_param_config_index = -1
                 if mean_has_NaN:
-                    warning_reason = f"there were NaN in the predicted mean"
+                    warning_reason = "there were NaN in the predicted mean"
                 elif std_has_NaN:
-                    warning_reason = f"there were NaN in the predicted std"
+                    warning_reason = "there were NaN in the predicted std"
                 else:
                     warning_reason = "all STDs were the same"
                 warnings.warn(
@@ -677,10 +694,10 @@ def optimize(self, max_fevals: int) -> Tuple[tuple, float]:    #NOSONAR
         return self.all_results
 
     def objective_function(self, param_config: tuple) -> float:
-        return minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.all_results, check_restrictions=False)
+        return self.runner.run(param_config, self.tuning_options)
 
     def evaluate_config(self, param_config_index: int) -> float:
-        """ Evaluates a parameter configuration, returns the time """
+        """Evaluates a parameter configuration, returns the time."""
         param_config = self.true_param_configs[param_config_index]
         time = self.objective_function(param_config)
         self.register_result(time, param_config_index)
@@ -689,9 +706,9 @@ def evaluate_config(self, param_config_index: int) -> float:
         return time
 
     def register_result(self, result: float, param_config_index: int):
-        """ Registers the result to the Tensors and adds the hyperparameters to the results dict """
+        """Registers the result to the Tensors and adds the hyperparameters to the results dict."""
         # set the unvisited Tensors
-        if self.unvisited_configs[param_config_index] == False:
+        if not self.unvisited_configs[param_config_index]:  # truthiness, not `is False`: an indexed Tensor element is never the bool singleton, so an identity test would never fire
             raise ValueError(f"The param config index {param_config_index} was already set to False!")
         self.unvisited_configs[param_config_index] = False
 
@@ -712,13 +729,13 @@ def register_result(self, result: float, param_config_index: int):
         # TODO check if it is possible to write the results with hyperparameters to the cache if not in simulation mode, maybe with observer?
 
     def update_unique_results(self):
-        """ Updates the unique results dictionary """
+        """Updates the unique results dictionary."""
         record = self.all_results[-1]
         # make a unique string by taking every value in a result, if it already exists, it is overwritten
         self.unique_results.update({",".join([str(v) for k, v in record.items() if k in self.tuning_options.tune_params]): record["time"]})
 
-    def predict_list(self) -> Tuple[torch.Tensor, torch.Tensor]:
-        """ Returns the means and standard deviations predicted by the surrogate model for the unvisited parameter configurations """
+    def predict_list(self) -> Tuple[Tensor, Tensor]:
+        """Returns the means and standard deviations predicted by the surrogate model for the unvisited parameter configurations."""
         with torch.no_grad(), gpytorch.settings.fast_pred_samples(), gpytorch.settings.fast_pred_var():
             try:
                 observed_pred = self.likelihood(self.model(self.test_x))
@@ -735,16 +752,16 @@ def predict_list(self) -> Tuple[torch.Tensor, torch.Tensor]:
                 warnings.warn(str(e), RuntimeWarning)
                 return torch.ones_like(self.test_x), torch.zeros_like(self.test_x)
 
-    def get_diff_improvement(self, y_mu, y_std, fplus) -> torch.Tensor:
-        """ compute probability of improvement by assuming normality on the difference in improvement """
+    def get_diff_improvement(self, y_mu, y_std, fplus) -> Tensor:
+        """Compute probability of improvement by assuming normality on the difference in improvement."""
         diff_improvement = (y_mu - fplus) / y_std    # y_std can be very small, causing diff_improvement to be very large
         diff_improvement = (diff_improvement - diff_improvement.mean()) / max(diff_improvement.std(), self.min_std)    # force to N(0,1) with z-score
         if self.optimization_direction == 'max':
             diff_improvement = -diff_improvement
         return diff_improvement
 
-    def contextual_variance(self, mean: torch.Tensor, std: torch.Tensor):
-        """ Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018) """
+    def contextual_variance(self, mean: Tensor, std: Tensor):
+        """Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018)."""
         if not self.af_params['explorationfactor'] == 'CV':
             raise ValueError(f"Contextual Variance was called, but is not set as the exploration factor ({self.af_params['explorationfactor']})")
         if self.optimization_direction == 'max':
@@ -767,14 +784,13 @@ def contextual_variance(self, mean: torch.Tensor, std: torch.Tensor):
             raise NotImplementedError("Contextual Variance has not yet been implemented for non-scaled outputs")
 
     def af_random(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function returning a randomly shuffled list for comparison """
+        """Acquisition function returning a randomly shuffled list for comparison."""
         list_random = list(range(len(self.unvisited_param_configs)))
         shuffle(list_random)
         return list_random
 
-    def af_probability_of_improvement_tensor(self, predictions: Tuple[torch.Tensor, torch.Tensor], hyperparam=None) -> torch.Tensor:
-        """ Acquisition function Probability of Improvement (PoI) tensor-based """
-
+    def af_probability_of_improvement_tensor(self, predictions: Tuple[Tensor, Tensor], hyperparam=None) -> Tensor:
+        """Acquisition function Probability of Improvement (PoI) tensor-based."""
         # prefetch required data
         y_mu, y_std = predictions
         if hyperparam is None:
@@ -790,9 +806,8 @@ def af_probability_of_improvement_tensor(self, predictions: Tuple[torch.Tensor,
         #     raise FloatingPointError("You need to scale the diff_improvement-values!")
         return cdf
 
-    def af_expected_improvement_tensor(self, predictions: Tuple[torch.Tensor, torch.Tensor], hyperparam=None) -> torch.Tensor:
-        """ Acquisition function Expected Improvement (EI) tensor-based """
-
+    def af_expected_improvement_tensor(self, predictions: Tuple[Tensor, Tensor], hyperparam=None) -> Tensor:
+        """Acquisition function Expected Improvement (EI) tensor-based."""
         # prefetch required data
         y_mu, y_std = predictions
         if hyperparam is None:
@@ -819,7 +834,7 @@ def af_expected_improvement_tensor(self, predictions: Tuple[torch.Tensor, torch.
     """                  """
 
     def apply_scaling_to_inputs(self):
-        """ Scale the inputs using min-max normalization (0-1) and remove constant parameters """
+        """Scale the inputs using min-max normalization (0-1) and remove constant parameters."""
         param_configs_scaled = torch.zeros_like(self.param_configs)
 
         # first get the scaling factors of each parameter
@@ -849,13 +864,13 @@ def apply_scaling_to_inputs(self):
             self.param_configs_scaled[param_config_index] = param_config[unchanging_params_tensor]
         self.nonstatic_params = unchanging_params_tensor
 
-    def find_nearest(self, value, array: torch.Tensor):
-        """ Find the value nearest to the given value in the array """
+    def find_nearest(self, value, array: Tensor):
+        """Find the value nearest to the given value in the array."""
         index = (torch.abs(array - value)).argmin()
         return array[index]
 
     def get_hyperparam(self, name: str, default, supported_values=list(), type=None, cast=None):
-        """ Retrieve the value of a hyperparameter based on the name - beware that cast can be a reference to any function """
+        """Retrieve the value of a hyperparameter based on the name - beware that cast can be a reference to any function."""
         value = self.tuning_options.strategy_options.get(name, default)
 
         # check with predifined value list
@@ -873,12 +888,12 @@ def get_hyperparam(self, name: str, default, supported_values=list(), type=None,
             value = float(value)
         return value
 
-    def remove_from_predict_list(self, p: Tuple[torch.Tensor, torch.Tensor], i: int) -> Tuple[torch.Tensor, torch.Tensor]:
-        """ Remove an index from a tuple of predictions """
+    def remove_from_predict_list(self, p: Tuple[Tensor, Tensor], i: int) -> Tuple[Tensor, Tensor]:
+        """Remove an index from a tuple of predictions."""
         return torch.cat([p[0][:i], p[0][i + 1:]]), torch.cat([p[1][:i], p[1][i + 1:]])
 
     def set_acquisition_function(self, acquisition_function: str):
-        """ Set the acquisition function based on the name """
+        """Set the acquisition function based on the name."""
         if acquisition_function not in supported_methods:
             raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
 
@@ -889,8 +904,8 @@ def set_acquisition_function(self, acquisition_function: str):
         elif acquisition_function == 'random':
             self.acquisition_function = self.af_random
 
-    def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[torch.Tensor, dict]:
-        """ transform non-numerical or mixed-type parameters to numerical Tensor, also return new tune_params """
+    def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[Tensor, dict]:
+        """Transform non-numerical or mixed-type parameters to numerical Tensor, also return new tune_params."""
         parameter_space = deepcopy(parameter_space)
         number_of_params = len(parameter_space[0])
 
@@ -920,7 +935,7 @@ def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[torch.Te
         return torch.tensor(parameter_space, dtype=self.dtype).to(self.device), tune_params
 
     def visualize(self):
-        """ Visualize the surrogate model and observations in a plot """
+        """Visualize the surrogate model and observations in a plot."""
         if self.fevals < 220:
             return None
         from matplotlib import pyplot as plt

From e407a84d3332292c3f5b0e2aa9143bf3a4cf4c11 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 25 Oct 2024 17:30:12 -0700
Subject: [PATCH 013/168] Compatibility with optional dependencies

---
 kernel_tuner/strategies/bayes_opt.py     | 21 ++++--
 kernel_tuner/strategies/bayes_opt_old.py | 96 ++++++++++++------------
 2 files changed, 59 insertions(+), 58 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
index 89318cd04..c384ecb97 100644
--- a/kernel_tuner/strategies/bayes_opt.py
+++ b/kernel_tuner/strategies/bayes_opt.py
@@ -451,7 +451,7 @@ def predict_list(self, lst: list) -> Tuple[list, list, list]:
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             mu, std = self.__model.predict(lst, return_std=True)
-            return mu, std
+            return list(zip(mu, std)), mu, std
 
     def fit_observations_to_model(self):
         """Update the model based on the current list of observations."""
@@ -540,7 +540,7 @@ def initial_sample(self):
             if self.is_valid(observation):
                 collected_samples += 1
         self.fit_observations_to_model()
-        _, std = self.predict_list(self.unvisited_cache)
+        _, _, std = self.predict_list(self.unvisited_cache)
         self.initial_sample_mean = np.mean(self.__valid_observations)
         # Alternatively:
         # self.initial_sample_std = np.std(self.__valid_observations)
@@ -736,11 +736,11 @@ def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
                 if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
                     break
                 if increase_precision is True:
-                    predictions, _, std = self.predict_list(self.unvisited_cache)
+                    predictions = self.predict_list(self.unvisited_cache)
                     hyperparam = self.contextual_variance(std)
                 list_of_acquisition_values = af(predictions, hyperparam)
                 best_af = self.argopt(list_of_acquisition_values)
-                del predictions[best_af]  # to avoid going out of bounds
+                # del predictions[best_af]  # to avoid going out of bounds
                 candidate_params = self.unvisited_cache[best_af]
                 candidate_index = self.find_param_config_index(candidate_params)
                 observation = self.evaluate_objective_function(candidate_params)
@@ -855,13 +855,12 @@ def af_random(self, predictions=None, hyperparam=None) -> list:
     def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> list:
         """Acquisition function Probability of Improvement (PI)."""
         # prefetch required data
-        x_mu, x_std = predictions
         if hyperparam is None:
             hyperparam = self.af_params["explorationfactor"]
         fplus = self.current_optimum - hyperparam
 
         # precompute difference of improvement
-        list_diff_improvement = list(-((fplus - x_mu) / (x_std + 1e-9)) for (x_mu, x_std) in predictions)
+        list_diff_improvement = list(-((fplus - x_mu) / (x_std + 1e-9)) for x_mu, x_std in predictions[0])
 
         # compute probability of improvement with CDF in bulk
         list_prob_improvement = norm.cdf(list_diff_improvement)
@@ -870,10 +869,15 @@ def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> li
     def af_expected_improvement(self, predictions=None, hyperparam=None) -> list:
         """Acquisition function Expected Improvement (EI)."""
         # prefetch required data
-        x_mu, x_std = predictions
         if hyperparam is None:
             hyperparam = self.af_params["explorationfactor"]
         fplus = self.current_optimum - hyperparam
+        if len(predictions) == 3:
+            predictions, x_mu, x_std = predictions
+        elif len(predictions) == 2:
+            x_mu, x_std = predictions
+        else:
+            raise ValueError(f"Invalid predictions size {len(predictions)}")
 
         # precompute difference of improvement, CDF and PDF in bulk
         list_diff_improvement = list((fplus - x_mu) / (x_std + 1e-9) for (x_mu, x_std) in predictions)
@@ -892,6 +896,7 @@ def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
         if hyperparam is None:
             hyperparam = self.af_params["explorationfactor"]
         beta = hyperparam
+        _, x_mu, x_std = predictions
 
         # compute LCB in bulk
         list_lower_confidence_bound = (x_mu - beta * x_std)
@@ -900,7 +905,7 @@ def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
     def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None) -> list:
         """Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010."""
         # prefetch required data
-        x_mu, x_std = predictions
+        _, x_mu, x_std = predictions
         if hyperparam is None:
             hyperparam = self.af_params["explorationfactor"]
 
diff --git a/kernel_tuner/strategies/bayes_opt_old.py b/kernel_tuner/strategies/bayes_opt_old.py
index 6107fad0b..c3381731a 100644
--- a/kernel_tuner/strategies/bayes_opt_old.py
+++ b/kernel_tuner/strategies/bayes_opt_old.py
@@ -1,32 +1,33 @@
-""" Bayesian Optimization implementation from the thesis by Willemsen """
-from copy import deepcopy
-from random import randint, shuffle
+"""Bayesian Optimization implementation from the thesis by Willemsen."""
 import itertools
-import warnings
 import time
+import warnings
+from copy import deepcopy
+from random import randint, shuffle
 
 import numpy as np
 
 # BO imports
 try:
     from typing import Tuple
+
     from scipy.stats import norm
-    from sklearn.gaussian_process import GaussianProcessRegressor
-    from sklearn.gaussian_process.kernels import ConstantKernel, RBF, Matern
     from sklearn.exceptions import ConvergenceWarning
+    from sklearn.gaussian_process import GaussianProcessRegressor
+    from sklearn.gaussian_process.kernels import RBF, ConstantKernel, Matern
     from skopt.sampler import Lhs
     bayes_opt_present = True
 except ImportError:
     bayes_opt_present = False
 
-from kernel_tuner.strategies import minimize
 from kernel_tuner import util
+from kernel_tuner.strategies import minimize
 
 supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast"]
 
 
 def generate_normalized_param_dicts(tune_params: dict, eps: float) -> Tuple[dict, dict]:
-    """ Generates normalization and denormalization dictionaries """
+    """Generates normalization and denormalization dictionaries."""
     original_to_normalized = dict()
     normalized_to_original = dict()
     for param_name in tune_params.keys():
@@ -42,14 +43,14 @@ def generate_normalized_param_dicts(tune_params: dict, eps: float) -> Tuple[dict
 
 
 def normalize_parameter_space(param_space: list, tune_params: dict, normalized: dict) -> list:
-    """ Normalize the parameter space given a normalization dictionary """
+    """Normalize the parameter space given a normalization dictionary."""
     keys = list(tune_params.keys())
     param_space_normalized = list(tuple(normalized[keys[i]][v] for i, v in enumerate(params)) for params in param_space)
     return param_space_normalized
 
 
 def prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict):
-    """ Pruning of the parameter space to remove dimensions that have a constant parameter """
+    """Pruning of the parameter space to remove dimensions that have a constant parameter."""
     pruned_tune_params_mask = list()
     removed_tune_params = list()
     param_names = list(tune_params.keys())
@@ -68,7 +69,7 @@ def prune_parameter_space(parameter_space, tuning_options, tune_params, normaliz
 
 
 def tune(runner, kernel_options, device_options, tuning_options):
-    """ Find the best performing kernel configuration in the parameter space
+    """Find the best performing kernel configuration in the parameter space.
 
     :params runner: A runner from kernel_tuner.runners
     :type runner: kernel_tuner.runner
@@ -90,7 +91,6 @@ def tune(runner, kernel_options, device_options, tuning_options):
     :rtype: list(dict()), dict()
 
     """
-
     max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
     prune_parameterspace = tuning_options.strategy_options.get("pruneparameterspace", True)
     if not bayes_opt_present:
@@ -252,19 +252,19 @@ def current_optimum(self, value: float):
         self.__current_optimum = value
 
     def is_better_than(self, a: float, b: float) -> bool:
-        """ Determines which one is better depending on optimization direction """
+        """Determines which one is better depending on optimization direction."""
         return a < b if self.opt_direction == 'min' else a > b
 
     def is_not_visited(self, index: int) -> bool:
-        """ Returns whether a searchspace index has not been visited """
+        """Returns whether a searchspace index has not been visited."""
         return not self.__visited_searchspace_indices[index]
 
     def is_valid(self, observation: float) -> bool:
-        """ Returns whether an observation is valid """
-        return not (observation == None or observation == self.invalid_value or observation == np.NaN)
+        """Returns whether an observation is valid."""
+        return not (observation is None or observation == self.invalid_value or observation != observation)  # x != x holds only for NaN; `== np.NaN` never matches and np.NaN was removed in NumPy 2.0
 
     def get_af_by_name(self, name: str):
-        """ Get the basic acquisition functions by their name """
+        """Get the basic acquisition functions by their name."""
         basic_af_names = ['ei', 'poi', 'lcb']
         if name == 'ei':
             return self.af_expected_improvement
@@ -275,7 +275,7 @@ def get_af_by_name(self, name: str):
         raise ValueError(f"{name} not in {basic_af_names}")
 
     def set_acquisition_function(self, acquisition_function: str):
-        """ Set the acquisition function """
+        """Set the acquisition function."""
         if acquisition_function == 'poi':
             self.__af = self.af_probability_of_improvement
         elif acquisition_function == 'ei':
@@ -296,7 +296,7 @@ def set_acquisition_function(self, acquisition_function: str):
             raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
 
     def set_surrogate_model(self, cov_kernel_name: str, cov_kernel_lengthscale: float):
-        """ Set the surrogate model with a covariance function and lengthscale """
+        """Set the surrogate model with a covariance function and lengthscale."""
         if cov_kernel_name == "constantrbf":
             kernel = ConstantKernel(1.0, constant_value_bounds="fixed") * RBF(cov_kernel_lengthscale, length_scale_bounds="fixed")
         elif cov_kernel_name == "rbf":
@@ -310,7 +310,7 @@ def set_surrogate_model(self, cov_kernel_name: str, cov_kernel_lengthscale: floa
         self.__model = GaussianProcessRegressor(kernel=kernel, alpha=1e-10, normalize_y=True)    # maybe change alpha to a higher value such as 1e-5?
 
     def valid_params_observations(self) -> Tuple[list, list]:
-        """ Returns a list of valid observations and their parameter configurations """
+        """Returns a list of valid observations and their parameter configurations."""
         # if you do this every iteration, better keep it as cache and update in update_after_evaluation
         params = list()
         observations = list()
@@ -321,30 +321,30 @@ def valid_params_observations(self) -> Tuple[list, list]:
         return params, observations
 
     def unvisited(self) -> list:
-        """ Returns a list of unvisited parameter configurations - attention: cached version exists! """
+        """Returns a list of unvisited parameter configurations - attention: cached version exists!"""
         params = list(self.searchspace[index] for index, visited in enumerate(self.__visited_searchspace_indices) if visited is False)
         return params
 
     def find_param_config_index(self, param_config: tuple) -> int:
-        """ Find a parameter config index in the search space if it exists """
+        """Find a parameter config index in the search space if it exists."""
         return self.searchspace.index(param_config)
 
     def find_param_config_unvisited_index(self, param_config: tuple) -> int:
-        """ Find a parameter config index in the unvisited cache if it exists """
+        """Find a parameter config index in the unvisited cache if it exists."""
         return self.unvisited_cache.index(param_config)
 
     def normalize_param_config(self, param_config: tuple) -> tuple:
-        """ Normalizes a parameter configuration """
+        """Normalizes a parameter configuration."""
         normalized = tuple(self.normalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
         return normalized
 
     def denormalize_param_config(self, param_config: tuple) -> tuple:
-        """ Denormalizes a parameter configuration """
+        """Denormalizes a parameter configuration."""
         denormalized = tuple(self.denormalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
         return denormalized
 
     def unprune_param_config(self, param_config: tuple) -> tuple:
-        """ In case of pruned dimensions, adds the removed dimensions back in the param config """
+        """In case of pruned dimensions, adds the removed dimensions back in the param config."""
         unpruned = list()
         pruned_count = 0
         for removed in self.removed_tune_params:
@@ -356,7 +356,7 @@ def unprune_param_config(self, param_config: tuple) -> tuple:
         return tuple(unpruned)
 
     def update_after_evaluation(self, observation: float, index: int, param_config: tuple):
-        """ Adjust the visited and valid index records accordingly """
+        """Adjust the visited and valid index records accordingly."""
         validity = self.is_valid(observation)
         self.__visited_num += 1
         self.__observations[index] = observation
@@ -371,22 +371,22 @@ def update_after_evaluation(self, observation: float, index: int, param_config:
                 self.current_optimum = observation
 
     def predict(self, x) -> Tuple[float, float]:
-        """ Returns a mean and standard deviation predicted by the surrogate model for the parameter configuration """
+        """Returns a mean and standard deviation predicted by the surrogate model for the parameter configuration."""
         return self.__model.predict([x], return_std=True)
 
     def predict_list(self, lst: list) -> Tuple[list, list, list]:
-        """ Returns a list of means and standard deviations predicted by the surrogate model for the parameter configurations, and separate lists of means and standard deviations """
+        """Returns a list of means and standard deviations predicted by the surrogate model for the parameter configurations, and separate lists of means and standard deviations."""
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             mu, std = self.__model.predict(lst, return_std=True)
             return list(zip(mu, std)), mu, std
 
     def fit_observations_to_model(self):
-        """ Update the model based on the current list of observations """
+        """Update the model based on the current list of observations."""
         self.__model.fit(self.__valid_params, self.__valid_observations)
 
     def evaluate_objective_function(self, param_config: tuple) -> float:
-        """ Evaluates the objective function """
+        """Evaluates the objective function."""
         param_config = self.unprune_param_config(param_config)
         denormalized_param_config = self.denormalize_param_config(param_config)
         if not util.config_valid(denormalized_param_config, self.tuning_options, self.max_threads):
@@ -396,11 +396,11 @@ def evaluate_objective_function(self, param_config: tuple) -> float:
         return val
 
     def dimensions(self) -> list:
-        """ List of parameter values per parameter """
+        """List of parameter values per parameter."""
         return self.tune_params.values()
 
     def draw_random_sample(self) -> Tuple[list, int]:
-        """ Draw a random sample from the unvisited parameter configurations """
+        """Draw a random sample from the unvisited parameter configurations."""
         if len(self.unvisited_cache) < 1:
             raise ValueError("Searchspace exhausted during random sample draw as no valid configurations were found")
         index = randint(0, len(self.unvisited_cache) - 1)    # NOSONAR
@@ -409,7 +409,7 @@ def draw_random_sample(self) -> Tuple[list, int]:
         return param_config, actual_index
 
     def draw_latin_hypercube_samples(self, num_samples: int) -> list:
-        """ Draws an LHS-distributed sample from the search space """
+        """Draws an LHS-distributed sample from the search space."""
         if self.searchspace_size < num_samples:
             raise ValueError("Can't sample more than the size of the search space")
         if self.sampling_crit is None:
@@ -432,7 +432,7 @@ def draw_latin_hypercube_samples(self, num_samples: int) -> list:
         return list(zip(normalized_param_configs, indices))
 
     def initial_sample(self):
-        """ Draws an initial sample using random sampling """
+        """Draws an initial sample using random sampling."""
         if self.num_initial_samples <= 0:
             raise ValueError("At least one initial sample is required")
         if self.sampling_method == 'lhs':
@@ -466,7 +466,7 @@ def initial_sample(self):
         self.cv_norm_maximum = self.initial_std
 
     def contextual_variance(self, std: list):
-        """ Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018) """
+        """Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018)."""
         if not self.af_params['explorationfactor'] == 'CV':
             return None
         if self.opt_direction == 'min':
@@ -484,7 +484,7 @@ def contextual_variance(self, std: list):
         return np.mean(std) / self.current_optimum
 
     def __optimize(self, max_fevals):
-        """ Find the next best candidate configuration(s), evaluate those and update the model accordingly """
+        """Find the next best candidate configuration(s), evaluate those and update the model accordingly."""
         while self.fevals < max_fevals:
             if self.__visited_num >= self.searchspace_size:
                 raise ValueError(self.error_message_searchspace_fully_observed)
@@ -501,7 +501,7 @@ def __optimize(self, max_fevals):
         return self.results
 
     def __optimize_multi(self, max_fevals):
-        """ Optimize with a portfolio of multiple acquisition functions. Predictions are always only taken once. Skips AFs if they suggest X/max_evals duplicates in a row, prefers AF with best discounted average. """
+        """Optimize with a portfolio of multiple acquisition functions. Predictions are always only taken once. Skips AFs if they suggest X/max_evals duplicates in a row, prefers AF with best discounted average."""
         if self.opt_direction != 'min':
             raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
         # calculate how many times an AF can suggest a duplicate candidate before the AF is skipped
@@ -612,7 +612,7 @@ def __optimize_multi(self, max_fevals):
         return self.results
 
     def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
-        """ Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once, unless increase_precision is true. Skips AFs if they are consistently worse than the mean of discounted observations, promotes AFs if they are consistently better than this mean. """
+        """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once, unless increase_precision is true. Skips AFs if they are consistently worse than the mean of discounted observations, promotes AFs if they are consistently better than this mean."""
         if self.opt_direction != 'min':
             raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
         aqfs = self.multi_afs
@@ -713,7 +713,7 @@ def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
         return self.results
 
     def __optimize_multi_fast(self, max_fevals):
-        """ Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once. """
+        """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once."""
         while self.fevals < max_fevals:
             aqfs = self.multi_afs
             # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
@@ -735,14 +735,13 @@ def __optimize_multi_fast(self, max_fevals):
         return self.results
 
     def af_random(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function returning a randomly shuffled list for comparison """
+        """Acquisition function returning a randomly shuffled list for comparison."""
         list_random = range(len(self.unvisited_cache))
         shuffle(list_random)
         return list_random
 
     def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function Probability of Improvement (PI) """
-
+        """Acquisition function Probability of Improvement (PI)."""
         # prefetch required data
         if predictions is None:
             predictions, _, _ = self.predict_list(self.unvisited_cache)
@@ -759,8 +758,7 @@ def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> li
         return list_prob_improvement
 
     def af_expected_improvement(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function Expected Improvement (EI) """
-
+        """Acquisition function Expected Improvement (EI)."""
         # prefetch required data
         if predictions is None:
             predictions, _, _ = self.predict_list(self.unvisited_cache)
@@ -784,8 +782,7 @@ def exp_improvement(index) -> float:
         return list_exp_improvement
 
     def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function Lower Confidence Bound (LCB) """
-
+        """Acquisition function Lower Confidence Bound (LCB)."""
         # prefetch required data
         if predictions is None:
             predictions, _, _ = self.predict_list(self.unvisited_cache)
@@ -798,8 +795,7 @@ def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
         return list_lower_confidence_bound
 
     def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None) -> list:
-        """ Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010 """
-
+        """Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010."""
         # prefetch required data
         if predictions is None:
             predictions, _, _ = self.predict_list(self.unvisited_cache)
@@ -818,7 +814,7 @@ def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None)
         return list_lower_confidence_bound
 
     def visualize_after_opt(self):
-        """ Visualize the model after the optimization """
+        """Visualize the model after the optimization."""
         print(self.__model.kernel_.get_params())
         print(self.__model.log_marginal_likelihood())
         import matplotlib.pyplot as plt

From e6c457da8a988d5a2e40b6fb02de115d47e3cd2e Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 29 Oct 2024 12:51:43 -0700
Subject: [PATCH 014/168] Improved time unit conversion

---
 kernel_tuner/core.py       |  2 +-
 kernel_tuner/file_utils.py |  2 +-
 kernel_tuner/interface.py  | 19 +++++++++++++++----
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py
index f139111e7..6dc580850 100644
--- a/kernel_tuner/core.py
+++ b/kernel_tuner/core.py
@@ -598,7 +598,7 @@ def compile_and_benchmark(self, kernel_source, gpu_args, params, kernel_options,
                     if kernel_options.texmem_args is not None:
                         self.dev.copy_texture_memory_args(kernel_options.texmem_args)
 
-                # stop compilation stopwatch and convert to miliseconds
+                # stop compilation stopwatch and convert to milliseconds
                 last_compilation_time = 1000 * (time.perf_counter() - start_compilation)
 
                 # test kernel for correctness
diff --git a/kernel_tuner/file_utils.py b/kernel_tuner/file_utils.py
index 2b75cc023..9231f0e2e 100644
--- a/kernel_tuner/file_utils.py
+++ b/kernel_tuner/file_utils.py
@@ -152,7 +152,7 @@ def get_t4_results(results, tune_params, objective="time"):
 
     # write output_data to a JSON file
     version, _ = output_file_schema("results")
-    output_json = dict(results=output_data, schema_version=version, metadata={'timeunit': 'miliseconds'})
+    output_json = dict(results=output_data, schema_version=version, metadata={'timeunit': 'milliseconds'})
     return output_json
 
 def store_output_file(output_filename: str, results, tune_params, objective="time"):
diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index a557ae589..2bfa06a89 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -613,8 +613,12 @@ def tune_kernel(
     util.append_default_block_size_names(block_size_names)
 
     # if the restrictions are not constraints or a callable, the restrictions are strings, so parse them to functions (increases restrictions check performance significantly)
-    if restrictions is not None and not callable(restrictions) and not any(isinstance(r, Constraint) for r in restrictions):
-        restrictions = util.parse_restrictions(restrictions)
+    if (
+        restrictions is not None
+        and not callable(restrictions)
+        and not any(isinstance(r, Constraint) for r in restrictions)
+    ):
+        restrictions = util.parse_restrictions(restrictions, tune_params)
 
     # sort all the options into separate dicts
     opts = locals()
@@ -854,7 +858,14 @@ def _check_user_input(kernel_name, kernelsource, arguments, block_size_names):
     util.check_block_size_names(block_size_names)
 
 
-def tune_kernel_T1(input_filepath: Path, cache_filepath: Path = None, simulation_mode = False, output_T4 = True, iterations = 7, strategy_options = None):
+def tune_kernel_T1(
+    input_filepath: Path,
+    cache_filepath: Path = None,
+    simulation_mode=False,
+    output_T4=True,
+    iterations=7,
+    strategy_options=None,
+):
     """Call the tune function with a T1 input file."""
     inputs = get_input_file(input_filepath)
     kernelspec: dict = inputs["KernelSpecification"]
@@ -952,7 +963,7 @@ def tune_kernel_T1(input_filepath: Path, cache_filepath: Path = None, simulation
         verbose=False,
         iterations=iterations,
         strategy=strategy,
-        strategy_options=strategy_options
+        strategy_options=strategy_options,
     )
     if output_T4:
         return get_t4_metadata(), get_t4_results(results, tune_params)

From a9f8de4303b9442fb1bb96972bb129c4d43583d3 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 29 Oct 2024 14:39:35 -0700
Subject: [PATCH 015/168] Changed hyperparameter tuning setup

---
 kernel_tuner/hyper.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 37235a26b..08d998dd3 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -86,20 +86,20 @@ def put_if_not_present(target_dict, key, value):
     return list(result_unique.values()), env
 
 if __name__ == "__main__":  # TODO remove in production
-    # hyperparams = {
-    #     'popsize': [10, 20, 30],
-    #     'maxiter': [50, 100, 150],
-    #     'w': [0.25, 0.5, 0.75],
-    #     'c1': [1.0, 2.0, 3.0],
-    #     'c2': [0.5, 1.0, 1.5]
-    # }
-    # result, env = tune_hyper_params('pso', hyperparams)
     hyperparams = {
-        'neighbor': ['Hamming', 'adjacent'],
-        'restart': [True, False],
-        'no_improvement': [1, 10, 25, 33, 50, 66, 75, 100, 200],
-        'random_walk': [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99]
+        'popsize': [10, 20, 30],
+        'maxiter': [50, 100, 150],
+        'w': [0.25, 0.5, 0.75],
+        'c1': [1.0, 2.0, 3.0],
+        'c2': [0.5, 1.0, 1.5]
     }
-    result, env = tune_hyper_params('greedy_ils', hyperparams)
+    result, env = tune_hyper_params('pso', hyperparams)
+    # hyperparams = {
+    #     'neighbor': ['Hamming', 'adjacent'],
+    #     'restart': [True, False],
+    #     'no_improvement': [1, 10, 25, 33, 50, 66, 75, 100, 200],
+    #     'random_walk': [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99]
+    # }
+    # result, env = tune_hyper_params('greedy_ils', hyperparams)
     print(result)
     print(env['best_config'])

From 42319998384b4a0211ede5f2e14aacfd9e7233d2 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 29 Oct 2024 15:28:00 -0700
Subject: [PATCH 016/168] Added the hyperparamtuning experiments file

---
 .gitignore            |   3 +-
 hyperparamtuning.json | 103 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100644 hyperparamtuning.json

diff --git a/.gitignore b/.gitignore
index e38385b00..39d734594 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,7 @@
 poetry.lock
 noxenv.txt
 noxsettings.toml
-hyperparamtuning/
+hyperparamtuning/*
 
 ### Python ###
 *.pyc
@@ -17,6 +17,7 @@ push_to_pypi.sh
 .nfs*
 *.log
 *.json
+!hyperparamtuning.json
 !kernel_tuner/schema/T1/1.0.0/input-schema.json
 !test/test_T1_input.json
 *.csv
diff --git a/hyperparamtuning.json b/hyperparamtuning.json
new file mode 100644
index 000000000..19dba21cb
--- /dev/null
+++ b/hyperparamtuning.json
@@ -0,0 +1,103 @@
+{
+    "version": "1.1.0",
+    "name": "hyperparamtuning",
+    "parent_folder": "/Users/fjwillemsen/University/PhD/OneDrive_Netherlands_eScience_Center/Projects/Bayesian Optimization in Kernel Tuner/Code/kernel_tuner/hyperparamtuning",
+    "experimental_groups_defaults": {
+        "applications": [
+            {
+                "name": "convolution",
+                "folder": "../autotuning_methodology/cached_data_used/kernels",
+                "input_file": "convolution.json"
+            },
+            {
+                "name": "pnpoly",
+                "folder": "../autotuning_methodology/cached_data_used/kernels",
+                "input_file": "pnpoly.json"
+            }
+        ],
+        "gpus": [
+            "RTX_3090",
+            "RTX_2080_Ti"
+        ],
+        "pattern_for_full_search_space_filenames": {
+            "regex": "../autotuning_methodology/cached_data_used/cachefiles/${applications}/${gpus}_T4.json"
+        },
+        "stochastic": true,
+        "repeats": 25,
+        "samples": 1,
+        "minimum_number_of_valid_search_iterations": 20,
+        "ignore_cache": false
+    },
+    "search_strategies": [
+        {
+            "autotuner": "KernelTuner",
+            "name": "genetic_algorithm_popsize=5_maxiter=5_method=uniform_mutation_chance=10",
+            "display_name": "Genetic algorithm",
+            "search_method": "genetic_algorithm",
+            "search_method_hyperparameters": [
+                {
+                    "name": "popsize",
+                    "value": 5
+                },
+                {
+                    "name": "maxiter",
+                    "value": 5
+                },
+                {
+                    "name": "method",
+                    "value": "uniform"
+                },
+                {
+                    "name": "mutation_chance",
+                    "value": 10
+                }
+            ]
+        }
+    ],
+    "statistics_settings": {
+        "minimization": true,
+        "cutoff_percentile": 0.96,
+        "cutoff_percentile_start": 0.5,
+        "cutoff_type": "fevals",
+        "objective_time_keys": [
+            "all"
+        ],
+        "objective_performance_keys": [
+            "time"
+        ]
+    },
+    "visualization_settings": {
+        "plots": [
+            {
+                "scope": "searchspace",
+                "style": "line",
+                "x_axis_value_types": [
+                    "fevals"
+                ],
+                "y_axis_value_types": [
+                    "normalized",
+                    "baseline"
+                ]
+            },
+            {
+                "scope": "searchspace",
+                "style": "line",
+                "x_axis_value_types": [
+                    "time"
+                ],
+                "y_axis_value_types": [
+                    "normalized",
+                    "baseline"
+                ]
+            },
+            {
+                "scope": "aggregate",
+                "style": "line"
+            }
+        ],
+        "resolution": 1000.0,
+        "confidence_level": 0.95,
+        "compare_baselines": false,
+        "compare_split_times": false
+    }
+}
\ No newline at end of file

From 6fe94ca21b10a98c756052b5618e2f109f474466 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 29 Oct 2024 15:42:58 -0700
Subject: [PATCH 017/168] Changed from BAT to HIP paper searchspaces

---
 hyperparamtuning.json | 49 +++++++++++++++++++------------------------
 1 file changed, 22 insertions(+), 27 deletions(-)

diff --git a/hyperparamtuning.json b/hyperparamtuning.json
index 19dba21cb..947eb2904 100644
--- a/hyperparamtuning.json
+++ b/hyperparamtuning.json
@@ -5,19 +5,32 @@
     "experimental_groups_defaults": {
         "applications": [
             {
-                "name": "convolution",
+                "name": "convolution_milo",
                 "folder": "../autotuning_methodology/cached_data_used/kernels",
-                "input_file": "convolution.json"
+                "input_file": "convolution_milo.json"
             },
             {
-                "name": "pnpoly",
+                "name": "dedisp_milo",
                 "folder": "../autotuning_methodology/cached_data_used/kernels",
-                "input_file": "pnpoly.json"
+                "input_file": "dedisp_milo.json"
+            },
+            {
+                "name": "gemm_milo",
+                "folder": "../autotuning_methodology/cached_data_used/kernels",
+                "input_file": "dedisp_milo.json"
+            },
+            {
+                "name": "hotspot_milo",
+                "folder": "../autotuning_methodology/cached_data_used/kernels",
+                "input_file": "dedisp_milo.json"
             }
         ],
         "gpus": [
-            "RTX_3090",
-            "RTX_2080_Ti"
+            "A100",
+            "A4000",
+            "MI50",
+            "MI250X",
+            "W6600"
         ],
         "pattern_for_full_search_space_filenames": {
             "regex": "../autotuning_methodology/cached_data_used/cachefiles/${applications}/${gpus}_T4.json"
@@ -31,27 +44,9 @@
     "search_strategies": [
         {
             "autotuner": "KernelTuner",
-            "name": "genetic_algorithm_popsize=5_maxiter=5_method=uniform_mutation_chance=10",
-            "display_name": "Genetic algorithm",
-            "search_method": "genetic_algorithm",
-            "search_method_hyperparameters": [
-                {
-                    "name": "popsize",
-                    "value": 5
-                },
-                {
-                    "name": "maxiter",
-                    "value": 5
-                },
-                {
-                    "name": "method",
-                    "value": "uniform"
-                },
-                {
-                    "name": "mutation_chance",
-                    "value": 10
-                }
-            ]
+            "name": "brute_force",
+            "display_name": "Brute force",
+            "search_method": "brute_force"
         }
     ],
     "statistics_settings": {

From 05c39cbad6cefdfe64c76083a702fc8d5aebc12d Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 29 Oct 2024 15:59:34 -0700
Subject: [PATCH 018/168] Changed from BAT to HIP paper searchspaces in the hypertuner backend

---
 .gitignore                          |  1 -
 hyperparamtuning.json               | 98 -----------------------------
 kernel_tuner/backends/hypertuner.py | 25 ++++----
 kernel_tuner/hyper.py               |  3 +-
 4 files changed, 13 insertions(+), 114 deletions(-)
 delete mode 100644 hyperparamtuning.json

diff --git a/.gitignore b/.gitignore
index 39d734594..47ffc4024 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,7 +17,6 @@ push_to_pypi.sh
 .nfs*
 *.log
 *.json
-!hyperparamtuning.json
 !kernel_tuner/schema/T1/1.0.0/input-schema.json
 !test/test_T1_input.json
 *.csv
diff --git a/hyperparamtuning.json b/hyperparamtuning.json
deleted file mode 100644
index 947eb2904..000000000
--- a/hyperparamtuning.json
+++ /dev/null
@@ -1,98 +0,0 @@
-{
-    "version": "1.1.0",
-    "name": "hyperparamtuning",
-    "parent_folder": "/Users/fjwillemsen/University/PhD/OneDrive_Netherlands_eScience_Center/Projects/Bayesian Optimization in Kernel Tuner/Code/kernel_tuner/hyperparamtuning",
-    "experimental_groups_defaults": {
-        "applications": [
-            {
-                "name": "convolution_milo",
-                "folder": "../autotuning_methodology/cached_data_used/kernels",
-                "input_file": "convolution_milo.json"
-            },
-            {
-                "name": "dedisp_milo",
-                "folder": "../autotuning_methodology/cached_data_used/kernels",
-                "input_file": "dedisp_milo.json"
-            },
-            {
-                "name": "gemm_milo",
-                "folder": "../autotuning_methodology/cached_data_used/kernels",
-                "input_file": "dedisp_milo.json"
-            },
-            {
-                "name": "hotspot_milo",
-                "folder": "../autotuning_methodology/cached_data_used/kernels",
-                "input_file": "dedisp_milo.json"
-            }
-        ],
-        "gpus": [
-            "A100",
-            "A4000",
-            "MI50",
-            "MI250X",
-            "W6600"
-        ],
-        "pattern_for_full_search_space_filenames": {
-            "regex": "../autotuning_methodology/cached_data_used/cachefiles/${applications}/${gpus}_T4.json"
-        },
-        "stochastic": true,
-        "repeats": 25,
-        "samples": 1,
-        "minimum_number_of_valid_search_iterations": 20,
-        "ignore_cache": false
-    },
-    "search_strategies": [
-        {
-            "autotuner": "KernelTuner",
-            "name": "brute_force",
-            "display_name": "Brute force",
-            "search_method": "brute_force"
-        }
-    ],
-    "statistics_settings": {
-        "minimization": true,
-        "cutoff_percentile": 0.96,
-        "cutoff_percentile_start": 0.5,
-        "cutoff_type": "fevals",
-        "objective_time_keys": [
-            "all"
-        ],
-        "objective_performance_keys": [
-            "time"
-        ]
-    },
-    "visualization_settings": {
-        "plots": [
-            {
-                "scope": "searchspace",
-                "style": "line",
-                "x_axis_value_types": [
-                    "fevals"
-                ],
-                "y_axis_value_types": [
-                    "normalized",
-                    "baseline"
-                ]
-            },
-            {
-                "scope": "searchspace",
-                "style": "line",
-                "x_axis_value_types": [
-                    "time"
-                ],
-                "y_axis_value_types": [
-                    "normalized",
-                    "baseline"
-                ]
-            },
-            {
-                "scope": "aggregate",
-                "style": "line"
-            }
-        ],
-        "resolution": 1000.0,
-        "confidence_level": 0.95,
-        "compare_baselines": false,
-        "compare_split_times": false
-    }
-}
\ No newline at end of file
diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index 65a263ce1..53e5dd6da 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -61,20 +61,17 @@ def compile(self, kernel_instance):
         path.mkdir(exist_ok=True)
 
         # TODO get applications & GPUs args from benchmark
-        gpus = ["RTX_3090", "RTX_2080_Ti"]
-        applications = None
-        # applications = [
-        #     {
-        #         "name": "convolution",
-        #         "folder": "./cached_data_used/kernels",
-        #         "input_file": "convolution.json"
-        #     },
-        #     {
-        #         "name": "pnpoly",
-        #         "folder": "./cached_data_used/kernels",
-        #         "input_file": "pnpoly.json"
-        #     }
-        # ]
+        # gpus = ["RTX_3090", "RTX_2080_Ti"]
+        # applications = None
+
+        gpus = ["A100", "A4000", "MI50", "MI250X", "W6600"]
+        applications = [
+            {
+                "name": "convolution_milo",
+                "folder": "../autotuning_methodology/cached_data_used/kernels",
+                "input_file": "convolution_milo.json"
+            }
+        ]
 
         # strategy settings
         strategy: str = kernel_instance.arguments[0]
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 08d998dd3..8c0fb5d4e 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -60,6 +60,7 @@ def tune_hyper_params(target_strategy: str, hyper_params: dict, *args, **kwargs)
 
     # pass a temporary cache file to avoid duplicate execution
     cachefile = get_random_unique_filename('temp_', '.json')
+    cachefile = Path("hyperparamtuning_milo_bruteforce.json")
     kwargs['cache'] = str(cachefile)
 
     def put_if_not_present(target_dict, key, value):
@@ -68,7 +69,7 @@ def put_if_not_present(target_dict, key, value):
     put_if_not_present(kwargs, "verbose", True)
     put_if_not_present(kwargs, "quiet", False)
     kwargs['simulation_mode'] = False
-    kwargs['strategy'] = 'dual_annealing'
+    kwargs['strategy'] = 'brute_force'
     kwargs['verify'] = None
     arguments = [target_strategy]
 

From b0e457325aaaca66fc3a67c4b2163e8d33bc413e Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 29 Oct 2024 17:40:37 -0700
Subject: [PATCH 019/168] Complex restrictions with tunable parameters provided
 are compiled

---
 kernel_tuner/searchspace.py | 5 +++--
 kernel_tuner/util.py        | 9 +++++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index e36fca54e..cc569abc5 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -66,7 +66,8 @@ def __init__(
         restrictions = [restrictions] if not isinstance(restrictions, list) else restrictions
         if (
             len(restrictions) > 0
-            and any(isinstance(restriction, str) for restriction in restrictions)
+            and (any(isinstance(restriction, str) for restriction in restrictions)
+            or any(isinstance(restriction[0], str) for restriction in restrictions if isinstance(restriction, tuple)))
             and not (framework_l == "pysmt" or framework_l == "bruteforce")
         ):
             self.restrictions = compile_restrictions(
@@ -388,7 +389,7 @@ def __add_restrictions(self, parameter_space: Problem) -> Problem:
                     all_params_required = all(param_name in required_params for param_name in self.param_names)
                     parameter_space.addConstraint(restriction, None if all_params_required else required_params)
                 else:
-                    raise ValueError(f"Unrecognized restriction {restriction}")
+                    raise ValueError(f"Unrecognized restriction type {type(restriction)} ({restriction})")
 
         # if the restrictions are the old monolithic function, apply them directly (only for backwards compatibility, likely slower than well-specified constraints!)
         elif callable(self.restrictions):
diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index 2ac9498e4..e8d194e11 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -1037,8 +1037,11 @@ def to_equality_constraint(
                     # check if we can turn this into the built-in equality comparison constraint
                     finalized_constraint = to_equality_constraint(parsed_restriction, params_used)
             if finalized_constraint is None:
-                # we must turn it into a general function
-                finalized_constraint = f"def r({', '.join(params_used)}): return {parsed_restriction} \n"
+                if parsed_restriction.startswith("def r("):
+                    finalized_constraint = parsed_restriction
+                else:
+                    # we must turn it into a general function
+                    finalized_constraint = f"def r({', '.join(params_used)}): return {parsed_restriction} \n"
             parsed_restrictions.append((finalized_constraint, params_used))
     else:
         # create one monolithic function
@@ -1075,6 +1078,8 @@ def compile_restrictions(
     restrictions: list, tune_params: dict, monolithic=False, format=None, try_to_constraint=True
 ) -> list[tuple[Union[str, Constraint, FunctionType], list[str]]]:
     """Parses restrictions from a list of strings into a list of strings, Functions, or Constraints (if `try_to_constraint`) and parameters used, or a single Function if monolithic is true."""
+    # change tuples consisting of strings and tunable parameters to only strings to compile
+    restrictions = [r[0] if isinstance(r, tuple) and len(r) == 2 and isinstance(r[0], str) and isinstance(r[1], list) else r for r in restrictions]
     # filter the restrictions to get only the strings
     restrictions_str, restrictions_ignore = [], []
     for r in restrictions:

From 0a2748d15ce0609665ad87a048332a3cc0bd4473 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 31 Oct 2024 13:00:49 -0700
Subject: [PATCH 020/168] Made original BO compatible with Searchspaces

---
 kernel_tuner/strategies/bayes_opt.py | 56 ++++++++++++++--------------
 1 file changed, 29 insertions(+), 27 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
index c384ecb97..dd0551740 100644
--- a/kernel_tuner/strategies/bayes_opt.py
+++ b/kernel_tuner/strategies/bayes_opt.py
@@ -93,9 +93,6 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
     """
     max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
-    # limit max_fevals to max size of the parameter space
-    max_fevals = min(searchspace.size, max_fevals)
-
     prune_parameterspace = tuning_options.strategy_options.get("pruneparameterspace", True)
     if not bayes_opt_present:
         raise ImportError(
@@ -571,8 +568,8 @@ def __optimize(self, max_fevals):
         while self.fevals < max_fevals:
             if self.__visited_num >= self.searchspace_size:
                 raise ValueError(self.error_message_searchspace_fully_observed)
-            predictions = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(predictions[1])
+            predictions, _, std = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(std)
             list_of_acquisition_values = self.__af(predictions, hyperparam)
             # afterwards select the best AF value
             best_af = self.argopt(list_of_acquisition_values)
@@ -606,8 +603,8 @@ def __optimize_multi(self, max_fevals):
             time_start = time.perf_counter_ns()
             # the first acquisition function is never skipped, so that should be the best for the endgame (EI)
             aqfs = self.multi_afs
-            predictions = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(predictions[1])
+            predictions, _, std = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(std)
             if self.__visited_num >= self.searchspace_size:
                 raise ValueError(self.error_message_searchspace_fully_observed)
             time_predictions = time.perf_counter_ns()
@@ -728,19 +725,19 @@ def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
                 raise ValueError(self.error_message_searchspace_fully_observed)
             observations_median = np.median(self.__valid_observations)
             if increase_precision is False:
-                predictions = self.predict_list(self.unvisited_cache)
-                hyperparam = self.contextual_variance(predictions[1])
+                predictions, _, std = self.predict_list(self.unvisited_cache)
+                hyperparam = self.contextual_variance(std)
             for af_index, af in enumerate(aqfs):
                 if af_index in skip_af_index:
                     continue
                 if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
                     break
                 if increase_precision is True:
-                    predictions = self.predict_list(self.unvisited_cache)
+                    predictions, _, std = self.predict_list(self.unvisited_cache)
                     hyperparam = self.contextual_variance(std)
                 list_of_acquisition_values = af(predictions, hyperparam)
                 best_af = self.argopt(list_of_acquisition_values)
-                # del predictions[best_af]  # to avoid going out of bounds
+                del predictions[best_af]  # to avoid going out of bounds
                 candidate_params = self.unvisited_cache[best_af]
                 candidate_index = self.find_param_config_index(candidate_params)
                 observation = self.evaluate_objective_function(candidate_params)
@@ -830,8 +827,8 @@ def __optimize_multi_fast(self, max_fevals):
         while self.fevals < max_fevals:
             aqfs = self.multi_afs
             # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
-            predictions = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(predictions[1])
+            predictions, _, std = self.predict_list(self.unvisited_cache)
+            hyperparam = self.contextual_variance(std)
             if self.__visited_num >= self.searchspace_size:
                 raise ValueError(self.error_message_searchspace_fully_observed)
             for af in aqfs:
@@ -855,37 +852,42 @@ def af_random(self, predictions=None, hyperparam=None) -> list:
     def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> list:
         """Acquisition function Probability of Improvement (PI)."""
         # prefetch required data
+        if predictions is None:
+            predictions, _, _ = self.predict_list(self.unvisited_cache)
         if hyperparam is None:
             hyperparam = self.af_params["explorationfactor"]
         fplus = self.current_optimum - hyperparam
 
         # precompute difference of improvement
-        list_diff_improvement = list(-((fplus - x_mu) / (x_std + 1e-9)) for x_mu, x_std in predictions[0])
+        list_diff_improvement = list(-((fplus - x_mu) / (x_std + 1e-9)) for (x_mu, x_std) in predictions)
 
         # compute probability of improvement with CDF in bulk
         list_prob_improvement = norm.cdf(list_diff_improvement)
+
         return list_prob_improvement
 
     def af_expected_improvement(self, predictions=None, hyperparam=None) -> list:
         """Acquisition function Expected Improvement (EI)."""
         # prefetch required data
+        if predictions is None:
+            predictions, _, _ = self.predict_list(self.unvisited_cache)
         if hyperparam is None:
             hyperparam = self.af_params["explorationfactor"]
         fplus = self.current_optimum - hyperparam
-        if len(predictions) == 3:
-            predictions, x_mu, x_std = predictions
-        elif len(predictions) == 2:
-            x_mu, x_std = predictions
-        else:
-            raise ValueError(f"Invalid predictions size {len(predictions)}")
 
         # precompute difference of improvement, CDF and PDF in bulk
         list_diff_improvement = list((fplus - x_mu) / (x_std + 1e-9) for (x_mu, x_std) in predictions)
         list_cdf = norm.cdf(list_diff_improvement)
         list_pdf = norm.pdf(list_diff_improvement)
 
-        # compute expected improvement in bulk
-        list_exp_improvement = -((fplus - x_mu) * list_cdf + x_std * list_pdf)
+        # specify AF calculation
+        def exp_improvement(index) -> float:
+            x_mu, x_std = predictions[index]
+            ei = (fplus - x_mu) * list_cdf[index] + x_std * list_pdf[index]
+            return -ei
+
+        # calculate AF
+        list_exp_improvement = list(map(exp_improvement, range(len(predictions))))
         return list_exp_improvement
 
     def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
@@ -896,16 +898,16 @@ def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
         if hyperparam is None:
             hyperparam = self.af_params["explorationfactor"]
         beta = hyperparam
-        _, x_mu, x_std = predictions
 
         # compute LCB in bulk
-        list_lower_confidence_bound = (x_mu - beta * x_std)
+        list_lower_confidence_bound = list(x_mu - beta * x_std for (x_mu, x_std) in predictions)
         return list_lower_confidence_bound
 
     def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None) -> list:
         """Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010."""
         # prefetch required data
-        _, x_mu, x_std = predictions
+        if predictions is None:
+            predictions, _, _ = self.predict_list(self.unvisited_cache)
         if hyperparam is None:
             hyperparam = self.af_params["explorationfactor"]
 
@@ -917,7 +919,7 @@ def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None)
         beta = np.sqrt(zeta * (2 * np.log((t ** (d / 2.0 + 2)) * (np.pi**2) / (3.0 * delta))))
 
         # compute UCB in bulk
-        list_lower_confidence_bound = (x_mu - beta * x_std)
+        list_lower_confidence_bound = list(x_mu - beta * x_std for (x_mu, x_std) in predictions)
         return list_lower_confidence_bound
 
     def visualize_after_opt(self):
@@ -938,4 +940,4 @@ def visualize_after_opt(self):
         plt.plot(x_axis, mu, label="predictions", linestyle=" ", marker=".")
         plt.plot(x_axis, brute_force_observations, label="actual", linestyle=" ", marker=".")
         plt.legend()
-        plt.show()
+        plt.show()
\ No newline at end of file

From 6354f4d6bb16b37631de898eed1de5ff45f1a1b6 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 31 Oct 2024 13:29:37 -0700
Subject: [PATCH 021/168] Implemented a new acquisition function that takes the
 ratio between prediction and evaluation into account to be more efficient

---
 kernel_tuner/strategies/bayes_opt.py | 45 ++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
index dd0551740..47f82e3a9 100644
--- a/kernel_tuner/strategies/bayes_opt.py
+++ b/kernel_tuner/strategies/bayes_opt.py
@@ -24,7 +24,7 @@
 
 from kernel_tuner import util
 
-supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast"]
+supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast", "multi-ultrafast"]
 
 
 def generate_normalized_param_dicts(tune_params: dict, eps: float) -> Tuple[dict, dict]:
@@ -162,7 +162,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     covariancelengthscale=("The covariance length scale", 1.5),
     method=(
         "The Bayesian Optimization method to use, choose any from " + ", ".join(supported_methods),
-        "multi-advanced",
+        "multi-ultrafast",
     ),
     samplingmethod=(
         "Method used for initial sampling the parameter space, either random or Latin Hypercube Sampling (LHS)",
@@ -199,7 +199,7 @@ def get_hyperparam(name: str, default, supported_values=list()):
         # get hyperparameters
         cov_kernel_name = get_hyperparam("covariancekernel", "matern32", self.supported_cov_kernels)
         cov_kernel_lengthscale = get_hyperparam("covariancelengthscale", 1.5)
-        acquisition_function = get_hyperparam("method", "multi-advanced", self.supported_methods)
+        acquisition_function = get_hyperparam("method", "multi-ultrafast", self.supported_methods)
         acq = acquisition_function
         acq_params = get_hyperparam("methodparams", {})
         multi_af_names = get_hyperparam("multi_af_names", ["ei", "poi", "lcb"])
@@ -342,6 +342,8 @@ def set_acquisition_function(self, acquisition_function: str):
             self.optimize = self.__optimize_multi_advanced
         elif acquisition_function == "multi-fast":
             self.optimize = self.__optimize_multi_fast
+        elif acquisition_function == "multi-ultrafast":
+            self.optimize = self.__optimize_multi_ultrafast
         else:
             raise ValueError(
                 "Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function)
@@ -843,6 +845,43 @@ def __optimize_multi_fast(self, max_fevals):
                 self.update_after_evaluation(observation, candidate_index, candidate_params)
             self.fit_observations_to_model()
 
+    def __optimize_multi_ultrafast(self, max_fevals, predict_eval_ratio=5):
+        """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once, or fewer if predictions take too long.
+        
+        The `predict_eval_ratio` denotes the ratio between the duration of the predictions and the duration of evaluations, as updating the prediction every evaluation is not efficient when evaluation is quick. 
+        Predictions are only updated if the previous evaluation took more than `predict_eval_ratio` * the last prediction duration, or the last prediction is more than `predict_eval_ratio` evaluations ago. 
+        """
+        last_prediction_counter = 0
+        last_prediction_time = 0
+        last_eval_time = 0
+        while self.fevals < max_fevals:
+            aqfs = self.multi_afs
+            # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
+            if last_prediction_time * predict_eval_ratio <= last_eval_time or last_prediction_counter >= predict_eval_ratio:
+                last_prediction_counter = 0
+                pred_start = time.perf_counter()
+                if last_eval_time > 0.0:
+                    self.fit_observations_to_model()
+                predictions, _, std = self.predict_list(self.unvisited_cache)
+                last_prediction_time = time.perf_counter() - pred_start
+            else:
+                last_prediction_counter += 1
+            eval_start = time.perf_counter()
+            hyperparam = self.contextual_variance(std)
+            if self.__visited_num >= self.searchspace_size:
+                raise ValueError(self.error_message_searchspace_fully_observed)
+            for af in aqfs:
+                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
+                    break
+                list_of_acquisition_values = af(predictions, hyperparam)
+                best_af = self.argopt(list_of_acquisition_values)
+                del predictions[best_af]  # to avoid going out of bounds
+                candidate_params = self.unvisited_cache[best_af]
+                candidate_index = self.find_param_config_index(candidate_params)
+                observation = self.evaluate_objective_function(candidate_params)
+                self.update_after_evaluation(observation, candidate_index, candidate_params)
+            last_eval_time = time.perf_counter() - eval_start
+
     def af_random(self, predictions=None, hyperparam=None) -> list:
         """Acquisition function returning a randomly shuffled list for comparison."""
         list_random = range(len(self.unvisited_cache))

From 540151968ac9a8dec235c350bb4d088b3cf7c54c Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 09:04:59 -0800
Subject: [PATCH 022/168] Changed supported Python versions to include 3.13,
 updated dependencies

---
 doc/requirements.txt | 175 +++++++++++++++++++++----------------------
 noxfile.py           |   2 +-
 pyproject.toml       |   7 +-
 3 files changed, 91 insertions(+), 93 deletions(-)

diff --git a/doc/requirements.txt b/doc/requirements.txt
index 766ee148d..b47d8ddaf 100644
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@@ -1,89 +1,86 @@
-alabaster==0.7.16 ; python_version >= "3.9" and python_version < "3.13"
-asttokens==2.4.1 ; python_version >= "3.9" and python_version < "3.13"
-attrs==23.2.0 ; python_version >= "3.9" and python_version < "3.13"
-babel==2.15.0 ; python_version >= "3.9" and python_version < "3.13"
-beautifulsoup4==4.12.3 ; python_version >= "3.9" and python_version < "3.13"
-bleach==6.1.0 ; python_version >= "3.9" and python_version < "3.13"
-certifi==2024.2.2 ; python_version >= "3.9" and python_version < "3.13"
-cffi==1.16.0 ; python_version >= "3.9" and python_version < "3.13" and implementation_name == "pypy"
-charset-normalizer==3.3.2 ; python_version >= "3.9" and python_version < "3.13"
-colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and sys_platform == "win32"
-decorator==5.1.1 ; python_version >= "3.9" and python_version < "3.13"
-defusedxml==0.7.1 ; python_version >= "3.9" and python_version < "3.13"
-docutils==0.20.1 ; python_version >= "3.9" and python_version < "3.13"
-dom-toml==2.0.0 ; python_version >= "3.9" and python_version < "3.13"
-domdf-python-tools==3.8.1 ; python_version >= "3.9" and python_version < "3.13"
-exceptiongroup==1.2.1 ; python_version >= "3.9" and python_version < "3.11"
-executing==2.0.1 ; python_version >= "3.9" and python_version < "3.13"
-fastjsonschema==2.19.1 ; python_version >= "3.9" and python_version < "3.13"
-idna==3.7 ; python_version >= "3.9" and python_version < "3.13"
-imagesize==1.4.1 ; python_version >= "3.9" and python_version < "3.13"
-importlib-metadata==7.1.0 ; python_version >= "3.9" and python_version < "3.10"
-iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "3.13"
-ipython==8.18.1 ; python_version >= "3.9" and python_version < "3.13"
-jedi==0.19.1 ; python_version >= "3.9" and python_version < "3.13"
-jinja2==3.1.4 ; python_version >= "3.9" and python_version < "3.13"
-joblib==1.4.2 ; python_version >= "3.9" and python_version < "3.13"
-jsonschema-specifications==2023.12.1 ; python_version >= "3.9" and python_version < "3.13"
-jsonschema==4.22.0 ; python_version >= "3.9" and python_version < "3.13"
-jupyter-client==8.6.2 ; python_version >= "3.9" and python_version < "3.13"
-jupyter-core==5.7.2 ; python_version >= "3.9" and python_version < "3.13"
-jupyterlab-pygments==0.3.0 ; python_version >= "3.9" and python_version < "3.13"
-markupsafe==2.1.5 ; python_version >= "3.9" and python_version < "3.13"
-matplotlib-inline==0.1.7 ; python_version >= "3.9" and python_version < "3.13"
-mistune==3.0.2 ; python_version >= "3.9" and python_version < "3.13"
-natsort==8.4.0 ; python_version >= "3.9" and python_version < "3.13"
-nbclient==0.10.0 ; python_version >= "3.9" and python_version < "3.13"
-nbconvert==7.16.4 ; python_version >= "3.9" and python_version < "3.13"
-nbformat==5.10.4 ; python_version >= "3.9" and python_version < "3.13"
-nbsphinx==0.9.4 ; python_version >= "3.9" and python_version < "3.13"
-numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.13"
-packaging==24.0 ; python_version >= "3.9" and python_version < "3.13"
-pandas==2.2.2 ; python_version >= "3.9" and python_version < "3.13"
-pandocfilters==1.5.1 ; python_version >= "3.9" and python_version < "3.13"
-parso==0.8.4 ; python_version >= "3.9" and python_version < "3.13"
-pexpect==4.9.0 ; python_version >= "3.9" and python_version < "3.13" and sys_platform != "win32"
-platformdirs==4.2.2 ; python_version >= "3.9" and python_version < "3.13"
-pluggy==1.5.0 ; python_version >= "3.9" and python_version < "3.13"
-prompt-toolkit==3.0.43 ; python_version >= "3.9" and python_version < "3.13"
-ptyprocess==0.7.0 ; python_version >= "3.9" and python_version < "3.13" and sys_platform != "win32"
-pure-eval==0.2.2 ; python_version >= "3.9" and python_version < "3.13"
-pycparser==2.22 ; python_version >= "3.9" and python_version < "3.13" and implementation_name == "pypy"
-pygments==2.18.0 ; python_version >= "3.9" and python_version < "3.13"
-pytest==8.2.1 ; python_version >= "3.9" and python_version < "3.13"
-python-constraint2==2.0.0b5 ; python_version >= "3.9" and python_version < "3.13"
-python-dateutil==2.9.0.post0 ; python_version >= "3.9" and python_version < "3.13"
-pytz==2024.1 ; python_version >= "3.9" and python_version < "3.13"
-pywin32==306 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.9" and python_version < "3.13"
-pyzmq==26.0.3 ; python_version >= "3.9" and python_version < "3.13"
-referencing==0.35.1 ; python_version >= "3.9" and python_version < "3.13"
-requests==2.32.2 ; python_version >= "3.9" and python_version < "3.13"
-rpds-py==0.18.1 ; python_version >= "3.9" and python_version < "3.13"
-scikit-learn==1.5.0 ; python_version >= "3.9" and python_version < "3.13"
-scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.13"
-six==1.16.0 ; python_version >= "3.9" and python_version < "3.13"
-snowballstemmer==2.2.0 ; python_version >= "3.9" and python_version < "3.13"
-soupsieve==2.5 ; python_version >= "3.9" and python_version < "3.13"
-sphinx-pyproject==0.3.0 ; python_version >= "3.9" and python_version < "3.13"
-sphinx-rtd-theme==2.0.0 ; python_version >= "3.9" and python_version < "3.13"
-sphinx==7.3.7 ; python_version >= "3.9" and python_version < "3.13"
-sphinxcontrib-applehelp==1.0.8 ; python_version >= "3.9" and python_version < "3.13"
-sphinxcontrib-devhelp==1.0.6 ; python_version >= "3.9" and python_version < "3.13"
-sphinxcontrib-htmlhelp==2.0.5 ; python_version >= "3.9" and python_version < "3.13"
-sphinxcontrib-jquery==4.1 ; python_version >= "3.9" and python_version < "3.13"
-sphinxcontrib-jsmath==1.0.1 ; python_version >= "3.9" and python_version < "3.13"
-sphinxcontrib-qthelp==1.0.7 ; python_version >= "3.9" and python_version < "3.13"
-sphinxcontrib-serializinghtml==1.1.10 ; python_version >= "3.9" and python_version < "3.13"
-stack-data==0.6.3 ; python_version >= "3.9" and python_version < "3.13"
-threadpoolctl==3.5.0 ; python_version >= "3.9" and python_version < "3.13"
-tinycss2==1.3.0 ; python_version >= "3.9" and python_version < "3.13"
-tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.13"
-tornado==6.4 ; python_version >= "3.9" and python_version < "3.13"
-traitlets==5.14.3 ; python_version >= "3.9" and python_version < "3.13"
-typing-extensions==4.12.0 ; python_version >= "3.9" and python_version < "3.13"
-tzdata==2024.1 ; python_version >= "3.9" and python_version < "3.13"
-urllib3==2.2.1 ; python_version >= "3.9" and python_version < "3.13"
-wcwidth==0.2.13 ; python_version >= "3.9" and python_version < "3.13"
-webencodings==0.5.1 ; python_version >= "3.9" and python_version < "3.13"
-xmltodict==0.13.0 ; python_version >= "3.9" and python_version < "3.13"
-zipp==3.18.2 ; python_version >= "3.9" and python_version < "3.10"
+alabaster==0.7.16 ; python_version >= "3.10" and python_version < "3.14"
+asttokens==2.4.1 ; python_version >= "3.10" and python_version < "3.14"
+attrs==24.2.0 ; python_version >= "3.10" and python_version < "3.14"
+babel==2.16.0 ; python_version >= "3.10" and python_version < "3.14"
+beautifulsoup4==4.12.3 ; python_version >= "3.10" and python_version < "3.14"
+bleach==6.2.0 ; python_version >= "3.10" and python_version < "3.14"
+certifi==2024.8.30 ; python_version >= "3.10" and python_version < "3.14"
+cffi==1.17.1 ; python_version >= "3.10" and python_version < "3.14" and implementation_name == "pypy"
+charset-normalizer==3.4.0 ; python_version >= "3.10" and python_version < "3.14"
+colorama==0.4.6 ; python_version >= "3.10" and python_version < "3.14" and sys_platform == "win32"
+decorator==5.1.1 ; python_version >= "3.10" and python_version < "3.14"
+defusedxml==0.7.1 ; python_version >= "3.10" and python_version < "3.14"
+docutils==0.20.1 ; python_version >= "3.10" and python_version < "3.14"
+dom-toml==2.0.0 ; python_version >= "3.10" and python_version < "3.14"
+domdf-python-tools==3.9.0 ; python_version >= "3.10" and python_version < "3.14"
+exceptiongroup==1.2.2 ; python_version >= "3.10" and python_version < "3.11"
+executing==2.1.0 ; python_version >= "3.10" and python_version < "3.14"
+fastjsonschema==2.20.0 ; python_version >= "3.10" and python_version < "3.14"
+idna==3.10 ; python_version >= "3.10" and python_version < "3.14"
+imagesize==1.4.1 ; python_version >= "3.10" and python_version < "3.14"
+iniconfig==2.0.0 ; python_version >= "3.10" and python_version < "3.14"
+ipython==8.18.1 ; python_version >= "3.10" and python_version < "3.14"
+jedi==0.19.1 ; python_version >= "3.10" and python_version < "3.14"
+jinja2==3.1.4 ; python_version >= "3.10" and python_version < "3.14"
+joblib==1.4.2 ; python_version >= "3.10" and python_version < "3.14"
+jsonschema-specifications==2024.10.1 ; python_version >= "3.10" and python_version < "3.14"
+jsonschema==4.23.0 ; python_version >= "3.10" and python_version < "3.14"
+jupyter-client==8.6.3 ; python_version >= "3.10" and python_version < "3.14"
+jupyter-core==5.7.2 ; python_version >= "3.10" and python_version < "3.14"
+jupyterlab-pygments==0.3.0 ; python_version >= "3.10" and python_version < "3.14"
+markupsafe==2.1.5 ; python_version >= "3.10" and python_version < "3.14"
+matplotlib-inline==0.1.7 ; python_version >= "3.10" and python_version < "3.14"
+mistune==3.0.2 ; python_version >= "3.10" and python_version < "3.14"
+natsort==8.4.0 ; python_version >= "3.10" and python_version < "3.14"
+nbclient==0.10.0 ; python_version >= "3.10" and python_version < "3.14"
+nbconvert==7.16.4 ; python_version >= "3.10" and python_version < "3.14"
+nbformat==5.10.4 ; python_version >= "3.10" and python_version < "3.14"
+nbsphinx==0.9.5 ; python_version >= "3.10" and python_version < "3.14"
+numpy==1.26.4 ; python_version >= "3.10" and python_version < "3.14"
+packaging==24.1 ; python_version >= "3.10" and python_version < "3.14"
+pandas==2.2.3 ; python_version >= "3.10" and python_version < "3.14"
+pandocfilters==1.5.1 ; python_version >= "3.10" and python_version < "3.14"
+parso==0.8.4 ; python_version >= "3.10" and python_version < "3.14"
+pexpect==4.9.0 ; python_version >= "3.10" and python_version < "3.14" and sys_platform != "win32"
+platformdirs==4.3.6 ; python_version >= "3.10" and python_version < "3.14"
+pluggy==1.5.0 ; python_version >= "3.10" and python_version < "3.14"
+prompt-toolkit==3.0.48 ; python_version >= "3.10" and python_version < "3.14"
+ptyprocess==0.7.0 ; python_version >= "3.10" and python_version < "3.14" and sys_platform != "win32"
+pure-eval==0.2.3 ; python_version >= "3.10" and python_version < "3.14"
+pycparser==2.22 ; python_version >= "3.10" and python_version < "3.14" and implementation_name == "pypy"
+pygments==2.18.0 ; python_version >= "3.10" and python_version < "3.14"
+pytest==8.3.3 ; python_version >= "3.10" and python_version < "3.14"
+python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version < "3.14"
+pytz==2024.2 ; python_version >= "3.10" and python_version < "3.14"
+pywin32==308 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.10" and python_version < "3.14"
+pyzmq==26.2.0 ; python_version >= "3.10" and python_version < "3.14"
+referencing==0.35.1 ; python_version >= "3.10" and python_version < "3.14"
+requests==2.32.3 ; python_version >= "3.10" and python_version < "3.14"
+rpds-py==0.20.1 ; python_version >= "3.10" and python_version < "3.14"
+scikit-learn==1.5.2 ; python_version >= "3.10" and python_version < "3.14"
+scipy==1.13.1 ; python_version >= "3.10" and python_version < "3.14"
+six==1.16.0 ; python_version >= "3.10" and python_version < "3.14"
+snowballstemmer==2.2.0 ; python_version >= "3.10" and python_version < "3.14"
+soupsieve==2.6 ; python_version >= "3.10" and python_version < "3.14"
+sphinx-pyproject==0.3.0 ; python_version >= "3.10" and python_version < "3.14"
+sphinx-rtd-theme==2.0.0 ; python_version >= "3.10" and python_version < "3.14"
+sphinx==7.4.7 ; python_version >= "3.10" and python_version < "3.14"
+sphinxcontrib-applehelp==2.0.0 ; python_version >= "3.10" and python_version < "3.14"
+sphinxcontrib-devhelp==2.0.0 ; python_version >= "3.10" and python_version < "3.14"
+sphinxcontrib-htmlhelp==2.1.0 ; python_version >= "3.10" and python_version < "3.14"
+sphinxcontrib-jquery==4.1 ; python_version >= "3.10" and python_version < "3.14"
+sphinxcontrib-jsmath==1.0.1 ; python_version >= "3.10" and python_version < "3.14"
+sphinxcontrib-qthelp==2.0.0 ; python_version >= "3.10" and python_version < "3.14"
+sphinxcontrib-serializinghtml==2.0.0 ; python_version >= "3.10" and python_version < "3.14"
+stack-data==0.6.3 ; python_version >= "3.10" and python_version < "3.14"
+threadpoolctl==3.5.0 ; python_version >= "3.10" and python_version < "3.14"
+tinycss2==1.4.0 ; python_version >= "3.10" and python_version < "3.14"
+tomli==2.0.2 ; python_version >= "3.10" and python_version < "3.14"
+tornado==6.4.1 ; python_version >= "3.10" and python_version < "3.14"
+traitlets==5.14.3 ; python_version >= "3.10" and python_version < "3.14"
+typing-extensions==4.12.2 ; python_version >= "3.10" and python_version < "3.14"
+tzdata==2024.2 ; python_version >= "3.10" and python_version < "3.14"
+urllib3==2.2.3 ; python_version >= "3.10" and python_version < "3.14"
+wcwidth==0.2.13 ; python_version >= "3.10" and python_version < "3.14"
+webencodings==0.5.1 ; python_version >= "3.10" and python_version < "3.14"
+xmltodict==0.14.2 ; python_version >= "3.10" and python_version < "3.14"
diff --git a/noxfile.py b/noxfile.py
index 75c9ea902..016cf1cdd 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -15,7 +15,7 @@
 
 # set the test parameters
 verbose = False
-python_versions_to_test = ["3.9", "3.10", "3.11", "3.12"]
+python_versions_to_test = ["3.10", "3.11", "3.12", "3.13"]  # keep in sync with the `python` constraint in pyproject.toml (>=3.10,<3.14)
 nox.options.stop_on_first_error = True
 nox.options.error_on_missing_interpreters = True
 nox.options.default_venv_backend = 'virtualenv'
diff --git a/pyproject.toml b/pyproject.toml
index 323978437..8eb1fca51 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -58,12 +58,13 @@ kernel_tuner = "kernel_tuner.interface:entry_point"
 
 # ATTENTION: if anything is changed here, run `poetry update`
 [tool.poetry.dependencies]
-python = ">=3.9,<3.13"          # NOTE when changing the supported Python versions, also change the test versions in the noxfile
-numpy = "^1.26.0"              # Python 3.12 requires numpy at least 1.26
+python = ">=3.10,<3.14"         # NOTE when changing the supported Python versions, also change the test versions in the noxfile
+numpy = "^1.26.0"               # Python 3.12 requires numpy at least 1.26
 scipy = ">=1.11.0"
+ax = ">=0.4.3"
 packaging = "*"                 # required by file_utils
 jsonschema = "*"
-python-constraint2 = "^2.0.0b5"
+python-constraint2 = "^2.0.0b8"
 xmltodict = "*"
 pandas = ">=2.0.0"
 scikit-learn = ">=1.0.2"

From 04eacc455c2f71369d8b60a00fd9ac022df10cc4 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 19:10:33 -0800
Subject: [PATCH 023/168] Setup Searchspace to Ax SearchSpace conversion

---
 kernel_tuner/searchspace.py | 37 +++++++++++++++++++++++++++++++++++--
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index cc569abc5..30ea2af03 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -50,9 +50,9 @@ def __init__(
         framework_l = framework.lower()
         restrictions = restrictions if restrictions is not None else []
         self.tune_params = tune_params
-        self.restrictions = restrictions
+        self.restrictions = restrictions.copy()
         # the searchspace can add commonly used constraints (e.g. maxprod(blocks) <= maxthreads)
-        self._modified_restrictions = restrictions
+        self._modified_restrictions = restrictions.copy()
         self.param_names = list(self.tune_params.keys())
         self.params_values = tuple(tuple(param_vals) for param_vals in self.tune_params.values())
         self.params_values_indices = None
@@ -788,3 +788,36 @@ def order_param_configs(
                 f"The number of ordered parameter configurations ({len(ordered_param_configs)}) differs from the original number of parameter configurations ({len(param_configs)})"
             )
         return ordered_param_configs
+    
+    def to_ax_searchspace(self):
+        """Convert this searchspace to an Ax SearchSpace."""
+        from ax import ChoiceParameter, FixedParameter, ParameterType, SearchSpace
+
+        # create searchspace
+        ax_searchspace = SearchSpace([])
+
+        # add the parameters
+        for param_name, param_values in self.tune_params.items():
+            if len(param_values) == 0:
+                continue
+
+            # convert the types
+            assert all(isinstance(param_values[0], type(v)) for v in param_values), f"Parameter values of mixed types are not supported: {param_values}"
+            param_type_mapping = {
+                str: ParameterType.STRING,
+                int: ParameterType.INT,
+                float: ParameterType.FLOAT,
+                bool: ParameterType.BOOL
+            }
+            param_type = param_type_mapping[type(param_values[0])]
+
+            # add the parameter
+            if len(param_values) == 1:
+                ax_searchspace.add_parameter(FixedParameter(param_name, param_type, param_values[0]))
+            else:
+                ax_searchspace.add_parameter(ChoiceParameter(param_name, param_type, param_values))
+
+        # add the constraints
+        raise NotImplementedError("Conversion to Ax SearchSpace has not been fully implemented as Ax Searchspaces can't capture full complexity.")
+
+        return ax_searchspace

From 5f31dfcdd65a17079af5c454ee8734de14440d4e Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 19:20:24 -0800
Subject: [PATCH 024/168] Implemented Ax as a BO strategy

---
 kernel_tuner/strategies/bayes_opt_ax.py | 29 +++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 kernel_tuner/strategies/bayes_opt_ax.py

diff --git a/kernel_tuner/strategies/bayes_opt_ax.py b/kernel_tuner/strategies/bayes_opt_ax.py
new file mode 100644
index 000000000..234c882c4
--- /dev/null
+++ b/kernel_tuner/strategies/bayes_opt_ax.py
@@ -0,0 +1,29 @@
+"""The strategy that uses Bayesian Optimization through the Ax platform."""
+
+from ax import optimize
+
+from kernel_tuner import util
+from kernel_tuner.searchspace import Searchspace
+from kernel_tuner.strategies.common import (
+    CostFunc,
+)
+
+
+def tune(searchspace: Searchspace, runner, tuning_options):
+    cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True)
+
+    ax_searchspace = searchspace.to_ax_searchspace()
+
+    try:
+        best_parameters, best_values, experiment, model = optimize(
+            parameters=ax_searchspace.parameters,
+            parameter_constraints=ax_searchspace.parameter_constraints,
+            # the objective is the Kernel Tuner cost function
+            evaluation_function=cost_func,
+            minimize=True,
+        )
+    except util.StopCriterionReached as e:
+        if tuning_options.verbose:
+            print(e)
+
+    return cost_func.results

From 2e4f490c6c1a3d7fef7af79ac4e842701ed0cdbf Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 20:34:33 -0800
Subject: [PATCH 025/168] Made BO compatible with StopCriterion

---
 kernel_tuner/strategies/bayes_opt.py | 22 ++++++++++++++--------
 pyproject.toml                       |  2 +-
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
index 47f82e3a9..e4c9c52a2 100644
--- a/kernel_tuner/strategies/bayes_opt.py
+++ b/kernel_tuner/strategies/bayes_opt.py
@@ -1,4 +1,5 @@
 """Bayesian Optimization implementation from the thesis by Willemsen."""
+
 import itertools
 import time
 import warnings
@@ -13,6 +14,7 @@
 # BO imports
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies.common import CostFunc
+from kernel_tuner.util import StopCriterionReached
 
 try:
     from sklearn.gaussian_process import GaussianProcessRegressor
@@ -137,11 +139,12 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         bo = BayesianOptimization(
             parameter_space, removed_tune_params, tuning_options, normalize_dict, denormalize_dict, cost_func
         )
-    except util.StopCriterionReached as e:
-        print(
+    except StopCriterionReached:
+        warnings.warn(
             "Stop criterion reached during initialization, was popsize (default 20) greater than max_fevals or the alotted time?"
         )
-        raise e
+        return cost_func.results
+        # raise e
     try:
         if max_fevals - bo.fevals <= 0:
             raise ValueError("No function evaluations left for optimization after sampling")
@@ -847,9 +850,9 @@ def __optimize_multi_fast(self, max_fevals):
 
     def __optimize_multi_ultrafast(self, max_fevals, predict_eval_ratio=5):
         """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once, or fewer if predictions take too long.
-        
-        The `predict_eval_ratio` denotes the ratio between the duration of the predictions and the duration of evaluations, as updating the prediction every evaluation is not efficient when evaluation is quick. 
-        Predictions are only updated if the previous evaluation took more than `predict_eval_ratio` * the last prediction duration, or the last prediction is more than `predict_eval_ratio` evaluations ago. 
+
+        The `predict_eval_ratio` denotes the ratio between the duration of the predictions and the duration of evaluations, as updating the prediction every evaluation is not efficient when evaluation is quick.
+        Predictions are only updated if the previous evaluation took more than `predict_eval_ratio` * the last prediction duration, or the last prediction is more than `predict_eval_ratio` evaluations ago.
         """
         last_prediction_counter = 0
         last_prediction_time = 0
@@ -857,7 +860,10 @@ def __optimize_multi_ultrafast(self, max_fevals, predict_eval_ratio=5):
         while self.fevals < max_fevals:
             aqfs = self.multi_afs
             # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
-            if last_prediction_time * predict_eval_ratio <= last_eval_time or last_prediction_counter >= predict_eval_ratio:
+            if (
+                last_prediction_time * predict_eval_ratio <= last_eval_time
+                or last_prediction_counter >= predict_eval_ratio
+            ):
                 last_prediction_counter = 0
                 pred_start = time.perf_counter()
                 if last_eval_time > 0.0:
@@ -979,4 +985,4 @@ def visualize_after_opt(self):
         plt.plot(x_axis, mu, label="predictions", linestyle=" ", marker=".")
         plt.plot(x_axis, brute_force_observations, label="actual", linestyle=" ", marker=".")
         plt.legend()
-        plt.show()
\ No newline at end of file
+        plt.show()
diff --git a/pyproject.toml b/pyproject.toml
index 8eb1fca51..6a53b8556 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -61,7 +61,7 @@ kernel_tuner = "kernel_tuner.interface:entry_point"
 python = ">=3.10,<3.14"         # NOTE when changing the supported Python versions, also change the test versions in the noxfile
 numpy = "^1.26.0"               # Python 3.12 requires numpy at least 1.26
 scipy = ">=1.11.0"
-ax = ">=0.4.3"
+ax-platform = ">=0.4.3"
 packaging = "*"                 # required by file_utils
 jsonschema = "*"
 python-constraint2 = "^2.0.0b8"

From 705e724622b69c14b9a131f6b26a8e17d7ab1e9e Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 20:35:24 -0800
Subject: [PATCH 026/168] Minor compatibility change to BO strategies

---
 kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py |  2 +-
 kernel_tuner/strategies/bayes_opt_alt_BOTorch.py   | 14 +++-----------
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
index cc991dadf..d584c0e3b 100644
--- a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
+++ b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
@@ -694,7 +694,7 @@ def optimize(self, max_fevals: int) -> Tuple[tuple, float]:    #NOSONAR
         return self.all_results
 
     def objective_function(self, param_config: tuple) -> float:
-        return self.runner.run(param_config, self.tuning_options)
+        return self.runner.run([param_config], self.tuning_options)
 
     def evaluate_config(self, param_config_index: int) -> float:
         """Evaluates a parameter configuration, returns the time."""
diff --git a/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
index 891db5236..cf733cdde 100644
--- a/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
@@ -1,17 +1,10 @@
-""" BOTorch package from https://github.com/pytorch/botorch """
+"""BOTorch package from https://github.com/pytorch/botorch."""
 from __future__ import print_function
 
 from collections import OrderedDict
-import numpy as np
 
 try:
-    import torch
-    from botorch.models import SingleTaskGP
-    from botorch.fit import fit_gpytorch_model
-    from botorch.utils import standardize
-    from gpytorch.mlls import ExactMarginalLogLikelihood
-    from botorch.acquisition import UpperConfidenceBound
-    from botorch.optim import optimize_acqf
+    pass
 except Exception:
     BayesianOptimization = None
     bayes_opt_present = False
@@ -22,7 +15,7 @@
 
 
 def tune(runner, kernel_options, device_options, tuning_options):
-    """ Find the best performing kernel configuration in the parameter space
+    """Find the best performing kernel configuration in the parameter space.
 
     :params runner: A runner from kernel_tuner.runners
     :type runner: kernel_tuner.runner
@@ -44,7 +37,6 @@ def tune(runner, kernel_options, device_options, tuning_options):
     :rtype: list(dict()), dict()
 
     """
-
     if not bayes_opt_present:
         raise ImportError("Error: optional dependency Bayesian Optimization not installed")
     init_points = tuning_options.strategy_options.get("popsize", 20)

From 6cde57e59aefd83c2edbf312859c1cd3f100c7c2 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 20:37:05 -0800
Subject: [PATCH 027/168] Extended hyperparameter tuning benchmark

---
 kernel_tuner/backends/hypertuner.py     |  7 ++++++-
 kernel_tuner/hyper.py                   | 27 ++++++++++++++++---------
 kernel_tuner/strategies/bayes_opt_ax.py |  2 +-
 3 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index 53e5dd6da..0b1c69adb 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -64,8 +64,13 @@ def compile(self, kernel_instance):
         # gpus = ["RTX_3090", "RTX_2080_Ti"]
         # applications = None
 
-        gpus = ["A100", "A4000", "MI50", "MI250X", "W6600"]
+        gpus = ["A100", "A4000", "MI250X", "W6600"]
         applications = [
+            {
+                "name": "dedispersion_milo",
+                "folder": "../autotuning_methodology/cached_data_used/kernels",
+                "input_file": "dedispersion_milo.json"
+            },
             {
                 "name": "convolution_milo",
                 "folder": "../autotuning_methodology/cached_data_used/kernels",
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 8c0fb5d4e..3d2dfffa7 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -60,7 +60,7 @@ def tune_hyper_params(target_strategy: str, hyper_params: dict, *args, **kwargs)
 
     # pass a temporary cache file to avoid duplicate execution
     cachefile = get_random_unique_filename('temp_', '.json')
-    cachefile = Path("hyperparamtuning_milo_bruteforce.json")
+    cachefile = Path("hyperparamtuning_milo_bruteforce_greedy_ils.json")
     kwargs['cache'] = str(cachefile)
 
     def put_if_not_present(target_dict, key, value):
@@ -78,7 +78,7 @@ def put_if_not_present(target_dict, key, value):
                                     objective='score', objective_higher_is_better=True, iterations=iterations, **kwargs)
     
     # remove the temporary cachefile and return only unique results in order
-    cachefile.unlink()
+    # cachefile.unlink()
     result_unique = dict()
     for r in result:
         config_id = ",".join(str(r[k]) for k in hyper_params.keys())
@@ -87,14 +87,15 @@ def put_if_not_present(target_dict, key, value):
     return list(result_unique.values()), env
 
 if __name__ == "__main__":  # TODO remove in production
-    hyperparams = {
-        'popsize': [10, 20, 30],
-        'maxiter': [50, 100, 150],
-        'w': [0.25, 0.5, 0.75],
-        'c1': [1.0, 2.0, 3.0],
-        'c2': [0.5, 1.0, 1.5]
-    }
-    result, env = tune_hyper_params('pso', hyperparams)
+    # hyperparams = {
+    #     'popsize': [10, 20, 30],
+    #     'maxiter': [50, 100, 150],
+    #     'w': [0.25, 0.5, 0.75],
+    #     'c1': [1.0, 2.0, 3.0],
+    #     'c2': [0.5, 1.0, 1.5]
+    # }
+    # result, env = tune_hyper_params('pso', hyperparams)
+
     # hyperparams = {
     #     'neighbor': ['Hamming', 'adjacent'],
     #     'restart': [True, False],
@@ -102,5 +103,11 @@ def put_if_not_present(target_dict, key, value):
     #     'random_walk': [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99]
     # }
     # result, env = tune_hyper_params('greedy_ils', hyperparams)
+
+    hyperparams = {
+        'method': ['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 'BFGS', 'trust-constr'],
+    }
+    result, env = tune_hyper_params('dual_annealing', hyperparams)
+
     print(result)
     print(env['best_config'])
diff --git a/kernel_tuner/strategies/bayes_opt_ax.py b/kernel_tuner/strategies/bayes_opt_ax.py
index 234c882c4..2bb3ce8fc 100644
--- a/kernel_tuner/strategies/bayes_opt_ax.py
+++ b/kernel_tuner/strategies/bayes_opt_ax.py
@@ -1,4 +1,4 @@
-"""The strategy that uses particle swarm optimization."""
+"""Bayesian Optimization implementation using the Ax platform."""
 
 from ax import optimize
 

From aed5f0d430f5830d6a7fc7bf400fcc3c782142ce Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 20:38:07 -0800
Subject: [PATCH 028/168] Implemented Bayesian Optimization using BOTorch

---
 kernel_tuner/interface.py                    |  6 +-
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 76 ++++++++++++++++++++
 2 files changed, 80 insertions(+), 2 deletions(-)
 create mode 100644 kernel_tuner/strategies/bayes_opt_BOTorch.py

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 2bfa06a89..e9469ec6d 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -51,6 +51,7 @@
     basinhopping,
     bayes_opt,
     bayes_opt_alt_BOTorch,
+    bayes_opt_BOTorch,
     bayes_opt_GPyTorch,
     bayes_opt_GPyTorch_lean,
     bayes_opt_old,
@@ -88,7 +89,8 @@
     "bayes_opt_old": bayes_opt_old,
     "bayes_opt_GPyTorch": bayes_opt_GPyTorch,
     "bayes_opt_GPyTorch_lean": bayes_opt_GPyTorch_lean,
-    "bayes_opt_BOTorch": bayes_opt_alt_BOTorch,
+    "bayes_opt_BOTorch": bayes_opt_BOTorch,
+    "bayes_opt_BOTorch_alt": bayes_opt_alt_BOTorch,
 }
 
 
@@ -618,7 +620,7 @@ def tune_kernel(
         and not callable(restrictions)
         and not any(isinstance(r, Constraint) for r in restrictions)
     ):
-        restrictions = util.parse_restrictions(restrictions, tune_params)
+        restrictions = util.compile_restrictions(restrictions, tune_params)
 
     # sort all the options into separate dicts
     opts = locals()
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
new file mode 100644
index 000000000..d7a88bab5
--- /dev/null
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -0,0 +1,76 @@
+"""Bayesian Optimization implementation using BO Torch."""
+
+import numpy as np
+import torch
+from botorch import fit_gpytorch_model
+from botorch.acquisition import ExpectedImprovement
+from botorch.models import SingleTaskGP
+from botorch.optim import optimize_acqf_discrete
+from gpytorch.mlls import ExactMarginalLogLikelihood
+from torch import Tensor
+
+from kernel_tuner import util
+from kernel_tuner.searchspace import Searchspace
+from kernel_tuner.strategies.common import (
+    CostFunc,
+)
+
+
+def tune(searchspace: Searchspace, runner, tuning_options):
+    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
+    initial_sample_size = tuning_options.strategy_options.get("popsize", 20)
+    cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False)
+
+    # function to optimize
+    def evaluate_function(X):
+        if isinstance(X, (Tensor, list)):
+            results = []
+            if X.dim() == 1:
+                results = [[cost_func(X)]]
+            else:
+                results = [[cost_func(c)] for c in X]
+            return torch.from_numpy(np.array(results))
+        else:
+            raise NotImplementedError(f"Evaluation has not been implemented for type {type(X)}")
+
+    # set up conversion to tensors
+    full_space = torch.from_numpy(searchspace.get_list_numpy().astype(float))
+
+    # get bounds
+    bounds = []
+    for v in searchspace.params_values:
+        bounds.append([min(v), max(v)])
+    bounds = torch.from_numpy(np.array(bounds).transpose())
+
+    try:
+        # take initial sample
+        sample_indices = torch.from_numpy(searchspace.get_random_sample_indices(initial_sample_size))
+        train_X = full_space.index_select(0, sample_indices)
+        train_Y = evaluate_function(train_X)
+
+        # Bayesian optimization loop
+        for _ in range(max_fevals):
+            # Fit a Gaussian Process model
+            gp = SingleTaskGP(train_X, train_Y)
+            mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
+            fit_gpytorch_model(mll)
+            
+            # Define the acquisition function
+            ei = ExpectedImprovement(model=gp, best_f=train_Y.min(), maximize=False)
+            
+            # Optimize acquisition function to find the next evaluation point
+            candidate, _ = optimize_acqf_discrete(
+                ei, 
+                q=1, 
+                choices=full_space
+            )
+            
+            # Evaluate the new candidate and update the dataset
+            new_y = evaluate_function(candidate)
+            train_X = torch.cat([train_X, candidate])
+            train_Y = torch.cat([train_Y, new_y])
+    except util.StopCriterionReached as e:
+        if tuning_options.verbose:
+            print(e)
+
+    return cost_func.results

From 8c0dc497645b58f2640d91cbf735161c48144fdc Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 21:13:01 -0800
Subject: [PATCH 029/168] Automatically time out any PyTest that takes longer
 than 60 seconds

---
 pyproject.toml   | 3 ++-
 test/conftest.py | 7 +++++++
 2 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 test/conftest.py

diff --git a/pyproject.toml b/pyproject.toml
index 6a53b8556..3d8511493 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -114,13 +114,14 @@ markupsafe = "^2.0.1"       # TODO why do we need markupsafe here?
 optional = true
 [tool.poetry.group.test.dependencies]
 pytest = "^8.2.0"
+pytest-timeout = "^2.3.1"
 pytest-cov = "^5.0.0"
 mock = "^5.1.0"
 nox = "^2024.4.15"
 nox-poetry = "^1.0.3"
 ruff = "^0.4.4"
 pep440 = "^0.1.2"
-tomli = "^2.0.1"      # held back by Python <= 3.10, can be replaced by built-in [tomllib](https://docs.python.org/3.11/library/tomllib.html) from Python 3.11 onwards
+tomli = "^2.0.1"          # held back by Python <= 3.10, can be replaced by built-in [tomllib](https://docs.python.org/3.11/library/tomllib.html) from Python 3.11 onwards
 
 # development dependencies are unused for now, as this is already covered by test and docs
 # # ATTENTION: if anything is changed here, run `poetry update`
diff --git a/test/conftest.py b/test/conftest.py
new file mode 100644
index 000000000..1539a6cdf
--- /dev/null
+++ b/test/conftest.py
@@ -0,0 +1,7 @@
+import pytest
+
+
+def pytest_collection_modifyitems(items):
+    for item in items:
+        if item.get_closest_marker('timeout') is None:
+            item.add_marker(pytest.mark.timeout(60))
\ No newline at end of file

From b9b748d8ee22a633e14840ca81972a3bce56b6ac Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 21:15:06 -0800
Subject: [PATCH 030/168] Avoided inadvertent use of cache in
 hyperparameter tuning tests

---
 test/strategies/test_strategies.py | 2 ++
 test/test_hyper.py                 | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
index 57c43b4f7..11b231e62 100644
--- a/test/strategies/test_strategies.py
+++ b/test/strategies/test_strategies.py
@@ -37,6 +37,8 @@ def vector_add():
 # skip some strategies if their dependencies are not installed
 strategies = []
 for s in strategy_map.keys():
+    if 'gpytorch' in s.lower() or 'botorch_alt' in s.lower() or 'bayes_opt_old' in s.lower():
+        continue
     if 'gpytorch' in s.lower():
         strategies.append(pytest.param(s, marks=skip_if_no_bayesopt_gpytorch))
     elif 'botorch' in s.lower():
diff --git a/test/test_hyper.py b/test/test_hyper.py
index 7aab219ef..d34294585 100644
--- a/test/test_hyper.py
+++ b/test/test_hyper.py
@@ -15,6 +15,6 @@ def test_hyper(env):
 
     target_strategy = "genetic_algorithm"
 
-    result, env = tune_hyper_params(target_strategy, hyper_params, iterations=1, verbose=True)
+    result, env = tune_hyper_params(target_strategy, hyper_params, iterations=1, verbose=True, cache=None)
     assert len(result) == 2
     assert 'best_config' in env

From 177802628483f3a9dff7dea304d8806721ad09fe Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 21:15:26 -0800
Subject: [PATCH 031/168] Avoided inadvertent use of cache in
 hyperparameter tuning tests

---
 kernel_tuner/hyper.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 3d2dfffa7..b84912a8b 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -59,9 +59,10 @@ def tune_hyper_params(target_strategy: str, hyper_params: dict, *args, **kwargs)
         del kwargs['iterations']
 
     # pass a temporary cache file to avoid duplicate execution
-    cachefile = get_random_unique_filename('temp_', '.json')
-    cachefile = Path("hyperparamtuning_milo_bruteforce_greedy_ils.json")
-    kwargs['cache'] = str(cachefile)
+    if 'cache' not in kwargs:
+        cachefile = get_random_unique_filename('temp_', '.json')
+        cachefile = Path("hyperparamtuning_milo_bruteforce_greedy_ils.json")
+        kwargs['cache'] = str(cachefile)
 
     def put_if_not_present(target_dict, key, value):
         target_dict[key] = value if key not in target_dict else target_dict[key]

From 034352fbe7f7f27f8d94a4e6efa5a165d3e10b3a Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 4 Nov 2024 21:15:57 -0800
Subject: [PATCH 032/168] Shallow copy if the restrictions are copiable

---
 kernel_tuner/searchspace.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 30ea2af03..ca30e2563 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -50,9 +50,9 @@ def __init__(
         framework_l = framework.lower()
         restrictions = restrictions if restrictions is not None else []
         self.tune_params = tune_params
-        self.restrictions = restrictions.copy()
+        self.restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
         # the searchspace can add commonly used constraints (e.g. maxprod(blocks) <= maxthreads)
-        self._modified_restrictions = restrictions.copy()
+        self._modified_restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
         self.param_names = list(self.tune_params.keys())
         self.params_values = tuple(tuple(param_vals) for param_vals in self.tune_params.values())
         self.params_values_indices = None

From eba03f83689c70ed3a886cbdaa43a60acaabcfe7 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 5 Nov 2024 15:46:12 -0800
Subject: [PATCH 033/168] Refactored BO BOTorch into class structure

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 130 +++++++++++--------
 1 file changed, 77 insertions(+), 53 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index d7a88bab5..6ef703674 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -1,13 +1,18 @@
 """Bayesian Optimization implementation using BO Torch."""
 
 import numpy as np
-import torch
-from botorch import fit_gpytorch_model
-from botorch.acquisition import ExpectedImprovement
-from botorch.models import SingleTaskGP
-from botorch.optim import optimize_acqf_discrete
-from gpytorch.mlls import ExactMarginalLogLikelihood
-from torch import Tensor
+
+try:
+    import torch
+    from botorch import fit_gpytorch_model
+    from botorch.acquisition import ExpectedImprovement
+    from botorch.models import SingleTaskGP
+    from botorch.optim import optimize_acqf_discrete
+    from gpytorch.mlls import ExactMarginalLogLikelihood
+    from torch import Tensor
+    bayes_opt_present = True
+except ImportError:
+    bayes_opt_present = False
 
 from kernel_tuner import util
 from kernel_tuner.searchspace import Searchspace
@@ -18,59 +23,78 @@
 
 def tune(searchspace: Searchspace, runner, tuning_options):
     max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
-    initial_sample_size = tuning_options.strategy_options.get("popsize", 20)
-    cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False)
+    bo = BayesianOptimization(searchspace, runner, tuning_options)
+    return bo.run(max_fevals)
+
+class BayesianOptimization():
+    """Bayesian Optimization class."""
+
+    def __init__(self, searchspace: Searchspace, runner, tuning_options):
+        self.initial_sample_taken = False
+        self.initial_sample_size = tuning_options.strategy_options.get("popsize", 20)
+        self.tuning_options = tuning_options
+        self.cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False)
+
+        # set up conversion to tensors
+        self.searchspace = searchspace
+        self.searchspace_tensors = torch.from_numpy(searchspace.get_list_numpy().astype(float))
+        self.train_X = torch.empty_like(self.searchspace_tensors)
+        self.train_Y = torch.empty(len(self.train_X))
 
-    # function to optimize
-    def evaluate_function(X):
-        if isinstance(X, (Tensor, list)):
+        # get bounds
+        bounds = []
+        for v in searchspace.params_values:
+            bounds.append([min(v), max(v)])
+        bounds = torch.from_numpy(np.array(bounds).transpose())
+
+    def evaluate_configs(self, X: Tensor):
+        """Evaluate a tensor of one or multiple configurations."""
+        if isinstance(X, Tensor):
             results = []
             if X.dim() == 1:
-                results = [[cost_func(X)]]
+                results = [[self.cost_func(X)]]
             else:
-                results = [[cost_func(c)] for c in X]
+                results = [[self.cost_func(c)] for c in X]
             return torch.from_numpy(np.array(results))
         else:
             raise NotImplementedError(f"Evaluation has not been implemented for type {type(X)}")
+        
+    def initial_sample(self):
+        """Take an initial sample."""
+        sample_indices = torch.from_numpy(self.searchspace.get_random_sample_indices(self.initial_sample_size))
+        self.train_X = self.searchspace_tensors.index_select(0, sample_indices)
+        self.train_Y = self.evaluate_configs(self.train_X)
+        self.initial_sample_taken = True
 
-    # set up conversion to tensors
-    full_space = torch.from_numpy(searchspace.get_list_numpy().astype(float))
-
-    # get bounds
-    bounds = []
-    for v in searchspace.params_values:
-        bounds.append([min(v), max(v)])
-    bounds = torch.from_numpy(np.array(bounds).transpose())
-
-    try:
-        # take initial sample
-        sample_indices = torch.from_numpy(searchspace.get_random_sample_indices(initial_sample_size))
-        train_X = full_space.index_select(0, sample_indices)
-        train_Y = evaluate_function(train_X)
+    def run(self, max_fevals: int):
+        """Run the Bayesian Optimization loop for at most `max_fevals`."""
+        try:
+            if not self.initial_sample_taken:
+                self.initial_sample()
 
-        # Bayesian optimization loop
-        for _ in range(max_fevals):
-            # Fit a Gaussian Process model
-            gp = SingleTaskGP(train_X, train_Y)
-            mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
-            fit_gpytorch_model(mll)
-            
-            # Define the acquisition function
-            ei = ExpectedImprovement(model=gp, best_f=train_Y.min(), maximize=False)
-            
-            # Optimize acquisition function to find the next evaluation point
-            candidate, _ = optimize_acqf_discrete(
-                ei, 
-                q=1, 
-                choices=full_space
-            )
-            
-            # Evaluate the new candidate and update the dataset
-            new_y = evaluate_function(candidate)
-            train_X = torch.cat([train_X, candidate])
-            train_Y = torch.cat([train_Y, new_y])
-    except util.StopCriterionReached as e:
-        if tuning_options.verbose:
-            print(e)
+            # Bayesian optimization loop
+            for _ in range(max_fevals):
+                # Fit a Gaussian Process model
+                gp = SingleTaskGP(self.train_X, self.train_Y)
+                mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
+                fit_gpytorch_model(mll)
+                
+                # Define the acquisition function
+                ei = ExpectedImprovement(model=gp, best_f=self.train_Y.min(), maximize=False)
+                
+                # Optimize acquisition function to find the next evaluation point
+                candidate, _ = optimize_acqf_discrete(
+                    ei, 
+                    q=1, 
+                    choices=self.searchspace_tensors
+                )
+                
+                # Evaluate the new candidate and update the dataset
+                new_y = self.evaluate_configs(candidate)
+                self.train_X = torch.cat([self.train_X, candidate])
+                self.train_Y = torch.cat([self.train_Y, new_y])
+        except util.StopCriterionReached as e:
+            if self.tuning_options.verbose:
+                print(e)
 
-    return cost_func.results
+        return self.cost_func.results 

From c6b243ab952d8c80437053dc61fd3933296aa15b Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 6 Nov 2024 01:34:44 -0800
Subject: [PATCH 034/168] Switched to newer fit function, more efficient model
 initialization by reusing state

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 23 +++++++++++++++-----
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 6ef703674..c63c836e1 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -4,7 +4,7 @@
 
 try:
     import torch
-    from botorch import fit_gpytorch_model
+    from botorch import fit_gpytorch_mll
     from botorch.acquisition import ExpectedImprovement
     from botorch.models import SingleTaskGP
     from botorch.optim import optimize_acqf_discrete
@@ -66,21 +66,29 @@ def initial_sample(self):
         self.train_Y = self.evaluate_configs(self.train_X)
         self.initial_sample_taken = True
 
+    def initialize_model(self, state_dict=None):
+        """Initialize the model, possibly with a state dict for faster fitting."""
+        model = SingleTaskGP(self.train_X, self.train_Y)
+        mll = ExactMarginalLogLikelihood(model.likelihood, model)
+        # SumMarginalLogLikelihood
+        if state_dict is not None:
+            model.load_state_dict(state_dict)
+        return mll, model
+
     def run(self, max_fevals: int):
         """Run the Bayesian Optimization loop for at most `max_fevals`."""
         try:
             if not self.initial_sample_taken:
                 self.initial_sample()
+                mll, model = self.initialize_model()
 
             # Bayesian optimization loop
             for _ in range(max_fevals):
-                # Fit a Gaussian Process model
-                gp = SingleTaskGP(self.train_X, self.train_Y)
-                mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
-                fit_gpytorch_model(mll)
+                # fit a Gaussian Process model
+                fit_gpytorch_mll(mll)
                 
                 # Define the acquisition function
-                ei = ExpectedImprovement(model=gp, best_f=self.train_Y.min(), maximize=False)
+                ei = ExpectedImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
                 
                 # Optimize acquisition function to find the next evaluation point
                 candidate, _ = optimize_acqf_discrete(
@@ -93,6 +101,9 @@ def run(self, max_fevals: int):
                 new_y = self.evaluate_configs(candidate)
                 self.train_X = torch.cat([self.train_X, candidate])
                 self.train_Y = torch.cat([self.train_Y, new_y])
+
+                # reinitialize the models so they are ready for fitting on next iteration
+                mll, model = self.initialize_model(model.state_dict())
         except util.StopCriterionReached as e:
             if self.tuning_options.verbose:
                 print(e)

From 15818401451abf1e92138c3ad8f0e99e1152ccdb Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 6 Nov 2024 11:45:42 -0800
Subject: [PATCH 035/168] Added option to return invalid configurations in
 CostFunc

---
 kernel_tuner/strategies/common.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index 3420c86ea..717d2ca7e 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -53,10 +53,11 @@ def get_options(strategy_options, options):
 
 
 class CostFunc:
-    def __init__(self, searchspace: Searchspace, tuning_options, runner, *, scaling=False, snap=True):
+    def __init__(self, searchspace: Searchspace, tuning_options, runner, *, scaling=False, snap=True, return_invalid=False):
         self.runner = runner
         self.snap = snap
         self.scaling = scaling
+        self.return_invalid = return_invalid
         self.searchspace = searchspace
         self.tuning_options = tuning_options
         if isinstance(self.tuning_options, dict):
@@ -111,8 +112,11 @@ def __call__(self, x, check_restrictions=True):
             self.runner.last_strategy_start_time = perf_counter()
 
         # get numerical return value, taking optimization direction into account
-        return_value = result[self.tuning_options.objective] or sys.float_info.max
-        return_value = return_value if not self.tuning_options.objective_higher_is_better else -return_value
+        if self.return_invalid:
+            return_value = result[self.tuning_options.objective]
+        else:
+            return_value = result[self.tuning_options.objective] or sys.float_info.max
+        return_value = -return_value if self.tuning_options.objective_higher_is_better else return_value
 
         return return_value
 

From 620ee60d30c3ae4dca423d7544b415ebb4ef5e7b Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 6 Nov 2024 12:29:33 -0800
Subject: [PATCH 036/168] Added the handling of invalid configurations,
 training data is directly modified by the evaluation function

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 52 +++++++++++++-------
 1 file changed, 34 insertions(+), 18 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index c63c836e1..9009bed8e 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -33,7 +33,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.initial_sample_taken = False
         self.initial_sample_size = tuning_options.strategy_options.get("popsize", 20)
         self.tuning_options = tuning_options
-        self.cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False)
+        self.cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, return_invalid=True)
 
         # set up conversion to tensors
         self.searchspace = searchspace
@@ -41,29 +41,47 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.train_X = torch.empty_like(self.searchspace_tensors)
         self.train_Y = torch.empty(len(self.train_X))
 
-        # get bounds
-        bounds = []
-        for v in searchspace.params_values:
-            bounds.append([min(v), max(v)])
-        bounds = torch.from_numpy(np.array(bounds).transpose())
+        # # get bounds
+        # bounds = []
+        # for v in searchspace.params_values:
+        #     bounds.append([min(v), max(v)])
+        # bounds = torch.from_numpy(np.array(bounds).transpose())
+
+    def run_config(self, config):
+        """Run a single configuration. Returns the result and whether it is valid."""
+        result = self.cost_func(config)
+        valid = not isinstance(result, util.ErrorConfig)
+        if not valid:
+            result = np.nan
+        return result, valid
 
     def evaluate_configs(self, X: Tensor):
-        """Evaluate a tensor of one or multiple configurations."""
+        """Evaluate a tensor of one or multiple configurations. Modifies train_X and train_Y accordingly."""
         if isinstance(X, Tensor):
-            results = []
+            valid_configs = []
+            valid_results = []
             if X.dim() == 1:
-                results = [[self.cost_func(X)]]
-            else:
-                results = [[self.cost_func(c)] for c in X]
-            return torch.from_numpy(np.array(results))
+                X = [X]
+            for config in X:
+                res, valid = self.run_config(config)
+                if valid:
+                    valid_configs.append([config])
+                    valid_results.append([res])
+                else:
+                    # remove invalid configurations from the full searchspace
+                    index = self.searchspace.get_param_config_index(config)
+                    self.searchspace_tensors = torch.cat((self.searchspace_tensors[:index], self.searchspace_tensors[index+1:]))
+            # add valid results to the training set
+            self.train_X = torch.cat([self.train_X, torch.from_numpy(np.array(valid_configs))])
+            self.train_Y = torch.cat([self.train_Y, torch.from_numpy(np.array(valid_results))])
         else:
             raise NotImplementedError(f"Evaluation has not been implemented for type {type(X)}")
         
     def initial_sample(self):
         """Take an initial sample."""
         sample_indices = torch.from_numpy(self.searchspace.get_random_sample_indices(self.initial_sample_size))
-        self.train_X = self.searchspace_tensors.index_select(0, sample_indices)
-        self.train_Y = self.evaluate_configs(self.train_X)
+        sample_configs = self.searchspace_tensors.index_select(0, sample_indices)
+        self.evaluate_configs(sample_configs)
         self.initial_sample_taken = True
 
     def initialize_model(self, state_dict=None):
@@ -97,10 +115,8 @@ def run(self, max_fevals: int):
                     choices=self.searchspace_tensors
                 )
                 
-                # Evaluate the new candidate and update the dataset
-                new_y = self.evaluate_configs(candidate)
-                self.train_X = torch.cat([self.train_X, candidate])
-                self.train_Y = torch.cat([self.train_Y, new_y])
+                # evaluate the new candidate
+                self.evaluate_configs(candidate)
 
                 # reinitialize the models so they are ready for fitting on next iteration
                 mll, model = self.initialize_model(model.state_dict())

From 009cf01d195e83853503173b565dc3534e5b5740 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 6 Nov 2024 13:16:34 -0800
Subject: [PATCH 037/168] Set up structure for Tensorspace in Searchspace

---
 kernel_tuner/searchspace.py                  | 42 +++++++++++++++++++-
 kernel_tuner/strategies/bayes_opt_BOTorch.py |  3 +-
 2 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index ca30e2563..2b3946875 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -2,7 +2,7 @@
 import re
 from pathlib import Path
 from random import choice, shuffle
-from typing import List
+from typing import List, Union
 
 import numpy as np
 from constraint import (
@@ -17,6 +17,13 @@
     Solver,
 )
 
+try:
+    import torch
+    from torch import Tensor
+    torch_available = True
+except ImportError:
+    torch_available = False
+
 from kernel_tuner.util import check_restrictions as check_instance_restrictions
 from kernel_tuner.util import compile_restrictions, default_block_size_names
 
@@ -50,6 +57,7 @@ def __init__(
         framework_l = framework.lower()
         restrictions = restrictions if restrictions is not None else []
         self.tune_params = tune_params
+        self.tensorspace = None
         self.restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
         # the searchspace can add commonly used constraints (e.g. maxprod(blocks) <= maxthreads)
         self._modified_restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
@@ -573,10 +581,40 @@ def get_param_configs_at_indices(self, indices: List[int]) -> List[tuple]:
         # map(get) is ~40% faster than numpy[indices] (average based on six searchspaces with 10000, 100000 and 1000000 configs and 10 or 100 random indices)
         return list(map(self.list.__getitem__, indices))
 
-    def get_param_config_index(self, param_config: tuple):
+    def get_param_config_index(self, param_config: Union[tuple, Tensor]):
         """Lookup the index for a parameter configuration, returns None if not found."""
+        if torch_available and isinstance(param_config, Tensor):
+            param_config = self.tensor_to_param_config(param_config)
         # constant time O(1) access - much faster than any other method, but needs a shadow dict of the search space
         return self.__dict.get(param_config, None)
+    
+    def initialize_tensorspace(self):
+        """Encode the searchspace as floats in a Tensor. Save the mapping."""
+        self._map_tensor_to_param = []  # TODO
+        self._map_param_to_tensor = []  # TODO
+        numpy_repr = self.get_list_numpy()
+        numpy_repr = np.apply_along_axis(self.param_config_to_tensor, 0, numpy_repr)
+        self.tensorspace = torch.from_numpy(numpy_repr.astype(float))
+    
+    def get_tensorspace(self):
+        """Get the searchspace encoded in a Tensor."""
+        if self.tensorspace is None:
+            self.initialize_tensorspace()
+        return self.tensorspace
+    
+    def param_config_to_tensor(self, param_config: tuple):
+        """Convert from a parameter configuration to a Tensor."""
+        if self.tensorspace is None:
+            self.initialize_tensorspace()
+        # TODO
+        raise NotImplementedError()
+    
+    def tensor_to_param_config(self, tensor: Tensor):
+        """Convert from a Tensor to a parameter configuration."""
+        if self.tensorspace is None:
+            self.initialize_tensorspace()
+        # TODO
+        raise NotImplementedError()
 
     def __prepare_neighbors_index(self):
         """Prepare by calculating the indices for the individual parameters."""
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 9009bed8e..4a86598fe 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -37,7 +37,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
 
         # set up conversion to tensors
         self.searchspace = searchspace
-        self.searchspace_tensors = torch.from_numpy(searchspace.get_list_numpy().astype(float))
+        self.searchspace_tensors = searchspace.get_tensorspace()
         self.train_X = torch.empty_like(self.searchspace_tensors)
         self.train_Y = torch.empty(len(self.train_X))
 
@@ -63,6 +63,7 @@ def evaluate_configs(self, X: Tensor):
             if X.dim() == 1:
                 X = [X]
             for config in X:
+                assert isinstance(config, Tensor), f"Config must be a Tensor, but is of type {type(config)} ({config})"
                 res, valid = self.run_config(config)
                 if valid:
                     valid_configs.append([config])

From 33983f7c21b1f5bc39aec67d14bc426d295798c0 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 01:08:22 -0800
Subject: [PATCH 038/168] Implemented mappings and conversions to and from
 tensor to parameter configuration

---
 kernel_tuner/searchspace.py | 40 ++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 2b3946875..36001c835 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -1,4 +1,5 @@
 import ast
+import numbers
 import re
 from pathlib import Path
 from random import choice, shuffle
@@ -58,6 +59,9 @@ def __init__(
         restrictions = restrictions if restrictions is not None else []
         self.tune_params = tune_params
         self.tensorspace = None
+        self.tensor_categorical_dimensions = []
+        self._map_tensor_to_param = []
+        self._map_param_to_tensor = []
         self.restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
         # the searchspace can add commonly used constraints (e.g. maxprod(blocks) <= maxthreads)
         self._modified_restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
@@ -590,10 +594,21 @@ def get_param_config_index(self, param_config: Union[tuple, Tensor]):
     
     def initialize_tensorspace(self):
         """Encode the searchspace as floats in a Tensor. Save the mapping."""
-        self._map_tensor_to_param = []  # TODO
-        self._map_param_to_tensor = []  # TODO
+        assert self.tensorspace is None, "Tensorspace is already initialized"
+
+        # generate the mappings to and from tensor values
+        for index, param_values in enumerate(self.params_values):
+            if all(isinstance(v, numbers.Real) for v in param_values):
+                tensor_values = np.array(param_values).astype(float)
+            else:
+                self.tensor_categorical_dimensions.append(index)
+                tensor_values = np.arange(len(param_values))
+            self._map_param_to_tensor.append(dict(zip(param_values, tensor_values)))
+            self._map_tensor_to_param.append(dict(zip(tensor_values, param_values)))
+
+        # apply the mappings on the full searchspace
         numpy_repr = self.get_list_numpy()
-        numpy_repr = np.apply_along_axis(self.param_config_to_tensor, 0, numpy_repr)
+        numpy_repr = np.apply_along_axis(self.param_config_to_tensor, 1, numpy_repr)
         self.tensorspace = torch.from_numpy(numpy_repr.astype(float))
     
     def get_tensorspace(self):
@@ -604,17 +619,24 @@ def get_tensorspace(self):
     
     def param_config_to_tensor(self, param_config: tuple):
         """Convert from a parameter configuration to a Tensor."""
-        if self.tensorspace is None:
+        if len(self._map_param_to_tensor) == 0:
             self.initialize_tensorspace()
-        # TODO
-        raise NotImplementedError()
+        array = []
+        for i, param in enumerate(param_config):
+            array.append(self._map_param_to_tensor[i][param])
+        # TODO write tests
+        return torch.from_numpy(np.array(array))
     
     def tensor_to_param_config(self, tensor: Tensor):
         """Convert from a Tensor to a parameter configuration."""
-        if self.tensorspace is None:
+        assert tensor.dim() == 1, f"Parameter configuration tensor must be 1-dimensional, is {tensor.dim()} ({tensor})"
+        if len(self._map_tensor_to_param) == 0:
             self.initialize_tensorspace()
-        # TODO
-        raise NotImplementedError()
+        config = []
+        for i, param in enumerate(tensor):
+            config.append(self._map_tensor_to_param[i][param])
+        # TODO write tests
+        return tuple(config)
 
     def __prepare_neighbors_index(self):
         """Prepare by calculating the indices for the individual parameters."""

From f3fc81b19a7271518ae3909aa6463b8d794075f7 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 09:40:41 -0800
Subject: [PATCH 039/168] Improved efficiency of acquisition function by
 removing evaluated configurations

---
 kernel_tuner/hyper.py                        |  2 +-
 kernel_tuner/searchspace.py                  |  2 +-
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 15 +++++++++------
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index b84912a8b..9c052d033 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -61,7 +61,7 @@ def tune_hyper_params(target_strategy: str, hyper_params: dict, *args, **kwargs)
     # pass a temporary cache file to avoid duplicate execution
     if 'cache' not in kwargs:
         cachefile = get_random_unique_filename('temp_', '.json')
-        cachefile = Path("hyperparamtuning_milo_bruteforce_greedy_ils.json")
+        cachefile = Path("hyperparamtuning_milo_bruteforce_dual_annealing.json")
         kwargs['cache'] = str(cachefile)
 
     def put_if_not_present(target_dict, key, value):
diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 36001c835..69738bc12 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -634,7 +634,7 @@ def tensor_to_param_config(self, tensor: Tensor):
             self.initialize_tensorspace()
         config = []
         for i, param in enumerate(tensor):
-            config.append(self._map_tensor_to_param[i][param])
+            config.append(self._map_tensor_to_param[i][float(param)])
         # TODO write tests
         return tuple(config)
 
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 4a86598fe..dd7c3e956 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -47,7 +47,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         #     bounds.append([min(v), max(v)])
         # bounds = torch.from_numpy(np.array(bounds).transpose())
 
-    def run_config(self, config):
+    def run_config(self, config: tuple):
         """Run a single configuration. Returns the result and whether it is valid."""
         result = self.cost_func(config)
         valid = not isinstance(result, util.ErrorConfig)
@@ -64,14 +64,16 @@ def evaluate_configs(self, X: Tensor):
                 X = [X]
             for config in X:
                 assert isinstance(config, Tensor), f"Config must be a Tensor, but is of type {type(config)} ({config})"
-                res, valid = self.run_config(config)
+                param_config = self.searchspace.tensor_to_param_config(config)
+                res, valid = self.run_config(param_config)
                 if valid:
                     valid_configs.append([config])
                     valid_results.append([res])
-                else:
-                    # remove invalid configurations from the full searchspace
-                    index = self.searchspace.get_param_config_index(config)
-                    self.searchspace_tensors = torch.cat((self.searchspace_tensors[:index], self.searchspace_tensors[index+1:]))
+                
+                # remove evaluated configurations from the full searchspace
+                index = self.searchspace.get_param_config_index(param_config)
+                self.searchspace_tensors = torch.cat((self.searchspace_tensors[:index], self.searchspace_tensors[index+1:]))
+
             # add valid results to the training set
             self.train_X = torch.cat([self.train_X, torch.from_numpy(np.array(valid_configs))])
             self.train_Y = torch.cat([self.train_Y, torch.from_numpy(np.array(valid_results))])
@@ -110,6 +112,7 @@ def run(self, max_fevals: int):
                 ei = ExpectedImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
                 
                 # Optimize acquisition function to find the next evaluation point
+                # TODO look into how to handle categorical parameters with MixedSingleTaskGP
                 candidate, _ = optimize_acqf_discrete(
                     ei, 
                     q=1, 

From a5a04716064c2c8217003132271ccebb79ed9888 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 09:41:36 -0800
Subject: [PATCH 040/168] Removed Ax, added BOTorch as dependency

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 3d8511493..9d09dbcc5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -58,10 +58,10 @@ kernel_tuner = "kernel_tuner.interface:entry_point"
 
 # ATTENTION: if anything is changed here, run `poetry update`
 [tool.poetry.dependencies]
-python = ">=3.10,<3.14"         # NOTE when changing the supported Python versions, also change the test versions in the noxfile
+python = ">=3.10,<3.14"         # TODO from >=3.10, use | instead of Union[] # NOTE when changing the supported Python versions, also change the test versions in the noxfile
 numpy = "^1.26.0"               # Python 3.12 requires numpy at least 1.26
 scipy = ">=1.11.0"
-ax-platform = ">=0.4.3"
+botorch = ">=0.12.0"
 packaging = "*"                 # required by file_utils
 jsonschema = "*"
 python-constraint2 = "^2.0.0b8"

From 9429539f7321ef0f4487973b5f17a41fd0829129 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 09:42:20 -0800
Subject: [PATCH 041/168] Convenience script for benchmarking BO

---
 .gitignore |   1 +
 tune_bo.py | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 160 insertions(+)
 create mode 100644 tune_bo.py

diff --git a/.gitignore b/.gitignore
index 47ffc4024..eb59e44cb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@ poetry.lock
 noxenv.txt
 noxsettings.toml
 hyperparamtuning/*
+*.prof
 
 ### Python ###
 *.pyc
diff --git a/tune_bo.py b/tune_bo.py
new file mode 100644
index 000000000..81f1fe999
--- /dev/null
+++ b/tune_bo.py
@@ -0,0 +1,159 @@
+#!/usr/bin/env python
+from collections import OrderedDict
+from pathlib import Path
+
+import numpy
+
+import kernel_tuner
+
+# file_path_results = "../last_run/_tune_configuration-results.json"
+# file_path_metadata = "../last_run/_tune_configuration-metadata.json"
+
+
+def ops(w, h, fw, fh):
+    return (w * h * fw * fh * 2) / 1e9
+
+
+unit = "GFLOP"
+w = h = 4096
+fw = fh = 15
+inputs = [w, h, fw, fh]
+total_flops = ops(w, h, fw, fh)
+
+
+# def tune(inputs, lang, strategy):
+def tune(
+    device_name: str,
+    strategy="bayes_opt_BOTorch",
+    strategy_options={ 'max_fevals': 150 },
+    verbose=True,
+    quiet=False,
+    simulation_mode=True,
+    lang="CUDA",
+    profiling=True,
+):  
+    directory = Path(__file__).parent / "../autotuning_methodology/cached_data_used/"
+    assert directory.exists()
+    if lang == "CUDA":
+        kernel_file = directory / "kernels/convolution_milo.cu"
+    elif lang == "HIP":
+        kernel_file = directory / "kernels/convolution_milo.cu.hip"
+    else:
+        raise ValueError(f"Invalid {lang=}")
+
+    with kernel_file.open() as fp:
+        kernel_string = fp.read()
+
+    # setup tunable parameters
+    tune_params = OrderedDict()
+
+    # tune_params["pwr_limit"] = get_pwr_limit(pwr_limit, 0)
+
+    image_width, image_height, filter_width, filter_height = inputs
+
+    tune_params["block_size_x"] = [16 * i for i in range(1, 17)]
+    tune_params["block_size_y"] = [2**i for i in range(5)]
+    tune_params["tile_size_x"] = [i for i in range(1, 5)]
+    tune_params["tile_size_y"] = [i for i in range(1, 5)]
+    tune_params["read_only"] = [0, 1]  # toggle using the read-only cache
+
+    # do dry run
+    # tune_params["nvml_gr_clock"] = [2100]
+    # tune_params["block_size_x"] = [16]
+    # tune_params["block_size_y"] = [1]
+    # tune_params["tile_size_x"] = [1, 2, 4]
+    # tune_params["tile_size_y"] = [1]
+    # tune_params["read_only"] = [1]    #toggle using the read-only cache
+
+    tune_params["use_padding"] = [0, 1]  # toggle the insertion of padding in shared memory
+    tune_params["use_shmem"] = [0, 1]
+    tune_params["use_cmem"] = [1]
+    tune_params["filter_height"] = [filter_height]
+    tune_params["filter_width"] = [filter_width]
+
+    # limit the search to only use padding when it's effective
+    restrict = [
+        "use_padding==0 or block_size_x % 32 != 0",
+        "block_size_x*block_size_y<=1024",
+        "use_padding==0 or use_shmem != 0",
+        "use_shmem == 0 or (((block_size_x*tile_size_x+(filter_width-1)))*((block_size_y*tile_size_y+(filter_height-1)))) < 12*1024",
+    ]
+
+    # print(restrict)
+
+    problem_size = (image_width, image_height)
+    size = numpy.prod(problem_size)
+    largest_fh = filter_height
+    largest_fw = filter_width
+    input_size = (problem_size[0] + largest_fw - 1) * (problem_size[1] + largest_fh - 1)
+
+    output_image = numpy.zeros(size).astype(numpy.float32)
+    input_image = numpy.random.randn(input_size).astype(numpy.float32)
+    filter_weights = numpy.random.randn(largest_fh * largest_fw).astype(numpy.float32)
+
+    cmem_args = {"d_filter": filter_weights}
+    args = [output_image, input_image, filter_weights]
+
+    grid_div_x = ["block_size_x", "tile_size_x"]
+    grid_div_y = ["block_size_y", "tile_size_y"]
+
+    total_flops = ops(*inputs)
+    metrics = OrderedDict()
+    metrics["GFLOP/s"] = lambda p: total_flops / (p["time"] / 1000.0)
+
+    def run():
+        return kernel_tuner.tune_kernel(
+            "convolution_kernel",
+            kernel_string,
+            problem_size,
+            args,
+            tune_params,
+            grid_div_y=grid_div_y,
+            grid_div_x=grid_div_x,
+            cmem_args=cmem_args,
+            restrictions=restrict,
+            cache=directory / f"cachefiles/convolution_milo/{device_name}.json",
+            metrics=metrics,
+            lang=lang,
+            iterations=32,
+            device=0,
+            verbose=verbose,
+            quiet=quiet,
+            strategy=strategy,
+            strategy_options=strategy_options,
+            simulation_mode=simulation_mode,
+        )
+
+    # start tuning
+    if profiling:
+        import cProfile
+
+        with cProfile.Profile() as pr:
+            results, env = run()
+            if profiling:
+                pr.dump_stats('bo_prof.prof')
+    else:
+        results, env = run()
+
+    
+    # store_output_file(file_path_results, results, tune_params)
+    # store_metadata_file(file_path_metadata)
+    # print(results)
+    # print(env)
+    return results, env
+
+
+if __name__ == "__main__":
+    # language = sys.argv[1]
+    # device_name = sys.argv[2]
+    language = "CUDA"
+    device_name = "A100"
+
+    # if len(sys.argv) != 2:
+    #     print("Usage: ./convolution.py [language ('HIP' or 'CUDA')] [device name]")
+    #     exit(1)
+
+    if language not in ("HIP", "CUDA"):
+        raise ValueError(f"{language} not valid, specify HIP or CUDA")
+
+    tune(device_name=device_name, lang=language)

From 176b8f566423d6142dba190985e82c8b25af8497 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 10:34:20 -0800
Subject: [PATCH 042/168] Added objective, tuning direction and hyperparameter
 tuning language selection support

---
 kernel_tuner/interface.py                     |  8 +++-
 .../schema/T1/1.0.0/input-schema.json         |  3 +-
 .../schema/T4/1.0.0/results-schema.json       |  6 ++-
 kernel_tuner/strategies/common.py             | 44 +++++++++++--------
 4 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index e9469ec6d..f48d105dc 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -863,6 +863,8 @@ def _check_user_input(kernel_name, kernelsource, arguments, block_size_names):
 def tune_kernel_T1(
     input_filepath: Path,
     cache_filepath: Path = None,
+    objective="time",
+    objective_higher_is_better=False,
     simulation_mode=False,
     output_T4=True,
     iterations=7,
@@ -945,7 +947,7 @@ def tune_kernel_T1(
             raise NotImplementedError(f"Conversion for this type of argument has not yet been implemented: {arg}")
 
     # tune with the converted inputs
-    # TODO add objective to tune_kernel and get_t4_results calls once available in T1
+    # TODO get_t4_results calls once available in T1
     results, env = tune_kernel(
         kernel_name,
         kernel_source,
@@ -966,9 +968,11 @@ def tune_kernel_T1(
         iterations=iterations,
         strategy=strategy,
         strategy_options=strategy_options,
+        objective=objective,
+        objective_higher_is_better=objective_higher_is_better,
     )
     if output_T4:
-        return get_t4_metadata(), get_t4_results(results, tune_params)
+        return get_t4_metadata(), get_t4_results(results, tune_params, objective=objective)
     return results, env
 
 
diff --git a/kernel_tuner/schema/T1/1.0.0/input-schema.json b/kernel_tuner/schema/T1/1.0.0/input-schema.json
index bb53ee594..598a4b3d1 100644
--- a/kernel_tuner/schema/T1/1.0.0/input-schema.json
+++ b/kernel_tuner/schema/T1/1.0.0/input-schema.json
@@ -189,7 +189,8 @@
                     "enum": [
                         "OpenCL",
                         "CUDA",
-                        "Vulkan"
+                        "Vulkan",
+                        "Hypertuner"
                     ]
                 },
                 "CompilerOptions": {
diff --git a/kernel_tuner/schema/T4/1.0.0/results-schema.json b/kernel_tuner/schema/T4/1.0.0/results-schema.json
index 298f2662c..511042016 100644
--- a/kernel_tuner/schema/T4/1.0.0/results-schema.json
+++ b/kernel_tuner/schema/T4/1.0.0/results-schema.json
@@ -59,7 +59,11 @@
                                     "type": "string"
                                 },
                                 "value": {
-                                    "type": "number"
+                                    "type": [
+                                        "number",
+                                        "string",
+                                        "array"
+                                    ]
                                 },
                                 "unit": {
                                     "type": "string"
diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index 717d2ca7e..ed142d43c 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -30,7 +30,9 @@
 
 def get_strategy_docstring(name, strategy_options):
     """Generate docstring for a 'tune' method of a strategy."""
-    return _docstring_template.replace("$NAME$", name).replace("$STRAT_OPT$", make_strategy_options_doc(strategy_options))
+    return _docstring_template.replace("$NAME$", name).replace(
+        "$STRAT_OPT$", make_strategy_options_doc(strategy_options)
+    )
 
 
 def make_strategy_options_doc(strategy_options):
@@ -53,7 +55,9 @@ def get_options(strategy_options, options):
 
 
 class CostFunc:
-    def __init__(self, searchspace: Searchspace, tuning_options, runner, *, scaling=False, snap=True, return_invalid=False):
+    def __init__(
+        self, searchspace: Searchspace, tuning_options, runner, *, scaling=False, snap=True, return_invalid=False
+    ):
         self.runner = runner
         self.snap = snap
         self.scaling = scaling
@@ -61,7 +65,9 @@ def __init__(self, searchspace: Searchspace, tuning_options, runner, *, scaling=
         self.searchspace = searchspace
         self.tuning_options = tuning_options
         if isinstance(self.tuning_options, dict):
-            self.tuning_options['max_fevals'] = min(tuning_options['max_fevals'] if 'max_fevals' in tuning_options else np.inf, searchspace.size)
+            self.tuning_options["max_fevals"] = min(
+                tuning_options["max_fevals"] if "max_fevals" in tuning_options else np.inf, searchspace.size
+            )
         self.results = []
 
     def __call__(self, x, check_restrictions=True):
@@ -69,8 +75,8 @@ def __call__(self, x, check_restrictions=True):
         self.runner.last_strategy_time = 1000 * (perf_counter() - self.runner.last_strategy_start_time)
 
         # error value to return for numeric optimizers that need a numerical value
-        logging.debug('_cost_func called')
-        logging.debug('x: ' + str(x))
+        logging.debug("_cost_func called")
+        logging.debug("x: " + str(x))
 
         # check if max_fevals is reached or time limit is exceeded
         util.check_stop_criterion(self.tuning_options)
@@ -83,7 +89,7 @@ def __call__(self, x, check_restrictions=True):
                 params = snap_to_nearest_config(x, self.searchspace.tune_params)
         else:
             params = x
-        logging.debug('params ' + str(params))
+        logging.debug("params " + str(params))
 
         legal = True
         result = {}
@@ -152,10 +158,10 @@ def get_bounds_x0_eps(self):
                     eps = min(eps, np.amin(np.gradient(vals)))
 
         self.tuning_options["eps"] = eps
-        logging.debug('get_bounds_x0_eps called')
-        logging.debug('bounds ' + str(bounds))
-        logging.debug('x0 ' + str(x0))
-        logging.debug('eps ' + str(eps))
+        logging.debug("get_bounds_x0_eps called")
+        logging.debug("bounds " + str(bounds))
+        logging.debug("x0 " + str(x0))
+        logging.debug("eps " + str(eps))
 
         return bounds, x0, eps
 
@@ -173,7 +179,7 @@ def setup_method_arguments(method, bounds):
     kwargs = {}
     # pass bounds to methods that support it
     if method in ["L-BFGS-B", "TNC", "SLSQP"]:
-        kwargs['bounds'] = bounds
+        kwargs["bounds"] = bounds
     return kwargs
 
 
@@ -186,21 +192,21 @@ def setup_method_options(method, tuning_options):
         maxiter = tuning_options.strategy_options.maxiter
     else:
         maxiter = 100
-    kwargs['maxiter'] = maxiter
+    kwargs["maxiter"] = maxiter
     if method in ["Nelder-Mead", "Powell"]:
-        kwargs['maxfev'] = maxiter
+        kwargs["maxfev"] = maxiter
     elif method == "L-BFGS-B":
-        kwargs['maxfun'] = maxiter
+        kwargs["maxfun"] = maxiter
 
     # pass eps to methods that support it
     if method in ["CG", "BFGS", "L-BFGS-B", "TNC", "SLSQP"]:
-        kwargs['eps'] = tuning_options.eps
+        kwargs["eps"] = tuning_options.eps
     elif method == "COBYLA":
-        kwargs['rhobeg'] = tuning_options.eps
+        kwargs["rhobeg"] = tuning_options.eps
 
     # not all methods support 'disp' option
-    if method not in ['TNC']:
-        kwargs['disp'] = tuning_options.verbose
+    if method not in ["TNC"]:
+        kwargs["disp"] = tuning_options.verbose
 
     return kwargs
 
@@ -247,5 +253,5 @@ def scale_from_params(params, tune_params, eps):
     """Helper func to do the inverse of the 'unscale' function."""
     x = np.zeros(len(params))
     for i, v in enumerate(tune_params.values()):
-        x[i] = 0.5 * eps + v.index(params[i])*eps
+        x[i] = 0.5 * eps + v.index(params[i]) * eps
     return x

From 196af62d19b2a4540ed3d8edac623a4d793ce1be Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 18:16:44 -0800
Subject: [PATCH 043/168] Completed implementation of mixed-type handling and
 handling of invalid and evaluated configurations

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index dd7c3e956..6f80126e3 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -38,8 +38,8 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         # set up conversion to tensors
         self.searchspace = searchspace
         self.searchspace_tensors = searchspace.get_tensorspace()
-        self.train_X = torch.empty_like(self.searchspace_tensors)
-        self.train_Y = torch.empty(len(self.train_X))
+        self.train_X = torch.empty(0)
+        self.train_Y = torch.empty(0)
 
         # # get bounds
         # bounds = []
@@ -50,10 +50,10 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
     def run_config(self, config: tuple):
         """Run a single configuration. Returns the result and whether it is valid."""
         result = self.cost_func(config)
-        valid = not isinstance(result, util.ErrorConfig)
+        valid = not isinstance(result, util.ErrorConfig) and not np.isnan(result)
         if not valid:
             result = np.nan
-        return result, valid
+        return [result], valid
 
     def evaluate_configs(self, X: Tensor):
         """Evaluate a tensor of one or multiple configurations. Modifies train_X and train_Y accordingly."""
@@ -67,16 +67,17 @@ def evaluate_configs(self, X: Tensor):
                 param_config = self.searchspace.tensor_to_param_config(config)
                 res, valid = self.run_config(param_config)
                 if valid:
-                    valid_configs.append([config])
-                    valid_results.append([res])
+                    valid_configs.append(config)
+                    valid_results.append(res)
                 
                 # remove evaluated configurations from the full searchspace
                 index = self.searchspace.get_param_config_index(param_config)
                 self.searchspace_tensors = torch.cat((self.searchspace_tensors[:index], self.searchspace_tensors[index+1:]))
 
             # add valid results to the training set
-            self.train_X = torch.cat([self.train_X, torch.from_numpy(np.array(valid_configs))])
-            self.train_Y = torch.cat([self.train_Y, torch.from_numpy(np.array(valid_results))])
+            if len(valid_configs) > 0 and len(valid_results) > 0:
+                self.train_X = torch.cat([self.train_X, torch.from_numpy(np.array(valid_configs))])
+                self.train_Y = torch.cat([self.train_Y, torch.from_numpy(np.array(valid_results))])
         else:
             raise NotImplementedError(f"Evaluation has not been implemented for type {type(X)}")
         

From 55a5c1a221d24aedce4d351ba37c2fb15b81ac5d Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 18:19:50 -0800
Subject: [PATCH 044/168] Added docstrings, improved formatting

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 6f80126e3..4f2613ca4 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -22,6 +22,7 @@
 
 
 def tune(searchspace: Searchspace, runner, tuning_options):
+    """The entry function for tuning a searchspace using this algorithm."""
     max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
     bo = BayesianOptimization(searchspace, runner, tuning_options)
     return bo.run(max_fevals)
@@ -30,6 +31,7 @@ class BayesianOptimization():
     """Bayesian Optimization class."""
 
     def __init__(self, searchspace: Searchspace, runner, tuning_options):
+        """Initialization of the Bayesian Optimization class. Does not evaluate configurations."""
         self.initial_sample_taken = False
         self.initial_sample_size = tuning_options.strategy_options.get("popsize", 20)
         self.tuning_options = tuning_options
@@ -41,12 +43,6 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.train_X = torch.empty(0)
         self.train_Y = torch.empty(0)
 
-        # # get bounds
-        # bounds = []
-        # for v in searchspace.params_values:
-        #     bounds.append([min(v), max(v)])
-        # bounds = torch.from_numpy(np.array(bounds).transpose())
-
     def run_config(self, config: tuple):
         """Run a single configuration. Returns the result and whether it is valid."""
         result = self.cost_func(config)
@@ -72,7 +68,8 @@ def evaluate_configs(self, X: Tensor):
                 
                 # remove evaluated configurations from the full searchspace
                 index = self.searchspace.get_param_config_index(param_config)
-                self.searchspace_tensors = torch.cat((self.searchspace_tensors[:index], self.searchspace_tensors[index+1:]))
+                self.searchspace_tensors = torch.cat((self.searchspace_tensors[:index], 
+                                                      self.searchspace_tensors[index+1:]))
 
             # add valid results to the training set
             if len(valid_configs) > 0 and len(valid_results) > 0:

From d64f783c04f26e8d239214d862ee7b60adfde678 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 20:09:34 -0800
Subject: [PATCH 045/168] Extended strategies test to test for ability to
 handle non-numeric and mixed parameter values

---
 test/strategies/test_strategies.py | 10 +++-
 test/test_cache_file.json          | 94 +++++++++++++++++++++++++-----
 2 files changed, 87 insertions(+), 17 deletions(-)

diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
index 11b231e62..4e4fbb8c1 100644
--- a/test/strategies/test_strategies.py
+++ b/test/strategies/test_strategies.py
@@ -31,6 +31,9 @@ def vector_add():
     args = [c, a, b, n]
     tune_params = dict()
     tune_params["block_size_x"] = [128 + 64 * i for i in range(15)]
+    tune_params["test_string"] = ["alg_1", "alg_2"]
+    tune_params["test_bool"] = [True, False]
+    tune_params["test_mixed"] = ["test", 1, True, 2.45]
 
     return ["vector_add", kernel_string, size, args, tune_params]
 
@@ -58,7 +61,9 @@ def test_strategies(vector_add, strategy):
         filter_options = options
     filter_options["max_fevals"] = 10
 
-    results, _ = kernel_tuner.tune_kernel(*vector_add, strategy=strategy, strategy_options=filter_options,
+    restrictions = ["test_string == 'alg_2'", "test_bool == True", "test_mixed == 2.45"]
+
+    results, _ = kernel_tuner.tune_kernel(*vector_add, restrictions=restrictions, strategy=strategy, strategy_options=filter_options,
                                          verbose=False, cache=cache_filename, simulation_mode=True)
 
     assert len(results) > 0
@@ -76,6 +81,9 @@ def test_strategies(vector_add, strategy):
     # check whether the returned dictionaries contain exactly the expected keys and the appropriate type
     expected_items = {
         'block_size_x': int,
+        'test_string': str,
+        'test_bool': bool,
+        'test_mixed': float,
         'time': (float, int),
         'times': list,
         'compile_time': (float, int),
diff --git a/test/test_cache_file.json b/test/test_cache_file.json
index 3299441c5..5e0c0e054 100644
--- a/test/test_cache_file.json
+++ b/test/test_cache_file.json
@@ -2,7 +2,10 @@
     "device_name": "NVIDIA RTX A4000",
     "kernel_name": "vector_add",
     "tune_params_keys": [
-        "block_size_x"
+        "block_size_x",
+        "test_string",
+        "test_bool",
+        "test_mixed"
     ],
     "tune_params": {
         "block_size_x": [
@@ -21,11 +24,28 @@
             896,
             960,
             1024
+        ],
+        "test_string": [
+            "alg_1",
+            "alg_2"
+        ],
+        "test_bool": [
+            true,
+            false
+        ],
+        "test_mixed": [
+            "test",
+            1,
+            true,
+            2.45
         ]
     },
     "cache": {
-        "128": {
+        "128,alg_2,True,2.45": {
             "block_size_x": 128,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.04073600071881499,
             "times": [
                 0.1268800050020218,
@@ -43,8 +63,11 @@
             "framework_time": 0.8587837219238281,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "192": {
+        "192,alg_2,True,2.45": {
             "block_size_x": 192,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.04095085710287094,
             "times": [
                 0.12908799946308136,
@@ -62,8 +85,11 @@
             "framework_time": 1.6656816005706787,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "256": {
+        "256,alg_2,True,2.45": {
             "block_size_x": 256,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.04189257137477398,
             "times": [
                 0.13180799782276154,
@@ -81,8 +107,11 @@
             "framework_time": 1.6054585576057434,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "320": {
+        "320,alg_2,True,2.45": {
             "block_size_x": 320,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.04208914376795292,
             "times": [
                 0.1358720064163208,
@@ -100,8 +129,11 @@
             "framework_time": 1.4494173228740692,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "384": {
+        "384,alg_2,True,2.45": {
             "block_size_x": 384,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.04174171467976911,
             "times": [
                 0.13251200318336487,
@@ -119,8 +151,11 @@
             "framework_time": 1.682564616203308,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "448": {
+        "448,alg_2,True,2.45": {
             "block_size_x": 448,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.03249828570655414,
             "times": [
                 0.0647680014371872,
@@ -138,8 +173,11 @@
             "framework_time": 1.5890561044216156,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "512": {
+        "512,alg_2,True,2.45": {
             "block_size_x": 512,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.04139885599059718,
             "times": [
                 0.13023999333381653,
@@ -157,8 +195,11 @@
             "framework_time": 1.853298395872116,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "576": {
+        "576,alg_2,True,2.45": {
             "block_size_x": 576,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.04189257137477398,
             "times": [
                 0.12995199859142303,
@@ -176,8 +217,11 @@
             "framework_time": 1.8403716385364532,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "640": {
+        "640,alg_2,True,2.45": {
             "block_size_x": 640,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.0411702852163996,
             "times": [
                 0.12796799838542938,
@@ -195,8 +239,11 @@
             "framework_time": 1.8264725804328918,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "704": {
+        "704,alg_2,True,2.45": {
             "block_size_x": 704,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.04184228580977235,
             "times": [
                 0.1343040019273758,
@@ -214,8 +261,11 @@
             "framework_time": 1.6709677875041962,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "768": {
+        "768,alg_2,True,2.45": {
             "block_size_x": 768,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.03175771422684193,
             "times": [
                 0.06230400130152702,
@@ -233,8 +283,11 @@
             "framework_time": 1.7531625926494598,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "832": {
+        "832,alg_2,True,2.45": {
             "block_size_x": 832,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.040941715240478516,
             "times": [
                 0.12998400628566742,
@@ -252,8 +305,11 @@
             "framework_time": 2.1368376910686493,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "896": {
+        "896,alg_2,True,2.45": {
             "block_size_x": 896,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.04177371359297207,
             "times": [
                 0.12931199371814728,
@@ -271,8 +327,11 @@
             "framework_time": 2.03637033700943,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "960": {
+        "960,alg_2,True,2.45": {
             "block_size_x": 960,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.042189714631864,
             "times": [
                 0.1335040032863617,
@@ -290,8 +349,11 @@
             "framework_time": 1.7383433878421783,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "1024": {
+        "1024,alg_2,True,2.45": {
             "block_size_x": 1024,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
             "time": 0.04114742816558906,
             "times": [
                 0.13087999820709229,

From e95ab30ca2d5bdd885616a0f3ee1ffb7b05dc475 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 20:10:39 -0800
Subject: [PATCH 046/168] Mixed-type parameters are not converted to numeric
 constraints

---
 kernel_tuner/util.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index e8d194e11..dac5d6de4 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -889,7 +889,7 @@ def to_numeric_constraint(
         if len(comparators_found) != 1:
             return None
         comparator = comparators_found[0]
-
+    
         # split the string on the comparison and remove leading and trailing whitespace
         left, right = tuple(s.strip() for s in restriction.split(comparator))
 
@@ -1032,7 +1032,8 @@ def to_equality_constraint(
                 ):
                     parsed_restriction = parsed_restriction[1:-1]
                 # check if we can turn this into the built-in numeric comparison constraint
-                finalized_constraint = to_numeric_constraint(parsed_restriction, params_used)
+                if all(all(isinstance(v, (int, float)) and type(v) is not type(True) for v in tune_params[param]) for param in params_used):
+                    finalized_constraint = to_numeric_constraint(parsed_restriction, params_used)
                 if finalized_constraint is None:
                     # check if we can turn this into the built-in equality comparison constraint
                     finalized_constraint = to_equality_constraint(parsed_restriction, params_used)

From 10a6a5c1557a3bfefa2218657136e05f944a6fea Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 20:11:58 -0800
Subject: [PATCH 047/168] CostFunc can now encode and decode non-numeric
 configurations for strategies that require only numerics

---
 kernel_tuner/strategies/common.py | 53 +++++++++++++++++++++++++++++--
 1 file changed, 51 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index ed142d43c..9c2623132 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -1,4 +1,5 @@
 import logging
+import numbers
 import sys
 from time import perf_counter
 
@@ -56,11 +57,24 @@ def get_options(strategy_options, options):
 
 class CostFunc:
     def __init__(
-        self, searchspace: Searchspace, tuning_options, runner, *, scaling=False, snap=True, return_invalid=False
+        self, searchspace: Searchspace, tuning_options, runner, *, 
+        scaling=False, snap=True, encode_non_numeric=False, return_invalid=False
     ):
+        """An abstract method to handle evaluation of configurations.
+
+        Args:
+            searchspace: the Searchspace to evaluate on.
+            tuning_options: various tuning options.
+            runner: the runner to use.
+            scaling: whether to internally scale parameter values. Defaults to False.
+            snap: whether to snap given configurations to their closests equivalent in the space. Defaults to True.
+            encode_non_numeric: whether to externally encode non-numeric parameter values. Defaults to False.
+            return_invalid: whether to return the util.ErrorConfig of an invalid configuration. Defaults to False.
+        """        
         self.runner = runner
         self.snap = snap
         self.scaling = scaling
+        self.encode_non_numeric = encode_non_numeric
         self.return_invalid = return_invalid
         self.searchspace = searchspace
         self.tuning_options = tuning_options
@@ -70,9 +84,24 @@ def __init__(
             )
         self.results = []
 
+        # if enabled, encode non-numeric parameter values as a numeric value
+        if self.encode_non_numeric:
+            self._map_param_to_encoded = {}
+            self._map_encoded_to_param = {}
+            self.encoded_params_values = []
+            for i, param_values in enumerate(self.searchspace.params_values):
+                encoded_values = param_values
+                if not all(isinstance(v, numbers.Real) for v in param_values):
+                    encoded_values = np.arange(len(param_values))
+                    self._map_param_to_encoded[i] = dict(zip(param_values, encoded_values))
+                    self._map_encoded_to_param[i] = dict(zip(encoded_values, param_values))
+                self.encoded_params_values.append(encoded_values)
+
     def __call__(self, x, check_restrictions=True):
         """Cost function used by almost all strategies."""
         self.runner.last_strategy_time = 1000 * (perf_counter() - self.runner.last_strategy_start_time)
+        if self.encode_non_numeric:
+            x = self.encoded_to_params(x)
 
         # error value to return for numeric optimizers that need a numerical value
         logging.debug("_cost_func called")
@@ -168,10 +197,30 @@ def get_bounds_x0_eps(self):
     def get_bounds(self):
         """Create a bounds array from the tunable parameters."""
         bounds = []
-        for values in self.searchspace.tune_params.values():
+        for values in self.encoded_params_values if self.encode_non_numeric else  self.searchspace.params_values:
             sorted_values = np.sort(values)
             bounds.append((sorted_values[0], sorted_values[-1]))
         return bounds
+    
+    def encoded_to_params(self, config):
+        """Convert from an encoded configuration to the real parameters."""
+        if not self.encode_non_numeric:
+            raise ValueError("'encode_non_numeric' must be set to true to use this function.")
+        params = []
+        for i, v in enumerate(config):
+            params.append(self._map_encoded_to_param[i][v] if i in self._map_encoded_to_param else v)
+        assert len(params) == len(config)            
+        return params
+    
+    def params_to_encoded(self, config):
+        """Convert from a parameter configuration to the encoded configuration."""
+        if not self.encode_non_numeric:
+            raise ValueError("'encode_non_numeric' must be set to true to use this function.")
+        encoded = []
+        for i, v in enumerate(config):
+            encoded.append(self._map_param_to_encoded[i][v] if i in self._map_param_to_encoded else v)
+        assert len(encoded) == len(config)            
+        return encoded
 
 
 def setup_method_arguments(method, bounds):

From 6ae3ba65d010a7aeb10b38eb1007b25ebdbc6760 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 20:16:33 -0800
Subject: [PATCH 048/168] Fixed logging statements, improved formatting

---
 kernel_tuner/strategies/common.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index 9c2623132..28b36c84f 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -1,3 +1,5 @@
+"""Module for functionality that is commonly used throughout Kernel Tuner."""
+
 import logging
 import numbers
 import sys
@@ -56,6 +58,8 @@ def get_options(strategy_options, options):
 
 
 class CostFunc:
+    """Class encapsulating the CostFunc method."""
+
     def __init__(
         self, searchspace: Searchspace, tuning_options, runner, *, 
         scaling=False, snap=True, encode_non_numeric=False, return_invalid=False
@@ -105,7 +109,7 @@ def __call__(self, x, check_restrictions=True):
 
         # error value to return for numeric optimizers that need a numerical value
         logging.debug("_cost_func called")
-        logging.debug("x: " + str(x))
+        logging.debug("x: %s", str(x))
 
         # check if max_fevals is reached or time limit is exceeded
         util.check_stop_criterion(self.tuning_options)
@@ -118,7 +122,7 @@ def __call__(self, x, check_restrictions=True):
                 params = snap_to_nearest_config(x, self.searchspace.tune_params)
         else:
             params = x
-        logging.debug("params " + str(params))
+        logging.debug("params %s", str(params))
 
         legal = True
         result = {}
@@ -188,9 +192,9 @@ def get_bounds_x0_eps(self):
 
         self.tuning_options["eps"] = eps
         logging.debug("get_bounds_x0_eps called")
-        logging.debug("bounds " + str(bounds))
-        logging.debug("x0 " + str(x0))
-        logging.debug("eps " + str(eps))
+        logging.debug("bounds %s", str(bounds))
+        logging.debug("x0 %s", str(x0))
+        logging.debug("eps %s", str(eps))
 
         return bounds, x0, eps
 

From 4873a20c59b60325affc56af1fcc093787754356 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 20:18:34 -0800
Subject: [PATCH 049/168] Improved the performance of get_bounds

---
 kernel_tuner/strategies/common.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index 28b36c84f..5f64618d5 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -201,9 +201,8 @@ def get_bounds_x0_eps(self):
     def get_bounds(self):
         """Create a bounds array from the tunable parameters."""
         bounds = []
-        for values in self.encoded_params_values if self.encode_non_numeric else  self.searchspace.params_values:
-            sorted_values = np.sort(values)
-            bounds.append((sorted_values[0], sorted_values[-1]))
+        for values in self.encoded_params_values if self.encode_non_numeric else self.searchspace.params_values:
+            bounds.append((min(values), max(values)))
         return bounds
     
     def encoded_to_params(self, config):

From bae7e9678711e499561f00030346689cfc55db7f Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 20:20:02 -0800
Subject: [PATCH 050/168] Applied non-numeric encoding in differential
 evolution to handle non-numeric parameter values

---
 kernel_tuner/strategies/diff_evo.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index 5ad2b9474..62e966f33 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -6,7 +6,8 @@
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc
 
-supported_methods = ["best1bin", "best1exp", "rand1exp", "randtobest1exp", "best2exp", "rand2exp", "randtobest1bin", "best2bin", "rand2bin", "rand1bin"]
+supported_methods = ["best1bin", "best1exp", "rand1exp", "randtobest1exp", "best2exp", "rand2exp",
+                     "randtobest1bin", "best2bin", "rand2bin", "rand1bin"]
 
 _options = dict(method=(f"Creation method for new population, any of {supported_methods}", "best1bin"),
                        popsize=("Population size", 20),
@@ -18,17 +19,18 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
     method, popsize, maxiter = common.get_options(tuning_options.strategy_options, _options)
 
-    # build a bounds array as needed for the optimizer
-    cost_func = CostFunc(searchspace, tuning_options, runner)
+    # build a bounds array as needed for the optimizer, and encode because it can't handle non-numeric values
+    cost_func = CostFunc(searchspace, tuning_options, runner, encode_non_numeric=True)
     bounds = cost_func.get_bounds()
 
     # ensure particles start from legal points
     population = list(list(p) for p in searchspace.get_random_sample(popsize))
+    population_enc = [cost_func.params_to_encoded(c) for c in population]
 
     # call the differential evolution optimizer
     opt_result = None
     try:
-        opt_result = differential_evolution(cost_func, bounds, maxiter=maxiter, popsize=popsize, init=population,
+        opt_result = differential_evolution(cost_func, bounds, maxiter=maxiter, popsize=popsize, init=population_enc,
                                         polish=False, strategy=method, disp=tuning_options.verbose)
     except util.StopCriterionReached as e:
         if tuning_options.verbose:

From 7eb7ef7b86c50f26d5a65beadd69baf6492ad020 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 20:50:53 -0800
Subject: [PATCH 051/168] Implemented automatic conversion to multiple types
 for encoded tensor parameter lookup

---
 kernel_tuner/searchspace.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 69738bc12..c18a7518c 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -623,7 +623,17 @@ def param_config_to_tensor(self, param_config: tuple):
             self.initialize_tensorspace()
         array = []
         for i, param in enumerate(param_config):
-            array.append(self._map_param_to_tensor[i][param])
+            mapping = self._map_param_to_tensor[i]
+            conversions = [None, str, float, int, bool]
+            for c in conversions:
+                try:
+                    c_param = param if c is None else c(param)
+                    array.append(mapping[c_param])
+                    break
+                except (KeyError, ValueError) as e:
+                    if c == conversions[-1]:
+                        raise KeyError(f"No variant of {param} could be found in {mapping}") from e
+
         # TODO write tests
         return torch.from_numpy(np.array(array))
     

From 91d3ce4f8b87d7bff1b963297d4fc9c666fb1243 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 21:13:49 -0800
Subject: [PATCH 052/168] Added tests for Searchspace tensor encoding and
 conversion

---
 kernel_tuner/searchspace.py       |  3 ---
 kernel_tuner/strategies/common.py |  2 +-
 test/test_searchspace.py          | 15 +++++++++++++++
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index c18a7518c..6b3e54e21 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -633,8 +633,6 @@ def param_config_to_tensor(self, param_config: tuple):
                 except (KeyError, ValueError) as e:
                     if c == conversions[-1]:
                         raise KeyError(f"No variant of {param} could be found in {mapping}") from e
-
-        # TODO write tests
         return torch.from_numpy(np.array(array))
     
     def tensor_to_param_config(self, tensor: Tensor):
@@ -645,7 +643,6 @@ def tensor_to_param_config(self, tensor: Tensor):
         config = []
         for i, param in enumerate(tensor):
             config.append(self._map_tensor_to_param[i][float(param)])
-        # TODO write tests
         return tuple(config)
 
     def __prepare_neighbors_index(self):
diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index 5f64618d5..7901f97a0 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -1,4 +1,4 @@
-"""Module for functionality that is commonly used throughout Kernel Tuner."""
+"""Module for functionality that is commonly used throughout the strategies."""
 
 import logging
 import numbers
diff --git a/test/test_searchspace.py b/test/test_searchspace.py
index 8672c1d03..48f049750 100644
--- a/test/test_searchspace.py
+++ b/test/test_searchspace.py
@@ -177,6 +177,21 @@ def test_param_index_lookup():
     assert simple_searchspace.get_param_indices(last) == (3, 1, 1)
 
 
+def test_get_tensorspace():
+    """Test the generation of a tensor space."""
+    tensorspace = simple_searchspace.get_tensorspace()
+    assert tensorspace.shape == simple_searchspace.get_list_numpy().shape
+
+
+def test_conversion_tensor_param_config():
+    """Test the conversion from a parameter configuration to a tensor and tensor to parameter configuration."""
+    for config in simple_searchspace.list:
+        tensor = simple_searchspace.param_config_to_tensor(config)
+        config_2 = simple_searchspace.tensor_to_param_config(tensor)
+        assert config == config_2
+        assert tensor.equal(simple_searchspace.param_config_to_tensor(config_2))
+
+
 def test_random_sample():
     """Test whether the random sample indices exists and are unique, and if it throws an error for too many samples."""
     random_sample_indices = searchspace.get_random_sample_indices(100)

From 80d514e65b9038364723a6d89e05fe93a9ea6b81 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 21:19:27 -0800
Subject: [PATCH 053/168] Separated strategies and runners test cache file

---
 .gitignore                           |   1 +
 test/strategies/test_cache_file.json | 375 +++++++++++++++++++++++++++
 test/strategies/test_strategies.py   |   2 +-
 test/test_cache_file.json            |  94 ++-----
 4 files changed, 393 insertions(+), 79 deletions(-)
 create mode 100644 test/strategies/test_cache_file.json

diff --git a/.gitignore b/.gitignore
index eb59e44cb..ce4873209 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,6 +20,7 @@ push_to_pypi.sh
 *.json
 !kernel_tuner/schema/T1/1.0.0/input-schema.json
 !test/test_T1_input.json
+!test_cache_file.json
 *.csv
 .cache
 *.ipynb_checkpoints
diff --git a/test/strategies/test_cache_file.json b/test/strategies/test_cache_file.json
new file mode 100644
index 000000000..5e0c0e054
--- /dev/null
+++ b/test/strategies/test_cache_file.json
@@ -0,0 +1,375 @@
+{
+    "device_name": "NVIDIA RTX A4000",
+    "kernel_name": "vector_add",
+    "tune_params_keys": [
+        "block_size_x",
+        "test_string",
+        "test_bool",
+        "test_mixed"
+    ],
+    "tune_params": {
+        "block_size_x": [
+            128,
+            192,
+            256,
+            320,
+            384,
+            448,
+            512,
+            576,
+            640,
+            704,
+            768,
+            832,
+            896,
+            960,
+            1024
+        ],
+        "test_string": [
+            "alg_1",
+            "alg_2"
+        ],
+        "test_bool": [
+            true,
+            false
+        ],
+        "test_mixed": [
+            "test",
+            1,
+            true,
+            2.45
+        ]
+    },
+    "cache": {
+        "128,alg_2,True,2.45": {
+            "block_size_x": 128,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.04073600071881499,
+            "times": [
+                0.1268800050020218,
+                0.031072000041604042,
+                0.027295999228954315,
+                0.025472000241279602,
+                0.025119999423623085,
+                0.025248000398278236,
+                0.024064000695943832
+            ],
+            "compile_time": 440.9545585513115,
+            "verification_time": 0,
+            "benchmark_time": 1.091592013835907,
+            "strategy_time": 0,
+            "framework_time": 0.8587837219238281,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "192,alg_2,True,2.45": {
+            "block_size_x": 192,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.04095085710287094,
+            "times": [
+                0.12908799946308136,
+                0.03046399913728237,
+                0.027744000777602196,
+                0.025151999667286873,
+                0.024960000067949295,
+                0.024992000311613083,
+                0.02425600029528141
+            ],
+            "compile_time": 436.15153804421425,
+            "verification_time": 0,
+            "benchmark_time": 1.0972395539283752,
+            "strategy_time": 0,
+            "framework_time": 1.6656816005706787,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "256,alg_2,True,2.45": {
+            "block_size_x": 256,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.04189257137477398,
+            "times": [
+                0.13180799782276154,
+                0.031136000528931618,
+                0.028095999732613564,
+                0.027008000761270523,
+                0.025087999179959297,
+                0.02505600079894066,
+                0.02505600079894066
+            ],
+            "compile_time": 436.5839697420597,
+            "verification_time": 0,
+            "benchmark_time": 1.0691732168197632,
+            "strategy_time": 0,
+            "framework_time": 1.6054585576057434,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "320,alg_2,True,2.45": {
+            "block_size_x": 320,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.04208914376795292,
+            "times": [
+                0.1358720064163208,
+                0.030688000842928886,
+                0.02768000029027462,
+                0.02582399919629097,
+                0.025087999179959297,
+                0.025312000885605812,
+                0.024159999564290047
+            ],
+            "compile_time": 438.9761835336685,
+            "verification_time": 0,
+            "benchmark_time": 1.0976120829582214,
+            "strategy_time": 0,
+            "framework_time": 1.4494173228740692,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "384,alg_2,True,2.45": {
+            "block_size_x": 384,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.04174171467976911,
+            "times": [
+                0.13251200318336487,
+                0.03167999908328056,
+                0.027871999889612198,
+                0.025312000885605812,
+                0.024671999737620354,
+                0.02505600079894066,
+                0.025087999179959297
+            ],
+            "compile_time": 440.71199372410774,
+            "verification_time": 0,
+            "benchmark_time": 1.0499358177185059,
+            "strategy_time": 0,
+            "framework_time": 1.682564616203308,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "448,alg_2,True,2.45": {
+            "block_size_x": 448,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.03249828570655414,
+            "times": [
+                0.0647680014371872,
+                0.03167999908328056,
+                0.028255999088287354,
+                0.025280000641942024,
+                0.027103999629616737,
+                0.02550400048494339,
+                0.02489599958062172
+            ],
+            "compile_time": 449.13655519485474,
+            "verification_time": 0,
+            "benchmark_time": 1.1196956038475037,
+            "strategy_time": 0,
+            "framework_time": 1.5890561044216156,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "512,alg_2,True,2.45": {
+            "block_size_x": 512,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.04139885599059718,
+            "times": [
+                0.13023999333381653,
+                0.031136000528931618,
+                0.02831999957561493,
+                0.02595200017094612,
+                0.024607999250292778,
+                0.025151999667286873,
+                0.024383999407291412
+            ],
+            "compile_time": 440.5844733119011,
+            "verification_time": 0,
+            "benchmark_time": 1.09076127409935,
+            "strategy_time": 0,
+            "framework_time": 1.853298395872116,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "576,alg_2,True,2.45": {
+            "block_size_x": 576,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.04189257137477398,
+            "times": [
+                0.12995199859142303,
+                0.03200000151991844,
+                0.028511999174952507,
+                0.026623999699950218,
+                0.025760000571608543,
+                0.02537599951028824,
+                0.02502400055527687
+            ],
+            "compile_time": 442.16764718294144,
+            "verification_time": 0,
+            "benchmark_time": 1.1038780212402344,
+            "strategy_time": 0,
+            "framework_time": 1.8403716385364532,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "640,alg_2,True,2.45": {
+            "block_size_x": 640,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.0411702852163996,
+            "times": [
+                0.12796799838542938,
+                0.03081599995493889,
+                0.02969600073993206,
+                0.025439999997615814,
+                0.02409599907696247,
+                0.02582399919629097,
+                0.024351999163627625
+            ],
+            "compile_time": 437.98910081386566,
+            "verification_time": 0,
+            "benchmark_time": 1.0496266186237335,
+            "strategy_time": 0,
+            "framework_time": 1.8264725804328918,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "704,alg_2,True,2.45": {
+            "block_size_x": 704,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.04184228580977235,
+            "times": [
+                0.1343040019273758,
+                0.03094400092959404,
+                0.02908799983561039,
+                0.025151999667286873,
+                0.02486399933695793,
+                0.024447999894618988,
+                0.02409599907696247
+            ],
+            "compile_time": 443.51235404610634,
+            "verification_time": 0,
+            "benchmark_time": 1.1033527553081512,
+            "strategy_time": 0,
+            "framework_time": 1.6709677875041962,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "768,alg_2,True,2.45": {
+            "block_size_x": 768,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.03175771422684193,
+            "times": [
+                0.06230400130152702,
+                0.0315839983522892,
+                0.02831999957561493,
+                0.02672000043094158,
+                0.023679999634623528,
+                0.023903999477624893,
+                0.02579200081527233
+            ],
+            "compile_time": 450.4409395158291,
+            "verification_time": 0,
+            "benchmark_time": 1.101326197385788,
+            "strategy_time": 0,
+            "framework_time": 1.7531625926494598,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "832,alg_2,True,2.45": {
+            "block_size_x": 832,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.040941715240478516,
+            "times": [
+                0.12998400628566742,
+                0.03094400092959404,
+                0.027103999629616737,
+                0.024768000468611717,
+                0.025439999997615814,
+                0.023903999477624893,
+                0.024447999894618988
+            ],
+            "compile_time": 439.9200603365898,
+            "verification_time": 0,
+            "benchmark_time": 1.0421127080917358,
+            "strategy_time": 0,
+            "framework_time": 2.1368376910686493,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "896,alg_2,True,2.45": {
+            "block_size_x": 896,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.04177371359297207,
+            "times": [
+                0.12931199371814728,
+                0.03731200098991394,
+                0.02812799997627735,
+                0.02502400055527687,
+                0.02412799932062626,
+                0.024768000468611717,
+                0.023744000121951103
+            ],
+            "compile_time": 439.23527002334595,
+            "verification_time": 0,
+            "benchmark_time": 1.0946877300739288,
+            "strategy_time": 0,
+            "framework_time": 2.03637033700943,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "960,alg_2,True,2.45": {
+            "block_size_x": 960,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.042189714631864,
+            "times": [
+                0.1335040032863617,
+                0.031039999797940254,
+                0.02876799926161766,
+                0.02579200081527233,
+                0.025119999423623085,
+                0.02566399984061718,
+                0.025439999997615814
+            ],
+            "compile_time": 441.7596235871315,
+            "verification_time": 0,
+            "benchmark_time": 1.1166557669639587,
+            "strategy_time": 0,
+            "framework_time": 1.7383433878421783,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        },
+        "1024,alg_2,True,2.45": {
+            "block_size_x": 1024,
+            "test_string": "alg_2",
+            "test_bool": true,
+            "test_mixed": 2.45,
+            "time": 0.04114742816558906,
+            "times": [
+                0.13087999820709229,
+                0.03049599938094616,
+                0.027936000376939774,
+                0.02486399933695793,
+                0.0244159996509552,
+                0.024320000782608986,
+                0.025119999423623085
+            ],
+            "compile_time": 442.8337663412094,
+            "verification_time": 0,
+            "benchmark_time": 1.0683201253414154,
+            "strategy_time": 0,
+            "framework_time": 1.9918642938137054,
+            "timestamp": "2022-12-23 12:11:26.411558+00:00"
+        }
+    }
+}
\ No newline at end of file
diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
index 4e4fbb8c1..b7b2851dd 100644
--- a/test/strategies/test_strategies.py
+++ b/test/strategies/test_strategies.py
@@ -9,7 +9,7 @@
 
 from ..context import skip_if_no_bayesopt_botorch, skip_if_no_bayesopt_gpytorch
 
-cache_filename = os.path.dirname(os.path.realpath(__file__)) + "/../test_cache_file.json"
+cache_filename = os.path.dirname(os.path.realpath(__file__)) + "/test_cache_file.json"
 
 @pytest.fixture
 def vector_add():
diff --git a/test/test_cache_file.json b/test/test_cache_file.json
index 5e0c0e054..3299441c5 100644
--- a/test/test_cache_file.json
+++ b/test/test_cache_file.json
@@ -2,10 +2,7 @@
     "device_name": "NVIDIA RTX A4000",
     "kernel_name": "vector_add",
     "tune_params_keys": [
-        "block_size_x",
-        "test_string",
-        "test_bool",
-        "test_mixed"
+        "block_size_x"
     ],
     "tune_params": {
         "block_size_x": [
@@ -24,28 +21,11 @@
             896,
             960,
             1024
-        ],
-        "test_string": [
-            "alg_1",
-            "alg_2"
-        ],
-        "test_bool": [
-            true,
-            false
-        ],
-        "test_mixed": [
-            "test",
-            1,
-            true,
-            2.45
         ]
     },
     "cache": {
-        "128,alg_2,True,2.45": {
+        "128": {
             "block_size_x": 128,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.04073600071881499,
             "times": [
                 0.1268800050020218,
@@ -63,11 +43,8 @@
             "framework_time": 0.8587837219238281,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "192,alg_2,True,2.45": {
+        "192": {
             "block_size_x": 192,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.04095085710287094,
             "times": [
                 0.12908799946308136,
@@ -85,11 +62,8 @@
             "framework_time": 1.6656816005706787,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "256,alg_2,True,2.45": {
+        "256": {
             "block_size_x": 256,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.04189257137477398,
             "times": [
                 0.13180799782276154,
@@ -107,11 +81,8 @@
             "framework_time": 1.6054585576057434,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "320,alg_2,True,2.45": {
+        "320": {
             "block_size_x": 320,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.04208914376795292,
             "times": [
                 0.1358720064163208,
@@ -129,11 +100,8 @@
             "framework_time": 1.4494173228740692,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "384,alg_2,True,2.45": {
+        "384": {
             "block_size_x": 384,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.04174171467976911,
             "times": [
                 0.13251200318336487,
@@ -151,11 +119,8 @@
             "framework_time": 1.682564616203308,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "448,alg_2,True,2.45": {
+        "448": {
             "block_size_x": 448,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.03249828570655414,
             "times": [
                 0.0647680014371872,
@@ -173,11 +138,8 @@
             "framework_time": 1.5890561044216156,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "512,alg_2,True,2.45": {
+        "512": {
             "block_size_x": 512,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.04139885599059718,
             "times": [
                 0.13023999333381653,
@@ -195,11 +157,8 @@
             "framework_time": 1.853298395872116,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "576,alg_2,True,2.45": {
+        "576": {
             "block_size_x": 576,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.04189257137477398,
             "times": [
                 0.12995199859142303,
@@ -217,11 +176,8 @@
             "framework_time": 1.8403716385364532,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "640,alg_2,True,2.45": {
+        "640": {
             "block_size_x": 640,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.0411702852163996,
             "times": [
                 0.12796799838542938,
@@ -239,11 +195,8 @@
             "framework_time": 1.8264725804328918,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "704,alg_2,True,2.45": {
+        "704": {
             "block_size_x": 704,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.04184228580977235,
             "times": [
                 0.1343040019273758,
@@ -261,11 +214,8 @@
             "framework_time": 1.6709677875041962,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "768,alg_2,True,2.45": {
+        "768": {
             "block_size_x": 768,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.03175771422684193,
             "times": [
                 0.06230400130152702,
@@ -283,11 +233,8 @@
             "framework_time": 1.7531625926494598,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "832,alg_2,True,2.45": {
+        "832": {
             "block_size_x": 832,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.040941715240478516,
             "times": [
                 0.12998400628566742,
@@ -305,11 +252,8 @@
             "framework_time": 2.1368376910686493,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "896,alg_2,True,2.45": {
+        "896": {
             "block_size_x": 896,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.04177371359297207,
             "times": [
                 0.12931199371814728,
@@ -327,11 +271,8 @@
             "framework_time": 2.03637033700943,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "960,alg_2,True,2.45": {
+        "960": {
             "block_size_x": 960,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.042189714631864,
             "times": [
                 0.1335040032863617,
@@ -349,11 +290,8 @@
             "framework_time": 1.7383433878421783,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "1024,alg_2,True,2.45": {
+        "1024": {
             "block_size_x": 1024,
-            "test_string": "alg_2",
-            "test_bool": true,
-            "test_mixed": 2.45,
             "time": 0.04114742816558906,
             "times": [
                 0.13087999820709229,

From a489252d26f9987cccb0c02a5b21963a47acba93 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 21:31:25 -0800
Subject: [PATCH 054/168] Implemented handling of categorical parameters

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 4f2613ca4..d0c56476e 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -6,7 +6,7 @@
     import torch
     from botorch import fit_gpytorch_mll
     from botorch.acquisition import ExpectedImprovement
-    from botorch.models import SingleTaskGP
+    from botorch.models import MixedSingleTaskGP, SingleTaskGP
     from botorch.optim import optimize_acqf_discrete
     from gpytorch.mlls import ExactMarginalLogLikelihood
     from torch import Tensor
@@ -87,7 +87,10 @@ def initial_sample(self):
 
     def initialize_model(self, state_dict=None):
         """Initialize the model, possibly with a state dict for faster fitting."""
-        model = SingleTaskGP(self.train_X, self.train_Y)
+        if len(self.searchspace.tensor_categorical_dimensions) == 0:
+            model = SingleTaskGP(self.train_X, self.train_Y)
+        else:
+            model = MixedSingleTaskGP(self.train_X, self.train_Y, self.searchspace.tensor_categorical_dimensions)
         mll = ExactMarginalLogLikelihood(model.likelihood, model)
         # SumMarginalLogLikelihood
         if state_dict is not None:
@@ -110,7 +113,6 @@ def run(self, max_fevals: int):
                 ei = ExpectedImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
                 
                 # Optimize acquisition function to find the next evaluation point
-                # TODO look into how to handle categorical parameters with MixedSingleTaskGP
                 candidate, _ = optimize_acqf_discrete(
                     ei, 
                     q=1, 

From 68aee140f336672cc723617c4f03ff70ac3b6c1f Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 7 Nov 2024 21:49:53 -0800
Subject: [PATCH 055/168] Implemented variational GP and likelihood

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 30 +++++++++++++-------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index d0c56476e..38ea837e5 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -6,9 +6,9 @@
     import torch
     from botorch import fit_gpytorch_mll
     from botorch.acquisition import ExpectedImprovement
-    from botorch.models import MixedSingleTaskGP, SingleTaskGP
+    from botorch.models import MixedSingleTaskGP, SingleTaskGP, SingleTaskVariationalGP
     from botorch.optim import optimize_acqf_discrete
-    from gpytorch.mlls import ExactMarginalLogLikelihood
+    from gpytorch.mlls import ExactMarginalLogLikelihood, VariationalELBO
     from torch import Tensor
     bayes_opt_present = True
 except ImportError:
@@ -85,16 +85,26 @@ def initial_sample(self):
         self.evaluate_configs(sample_configs)
         self.initial_sample_taken = True
 
-    def initialize_model(self, state_dict=None):
-        """Initialize the model, possibly with a state dict for faster fitting."""
-        if len(self.searchspace.tensor_categorical_dimensions) == 0:
-            model = SingleTaskGP(self.train_X, self.train_Y)
+    def initialize_model(self, state_dict=None, exact=True):
+        """Initialize the model and likelihood, possibly with a state dict for faster fitting."""
+        # initialize the model
+        if exact:
+            if len(self.searchspace.tensor_categorical_dimensions) == 0:
+                model = SingleTaskGP(self.train_X, self.train_Y)
+            else:
+                model = MixedSingleTaskGP(self.train_X, self.train_Y, self.searchspace.tensor_categorical_dimensions)
         else:
-            model = MixedSingleTaskGP(self.train_X, self.train_Y, self.searchspace.tensor_categorical_dimensions)
-        mll = ExactMarginalLogLikelihood(model.likelihood, model)
-        # SumMarginalLogLikelihood
-        if state_dict is not None:
+            model = SingleTaskVariationalGP(self.train_X, self.train_Y)
+
+        # load the previous state
+        if exact and state_dict is not None:
             model.load_state_dict(state_dict)
+
+        # initialize the likelihood
+        if exact:
+            mll = ExactMarginalLogLikelihood(model.likelihood, model)
+        else:
+            mll = VariationalELBO(model.likelihood, model.model, num_data=self.train_Y.size(0))
         return mll, model
 
     def run(self, max_fevals: int):

From b9c012dc29d4983fe5c330efe598f06837a61e74 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 8 Nov 2024 15:55:56 -0800
Subject: [PATCH 056/168] Using LogExpectedImprovement to avoid stability
 issues

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 38ea837e5..68028d72c 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -5,7 +5,7 @@
 try:
     import torch
     from botorch import fit_gpytorch_mll
-    from botorch.acquisition import ExpectedImprovement
+    from botorch.acquisition import LogExpectedImprovement
     from botorch.models import MixedSingleTaskGP, SingleTaskGP, SingleTaskVariationalGP
     from botorch.optim import optimize_acqf_discrete
     from gpytorch.mlls import ExactMarginalLogLikelihood, VariationalELBO
@@ -120,7 +120,7 @@ def run(self, max_fevals: int):
                 fit_gpytorch_mll(mll)
                 
                 # Define the acquisition function
-                ei = ExpectedImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
+                ei = LogExpectedImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
                 
                 # Optimize acquisition function to find the next evaluation point
                 candidate, _ = optimize_acqf_discrete(

From 41ce663aa9425b43b1861c6ad4c0a4bff9140e57 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 8 Nov 2024 23:13:41 -0800
Subject: [PATCH 057/168] Implemented tensor space bounds in searchspace

---
 kernel_tuner/searchspace.py | 36 +++++++++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 6b3e54e21..7e9315d06 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -58,8 +58,10 @@ def __init__(
         framework_l = framework.lower()
         restrictions = restrictions if restrictions is not None else []
         self.tune_params = tune_params
-        self.tensorspace = None
-        self.tensor_categorical_dimensions = []
+        self._tensorspace = None
+        self._tensorspace_bounds = None
+        self._tensorspace_bounds_indices = []
+        self._tensorspace_categorical_dimensions = []
         self._map_tensor_to_param = []
         self._map_param_to_tensor = []
         self.restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
@@ -594,28 +596,42 @@ def get_param_config_index(self, param_config: Union[tuple, Tensor]):
     
     def initialize_tensorspace(self):
         """Encode the searchspace as floats in a Tensor. Save the mapping."""
-        assert self.tensorspace is None, "Tensorspace is already initialized"
+        assert self._tensorspace is None, "Tensorspace is already initialized"
+        bounds = []
 
         # generate the mappings to and from tensor values
         for index, param_values in enumerate(self.params_values):
+            # convert numericals to float, or encode categorical
             if all(isinstance(v, numbers.Real) for v in param_values):
                 tensor_values = np.array(param_values).astype(float)
             else:
-                self.tensor_categorical_dimensions.append(index)
+                self._tensorspace_categorical_dimensions.append(index)
                 tensor_values = np.arange(len(param_values))
+
             self._map_param_to_tensor.append(dict(zip(param_values, tensor_values)))
             self._map_tensor_to_param.append(dict(zip(tensor_values, param_values)))
+            bounds.append((tensor_values.min(), tensor_values.max()))
+            if tensor_values.min() < tensor_values.max():
+                self._tensorspace_bounds_indices.append(index)
 
         # apply the mappings on the full searchspace
         numpy_repr = self.get_list_numpy()
         numpy_repr = np.apply_along_axis(self.param_config_to_tensor, 1, numpy_repr)
-        self.tensorspace = torch.from_numpy(numpy_repr.astype(float))
+        self._tensorspace = torch.from_numpy(numpy_repr.astype(float))
+
+        # set the bounds in the correct format (one array for the min, one for the max)
+        bounds = torch.from_numpy(np.array(bounds))
+        self._tensorspace_bounds = torch.cat([bounds[:,0], bounds[:,1]]).reshape((2, bounds.shape[0]))
     
     def get_tensorspace(self):
         """Get the searchspace encoded in a Tensor."""
-        if self.tensorspace is None:
+        if self._tensorspace is None:
             self.initialize_tensorspace()
-        return self.tensorspace
+        return self._tensorspace
+    
+    def get_tensorspace_categorical_dimensions(self):
+        """Get the a list of the categorical dimensions in the tensorspace."""
+        return self._tensorspace_categorical_dimensions
     
     def param_config_to_tensor(self, param_config: tuple):
         """Convert from a parameter configuration to a Tensor."""
@@ -644,6 +660,12 @@ def tensor_to_param_config(self, tensor: Tensor):
         for i, param in enumerate(tensor):
             config.append(self._map_tensor_to_param[i][float(param)])
         return tuple(config)
+    
+    def get_tensorspace_bounds(self):
+        """Get the bounds to the tensorspace parameters, returned as a 2 x d dimensional tensor, and the indices of the parameters."""
+        if self._tensorspace is None:
+            self.initialize_tensorspace()
+        return self._tensorspace_bounds, self._tensorspace_bounds_indices
 
     def __prepare_neighbors_index(self):
         """Prepare by calculating the indices for the individual parameters."""

From 07ef1d49dd91e2c84aefe1ba95c9d08ef63f0e0c Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 8 Nov 2024 23:15:14 -0800
Subject: [PATCH 058/168] Implemented normalization for input features

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 68028d72c..8026c4c13 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -7,6 +7,7 @@
     from botorch import fit_gpytorch_mll
     from botorch.acquisition import LogExpectedImprovement
     from botorch.models import MixedSingleTaskGP, SingleTaskGP, SingleTaskVariationalGP
+    from botorch.models.transforms import Normalize, Standardize
     from botorch.optim import optimize_acqf_discrete
     from gpytorch.mlls import ExactMarginalLogLikelihood, VariationalELBO
     from torch import Tensor
@@ -87,14 +88,21 @@ def initial_sample(self):
 
     def initialize_model(self, state_dict=None, exact=True):
         """Initialize the model and likelihood, possibly with a state dict for faster fitting."""
+        train_X = self.train_X
+        train_Y = self.train_Y
+        # transforms = dict(input_transform=Normalize(train_X.dim()), outcome_transform=Standardize(train_Y.dim()))
+        bounds, bounds_indices = self.searchspace.get_tensorspace_bounds()
+        transforms = dict(input_transform=Normalize(d=train_X.shape[-1], indices=bounds_indices, bounds=bounds))
+
         # initialize the model
         if exact:
-            if len(self.searchspace.tensor_categorical_dimensions) == 0:
-                model = SingleTaskGP(self.train_X, self.train_Y)
+            catdims = self.searchspace.get_tensorspace_categorical_dimensions()
+            if len(catdims) == 0:
+                model = SingleTaskGP(train_X, train_Y, **transforms)
             else:
-                model = MixedSingleTaskGP(self.train_X, self.train_Y, self.searchspace.tensor_categorical_dimensions)
+                model = MixedSingleTaskGP(train_X, train_Y, cat_dims=catdims, **transforms)
         else:
-            model = SingleTaskVariationalGP(self.train_X, self.train_Y)
+            model = SingleTaskVariationalGP(train_X, train_Y, **transforms)
 
         # load the previous state
         if exact and state_dict is not None:
@@ -104,7 +112,7 @@ def initialize_model(self, state_dict=None, exact=True):
         if exact:
             mll = ExactMarginalLogLikelihood(model.likelihood, model)
         else:
-            mll = VariationalELBO(model.likelihood, model.model, num_data=self.train_Y.size(0))
+            mll = VariationalELBO(model.likelihood, model.model, num_data=train_Y.size(0))
         return mll, model
 
     def run(self, max_fevals: int):

From 721d072414aa1cc3eb4c1b3a5a8481dcfaac9883 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Sat, 9 Nov 2024 00:55:47 -0800
Subject: [PATCH 059/168] Tensorspace is reduced by removing inconsequential
 parameters

---
 kernel_tuner/searchspace.py | 36 +++++++++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 7e9315d06..8b9ac0299 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -62,8 +62,9 @@ def __init__(
         self._tensorspace_bounds = None
         self._tensorspace_bounds_indices = []
         self._tensorspace_categorical_dimensions = []
-        self._map_tensor_to_param = []
-        self._map_param_to_tensor = []
+        self._tensorspace_param_config_structure = []
+        self._map_tensor_to_param = {}
+        self._map_param_to_tensor = {}
         self.restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
         # the searchspace can add commonly used constraints (e.g. maxprod(blocks) <= maxthreads)
         self._modified_restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
@@ -601,6 +602,14 @@ def initialize_tensorspace(self):
 
         # generate the mappings to and from tensor values
         for index, param_values in enumerate(self.params_values):
+            # filter out parameters that do not matter, more efficient and avoids bounds problem
+            if len(param_values) < 2 or all(p == param_values[0] for p in param_values):
+                # keep track of skipped parameters, add them back in conversion functions
+                self._tensorspace_param_config_structure.append(param_values[0])
+                continue
+            else:
+                self._tensorspace_param_config_structure.append(None)
+
             # convert numericals to float, or encode categorical
             if all(isinstance(v, numbers.Real) for v in param_values):
                 tensor_values = np.array(param_values).astype(float)
@@ -608,12 +617,18 @@ def initialize_tensorspace(self):
                 self._tensorspace_categorical_dimensions.append(index)
                 tensor_values = np.arange(len(param_values))
 
-            self._map_param_to_tensor.append(dict(zip(param_values, tensor_values)))
-            self._map_tensor_to_param.append(dict(zip(tensor_values, param_values)))
+            # write the mappings to the object
+            self._map_param_to_tensor[index] = (dict(zip(param_values, tensor_values)))
+            self._map_tensor_to_param[index] = (dict(zip(tensor_values, param_values)))
             bounds.append((tensor_values.min(), tensor_values.max()))
             if tensor_values.min() < tensor_values.max():
                 self._tensorspace_bounds_indices.append(index)
 
+        # do some checks
+        assert len(self.params_values) == len(self._tensorspace_param_config_structure)
+        assert len(self._map_param_to_tensor) == len(self._map_tensor_to_param) == len(bounds)
+        assert len(self._tensorspace_bounds_indices) <= len(bounds)
+
         # apply the mappings on the full searchspace
         numpy_repr = self.get_list_numpy()
         numpy_repr = np.apply_along_axis(self.param_config_to_tensor, 1, numpy_repr)
@@ -639,6 +654,8 @@ def param_config_to_tensor(self, param_config: tuple):
             self.initialize_tensorspace()
         array = []
         for i, param in enumerate(param_config):
+            if self._tensorspace_param_config_structure[i] is not None:
+                continue    # skip over parameters not in the tensorspace
             mapping = self._map_param_to_tensor[i]
             conversions = [None, str, float, int, bool]
             for c in conversions:
@@ -656,9 +673,14 @@ def tensor_to_param_config(self, tensor: Tensor):
         assert tensor.dim() == 1, f"Parameter configuration tensor must be 1-dimensional, is {tensor.dim()} ({tensor})"
         if len(self._map_tensor_to_param) == 0:
             self.initialize_tensorspace()
-        config = []
-        for i, param in enumerate(tensor):
-            config.append(self._map_tensor_to_param[i][float(param)])
+        config = self._tensorspace_param_config_structure.copy()
+        skip_counter = 0
+        for i, param in enumerate(config):
+            if param is not None:
+                skip_counter += 1
+            else:
+                value = float(tensor[i-skip_counter])
+                config[i] = self._map_tensor_to_param[i][value]
         return tuple(config)
     
     def get_tensorspace_bounds(self):

From 2434b3b93bed8d49e88342d6e227ece243c8bd1d Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Sat, 9 Nov 2024 01:11:18 -0800
Subject: [PATCH 060/168] Extended strategies tests to include single
 parameter value

---
 test/strategies/test_cache_file.json | 49 +++++++++++++++++++---------
 test/strategies/test_strategies.py   |  2 ++
 2 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/test/strategies/test_cache_file.json b/test/strategies/test_cache_file.json
index 5e0c0e054..6073d4b62 100644
--- a/test/strategies/test_cache_file.json
+++ b/test/strategies/test_cache_file.json
@@ -4,6 +4,7 @@
     "tune_params_keys": [
         "block_size_x",
         "test_string",
+        "test_single",
         "test_bool",
         "test_mixed"
     ],
@@ -29,6 +30,9 @@
             "alg_1",
             "alg_2"
         ],
+        "test_single": [
+            15
+        ],
         "test_bool": [
             true,
             false
@@ -41,9 +45,10 @@
         ]
     },
     "cache": {
-        "128,alg_2,True,2.45": {
+        "128,alg_2,15,True,2.45": {
             "block_size_x": 128,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.04073600071881499,
@@ -63,9 +68,10 @@
             "framework_time": 0.8587837219238281,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "192,alg_2,True,2.45": {
+        "192,alg_2,15,True,2.45": {
             "block_size_x": 192,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.04095085710287094,
@@ -85,9 +91,10 @@
             "framework_time": 1.6656816005706787,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "256,alg_2,True,2.45": {
+        "256,alg_2,15,True,2.45": {
             "block_size_x": 256,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.04189257137477398,
@@ -107,9 +114,10 @@
             "framework_time": 1.6054585576057434,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "320,alg_2,True,2.45": {
+        "320,alg_2,15,True,2.45": {
             "block_size_x": 320,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.04208914376795292,
@@ -129,9 +137,10 @@
             "framework_time": 1.4494173228740692,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "384,alg_2,True,2.45": {
+        "384,alg_2,15,True,2.45": {
             "block_size_x": 384,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.04174171467976911,
@@ -151,9 +160,10 @@
             "framework_time": 1.682564616203308,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "448,alg_2,True,2.45": {
+        "448,alg_2,15,True,2.45": {
             "block_size_x": 448,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.03249828570655414,
@@ -173,9 +183,10 @@
             "framework_time": 1.5890561044216156,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "512,alg_2,True,2.45": {
+        "512,alg_2,15,True,2.45": {
             "block_size_x": 512,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.04139885599059718,
@@ -195,9 +206,10 @@
             "framework_time": 1.853298395872116,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "576,alg_2,True,2.45": {
+        "576,alg_2,15,True,2.45": {
             "block_size_x": 576,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.04189257137477398,
@@ -217,9 +229,10 @@
             "framework_time": 1.8403716385364532,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "640,alg_2,True,2.45": {
+        "640,alg_2,15,True,2.45": {
             "block_size_x": 640,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.0411702852163996,
@@ -239,9 +252,10 @@
             "framework_time": 1.8264725804328918,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "704,alg_2,True,2.45": {
+        "704,alg_2,15,True,2.45": {
             "block_size_x": 704,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.04184228580977235,
@@ -261,9 +275,10 @@
             "framework_time": 1.6709677875041962,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "768,alg_2,True,2.45": {
+        "768,alg_2,15,True,2.45": {
             "block_size_x": 768,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.03175771422684193,
@@ -283,9 +298,10 @@
             "framework_time": 1.7531625926494598,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "832,alg_2,True,2.45": {
+        "832,alg_2,15,True,2.45": {
             "block_size_x": 832,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.040941715240478516,
@@ -305,9 +321,10 @@
             "framework_time": 2.1368376910686493,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "896,alg_2,True,2.45": {
+        "896,alg_2,15,True,2.45": {
             "block_size_x": 896,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.04177371359297207,
@@ -327,9 +344,10 @@
             "framework_time": 2.03637033700943,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "960,alg_2,True,2.45": {
+        "960,alg_2,15,True,2.45": {
             "block_size_x": 960,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.042189714631864,
@@ -349,9 +367,10 @@
             "framework_time": 1.7383433878421783,
             "timestamp": "2022-12-23 12:11:26.411558+00:00"
         },
-        "1024,alg_2,True,2.45": {
+        "1024,alg_2,15,True,2.45": {
             "block_size_x": 1024,
             "test_string": "alg_2",
+            "test_single": 15,
             "test_bool": true,
             "test_mixed": 2.45,
             "time": 0.04114742816558906,
diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
index b7b2851dd..9c0e9faca 100644
--- a/test/strategies/test_strategies.py
+++ b/test/strategies/test_strategies.py
@@ -32,6 +32,7 @@ def vector_add():
     tune_params = dict()
     tune_params["block_size_x"] = [128 + 64 * i for i in range(15)]
     tune_params["test_string"] = ["alg_1", "alg_2"]
+    tune_params["test_single"] = [15]
     tune_params["test_bool"] = [True, False]
     tune_params["test_mixed"] = ["test", 1, True, 2.45]
 
@@ -82,6 +83,7 @@ def test_strategies(vector_add, strategy):
     expected_items = {
         'block_size_x': int,
         'test_string': str,
+        'test_single': int,
         'test_bool': bool,
         'test_mixed': float,
         'time': (float, int),

From 1679751be384a7c0c82a85fc1af15d770a5b0711 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Sat, 9 Nov 2024 01:12:24 -0800
Subject: [PATCH 061/168] Fixed an indexing error for tensorspace bounds

---
 kernel_tuner/searchspace.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 8b9ac0299..3085688c1 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -598,6 +598,7 @@ def get_param_config_index(self, param_config: Union[tuple, Tensor]):
     def initialize_tensorspace(self):
         """Encode the searchspace as floats in a Tensor. Save the mapping."""
         assert self._tensorspace is None, "Tensorspace is already initialized"
+        skipped_count = 0
         bounds = []
 
         # generate the mappings to and from tensor values
@@ -606,6 +607,7 @@ def initialize_tensorspace(self):
             if len(param_values) < 2 or all(p == param_values[0] for p in param_values):
                 # keep track of skipped parameters, add them back in conversion functions
                 self._tensorspace_param_config_structure.append(param_values[0])
+                skipped_count += 1
                 continue
             else:
                 self._tensorspace_param_config_structure.append(None)
@@ -614,7 +616,7 @@ def initialize_tensorspace(self):
             if all(isinstance(v, numbers.Real) for v in param_values):
                 tensor_values = np.array(param_values).astype(float)
             else:
-                self._tensorspace_categorical_dimensions.append(index)
+                self._tensorspace_categorical_dimensions.append(index-skipped_count)
                 tensor_values = np.arange(len(param_values))
 
             # write the mappings to the object
@@ -622,7 +624,7 @@ def initialize_tensorspace(self):
             self._map_tensor_to_param[index] = (dict(zip(tensor_values, param_values)))
             bounds.append((tensor_values.min(), tensor_values.max()))
             if tensor_values.min() < tensor_values.max():
-                self._tensorspace_bounds_indices.append(index)
+                self._tensorspace_bounds_indices.append(index-skipped_count)
 
         # do some checks
         assert len(self.params_values) == len(self._tensorspace_param_config_structure)

From 2b816a641168dda4add7731fc0bb31f5ba599cdd Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Sat, 9 Nov 2024 01:20:57 -0800
Subject: [PATCH 062/168] Extended searchspace tests to include single
 parameter value

---
 test/test_searchspace.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/test/test_searchspace.py b/test/test_searchspace.py
index 48f049750..eaf546387 100644
--- a/test/test_searchspace.py
+++ b/test/test_searchspace.py
@@ -27,6 +27,10 @@
 simple_searchspace = Searchspace(simple_tune_params, restrict, max_threads)
 simple_searchspace_bruteforce = Searchspace(simple_tune_params, restrict, max_threads, framework="bruteforce")
 
+simple_tune_params_single = simple_tune_params.copy()
+simple_tune_params_single["s"] = [True]
+simple_searchspace_single = Searchspace(simple_tune_params_single, restrict, max_threads)
+
 # 3.1 million combinations, of which 10600 pass the restrictions
 num_layers = 42
 tune_params = dict()
@@ -185,11 +189,12 @@ def test_get_tensorspace():
 
 def test_conversion_tensor_param_config():
     """Test the conversion from a parameter configuration to a tensor and tensor to parameter configuration."""
-    for config in simple_searchspace.list:
-        tensor = simple_searchspace.param_config_to_tensor(config)
-        config_2 = simple_searchspace.tensor_to_param_config(tensor)
+    for config in simple_searchspace_single.list:
+        tensor = simple_searchspace_single.param_config_to_tensor(config)
+        config_2 = simple_searchspace_single.tensor_to_param_config(tensor)
         assert config == config_2
-        assert tensor.equal(simple_searchspace.param_config_to_tensor(config_2))
+        assert tensor.equal(simple_searchspace_single.param_config_to_tensor(config_2))
+        assert len(tensor) == len(config) - 1
 
 
 def test_random_sample():

From c417585b05bfde8415bd1c0d4ad70b282750d60b Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Sat, 9 Nov 2024 21:55:10 -0800
Subject: [PATCH 063/168] Implemented additional acquisition functions, reduced
 number of reinitializations

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 31 +++++++++++++++-----
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 8026c4c13..3aff658ab 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -1,11 +1,19 @@
 """Bayesian Optimization implementation using BO Torch."""
 
+from math import ceil
+
 import numpy as np
 
 try:
     import torch
     from botorch import fit_gpytorch_mll
-    from botorch.acquisition import LogExpectedImprovement
+    from botorch.acquisition import (
+        LogExpectedImprovement,
+        ProbabilityOfImprovement,
+        qExpectedUtilityOfBestOption,
+        qLogExpectedImprovement,
+        qLowerBoundMaxValueEntropy,
+    )
     from botorch.models import MixedSingleTaskGP, SingleTaskGP, SingleTaskVariationalGP
     from botorch.models.transforms import Normalize, Standardize
     from botorch.optim import optimize_acqf_discrete
@@ -115,25 +123,31 @@ def initialize_model(self, state_dict=None, exact=True):
             mll = VariationalELBO(model.likelihood, model.model, num_data=train_Y.size(0))
         return mll, model
 
-    def run(self, max_fevals: int):
+    def run(self, max_fevals: int, feval_per_loop=1):
         """Run the Bayesian Optimization loop for at most `max_fevals`."""
         try:
             if not self.initial_sample_taken:
                 self.initial_sample()
-                mll, model = self.initialize_model()
+            mll, model = self.initialize_model()
 
             # Bayesian optimization loop
-            for _ in range(max_fevals):
+            max_loops = ceil(max_fevals/feval_per_loop)
+            for f in range(max_loops):
                 # fit a Gaussian Process model
                 fit_gpytorch_mll(mll)
                 
                 # Define the acquisition function
-                ei = LogExpectedImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
+                acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
+                # acqf = NoisyExpectedImprovement(model=model, , maximize=False)
+                # acqf = ProbabilityOfImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
+                # acqf = qLowerBoundMaxValueEntropy(model=model, candidate_set=self.searchspace_tensors, maximize=False)
+                # acqf = qLogExpectedImprovement(model=model, best_f=self.train_Y.min())
+                # acqf = qExpectedUtilityOfBestOption(pref_model=model)
                 
                 # Optimize acquisition function to find the next evaluation point
                 candidate, _ = optimize_acqf_discrete(
-                    ei, 
-                    q=1, 
+                    acqf, 
+                    q=feval_per_loop, 
                     choices=self.searchspace_tensors
                 )
                 
@@ -141,7 +155,8 @@ def run(self, max_fevals: int):
                 self.evaluate_configs(candidate)
 
                 # reinitialize the models so they are ready for fitting on next iteration
-                mll, model = self.initialize_model(model.state_dict())
+                if f < max_loops - 1:
+                    mll, model = self.initialize_model(model.state_dict())
         except util.StopCriterionReached as e:
             if self.tuning_options.verbose:
                 print(e)

From 3d53b29af0d7b58e76b5d7431a6a8a20cdddd0c2 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Sat, 9 Nov 2024 22:49:00 -0800
Subject: [PATCH 064/168] Implemented division of tensorspace into chunks for
 faster optimization

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 39 ++++++++++++++------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 3aff658ab..4bc7b482b 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -42,7 +42,7 @@ class BayesianOptimization():
     def __init__(self, searchspace: Searchspace, runner, tuning_options):
         """Initialization of the Bayesian Optimization class. Does not evaluate configurations."""
         self.initial_sample_taken = False
-        self.initial_sample_size = tuning_options.strategy_options.get("popsize", 20)
+        self.initial_sample_size: int = tuning_options.strategy_options.get("popsize", 20)
         self.tuning_options = tuning_options
         self.cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, return_invalid=True)
 
@@ -123,12 +123,13 @@ def initialize_model(self, state_dict=None, exact=True):
             mll = VariationalELBO(model.likelihood, model.model, num_data=train_Y.size(0))
         return mll, model
 
-    def run(self, max_fevals: int, feval_per_loop=1):
+    def run(self, max_fevals: int, feval_per_loop=5, max_batch_size=2048):
         """Run the Bayesian Optimization loop for at most `max_fevals`."""
         try:
             if not self.initial_sample_taken:
                 self.initial_sample()
             mll, model = self.initialize_model()
+            num_fevals = self.initial_sample_size
 
             # Bayesian optimization loop
             max_loops = ceil(max_fevals/feval_per_loop)
@@ -136,23 +137,37 @@ def run(self, max_fevals: int, feval_per_loop=1):
                 # fit a Gaussian Process model
                 fit_gpytorch_mll(mll)
                 
-                # Define the acquisition function
+                # define the acquisition function
                 acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
                 # acqf = NoisyExpectedImprovement(model=model, , maximize=False)
                 # acqf = ProbabilityOfImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
                 # acqf = qLowerBoundMaxValueEntropy(model=model, candidate_set=self.searchspace_tensors, maximize=False)
                 # acqf = qLogExpectedImprovement(model=model, best_f=self.train_Y.min())
                 # acqf = qExpectedUtilityOfBestOption(pref_model=model)
+
+                # divide the optimization space into random chuncks
+                tensorspace_size = self.searchspace_tensors.size(0)
+                num_optimization_spaces = max(min(feval_per_loop, max_fevals-num_fevals), ceil(tensorspace_size / max_batch_size))
+                if num_optimization_spaces <= 1:
+                    optimization_spaces = [self.searchspace_tensors]
+                else:
+                    # shuffle the searchspace
+                    shuffled_indices = torch.randperm(tensorspace_size)
+                    tensorspace = self.searchspace_tensors[shuffled_indices]
+                    optimization_spaces = tensorspace.split(ceil(tensorspace_size / num_optimization_spaces))
                 
-                # Optimize acquisition function to find the next evaluation point
-                candidate, _ = optimize_acqf_discrete(
-                    acqf, 
-                    q=feval_per_loop, 
-                    choices=self.searchspace_tensors
-                )
-                
-                # evaluate the new candidate
-                self.evaluate_configs(candidate)
+                # optimize acquisition function to find the next evaluation point
+                for optimization_space in optimization_spaces:
+                    candidate, _ = optimize_acqf_discrete(
+                        acqf, 
+                        q=1, 
+                        choices=optimization_space,
+                        max_batch_size=max_batch_size
+                    )
+                    
+                    # evaluate the new candidate
+                    self.evaluate_configs(candidate)
+                    num_fevals += 1
 
                 # reinitialize the models so they are ready for fitting on next iteration
                 if f < max_loops - 1:

From 3ed43a68a9e6ffc42fa63138ee18177aa3021072 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 11 Nov 2024 17:48:49 -0800
Subject: [PATCH 065/168] Switch to fit_gpytorch_mll_torch for faster fitting,
 use approximate mode

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 4bc7b482b..6375caa62 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -17,18 +17,30 @@
     from botorch.models import MixedSingleTaskGP, SingleTaskGP, SingleTaskVariationalGP
     from botorch.models.transforms import Normalize, Standardize
     from botorch.optim import optimize_acqf_discrete
+    from botorch.optim.fit import fit_gpytorch_mll_torch
     from gpytorch.mlls import ExactMarginalLogLikelihood, VariationalELBO
     from torch import Tensor
     bayes_opt_present = True
 except ImportError:
     bayes_opt_present = False
 
+import gpytorch.settings as gp_settings
+import linear_operator.settings as linop_settings
+
 from kernel_tuner import util
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies.common import (
     CostFunc,
 )
 
+# set gpytorch to approximate mode for faster fitting
+linop_settings._fast_covar_root_decomposition._default = True
+linop_settings._fast_log_prob._default = True
+linop_settings._fast_solves._default = True
+linop_settings.cholesky_max_tries._global_value = 6
+linop_settings.max_cholesky_size._global_value = 800
+gp_settings.max_eager_kernel_size._global_value = 800
+
 
 def tune(searchspace: Searchspace, runner, tuning_options):
     """The entry function for tuning a searchspace using this algorithm."""
@@ -49,6 +61,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         # set up conversion to tensors
         self.searchspace = searchspace
         self.searchspace_tensors = searchspace.get_tensorspace()
+        self.bounds, self.bounds_indices = self.searchspace.get_tensorspace_bounds()
         self.train_X = torch.empty(0)
         self.train_Y = torch.empty(0)
 
@@ -99,8 +112,7 @@ def initialize_model(self, state_dict=None, exact=True):
         train_X = self.train_X
         train_Y = self.train_Y
         # transforms = dict(input_transform=Normalize(train_X.dim()), outcome_transform=Standardize(train_Y.dim()))
-        bounds, bounds_indices = self.searchspace.get_tensorspace_bounds()
-        transforms = dict(input_transform=Normalize(d=train_X.shape[-1], indices=bounds_indices, bounds=bounds))
+        transforms = dict(input_transform=Normalize(d=train_X.shape[-1], indices=self.bounds_indices, bounds=self.bounds))
 
         # initialize the model
         if exact:
@@ -135,7 +147,7 @@ def run(self, max_fevals: int, feval_per_loop=5, max_batch_size=2048):
             max_loops = ceil(max_fevals/feval_per_loop)
             for f in range(max_loops):
                 # fit a Gaussian Process model
-                fit_gpytorch_mll(mll)
+                fit_gpytorch_mll(mll, optimizer=fit_gpytorch_mll_torch)
                 
                 # define the acquisition function
                 acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.min(), maximize=False)

From 559813fbf81b4a21da6687bab5f3453016320f76 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 11 Nov 2024 20:40:16 -0800
Subject: [PATCH 066/168] Implemented running BO on GPU / Apple Silicon,
 settable precision

---
 kernel_tuner/searchspace.py                  | 36 +++++++++++++-------
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 14 +++++---
 2 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 3085688c1..201052e8d 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -59,6 +59,9 @@ def __init__(
         restrictions = restrictions if restrictions is not None else []
         self.tune_params = tune_params
         self._tensorspace = None
+        self.tensor_dtype = torch.float32 if torch_available else None
+        self.tensor_device = torch.device("cpu") if torch_available else None
+        self.tensor_kwargs = dict(dtype=self.tensor_dtype, device=self.tensor_device)
         self._tensorspace_bounds = None
         self._tensorspace_bounds_indices = []
         self._tensorspace_categorical_dimensions = []
@@ -595,11 +598,16 @@ def get_param_config_index(self, param_config: Union[tuple, Tensor]):
         # constant time O(1) access - much faster than any other method, but needs a shadow dict of the search space
         return self.__dict.get(param_config, None)
     
-    def initialize_tensorspace(self):
-        """Encode the searchspace as floats in a Tensor. Save the mapping."""
+    def initialize_tensorspace(self, dtype = None, device = None):
+        """Encode the searchspace in a Tensor. Save the mapping. Call this function directly to control the precision or device used."""
         assert self._tensorspace is None, "Tensorspace is already initialized"
         skipped_count = 0
         bounds = []
+        if dtype is not None:
+            self.tensor_dtype = dtype
+        if device is not None:
+            self.tensor_device = device
+        self.tensor_kwargs = dict(dtype=self.tensor_dtype, device=self.tensor_device)
 
         # generate the mappings to and from tensor values
         for index, param_values in enumerate(self.params_values):
@@ -614,14 +622,15 @@ def initialize_tensorspace(self):
 
             # convert numericals to float, or encode categorical
             if all(isinstance(v, numbers.Real) for v in param_values):
-                tensor_values = np.array(param_values).astype(float)
+                tensor_values = torch.tensor(param_values, dtype=self.tensor_dtype)
             else:
                 self._tensorspace_categorical_dimensions.append(index-skipped_count)
-                tensor_values = np.arange(len(param_values))
+                # tensor_values = np.arange(len(param_values))
+                tensor_values = torch.arange(len(param_values), dtype=self.tensor_dtype)
 
             # write the mappings to the object
-            self._map_param_to_tensor[index] = (dict(zip(param_values, tensor_values)))
-            self._map_tensor_to_param[index] = (dict(zip(tensor_values, param_values)))
+            self._map_param_to_tensor[index] = (dict(zip(param_values, tensor_values.tolist())))
+            self._map_tensor_to_param[index] = (dict(zip(tensor_values.tolist(), param_values)))
             bounds.append((tensor_values.min(), tensor_values.max()))
             if tensor_values.min() < tensor_values.max():
                 self._tensorspace_bounds_indices.append(index-skipped_count)
@@ -632,16 +641,17 @@ def initialize_tensorspace(self):
         assert len(self._tensorspace_bounds_indices) <= len(bounds)
 
         # apply the mappings on the full searchspace
-        numpy_repr = self.get_list_numpy()
-        numpy_repr = np.apply_along_axis(self.param_config_to_tensor, 1, numpy_repr)
-        self._tensorspace = torch.from_numpy(numpy_repr.astype(float))
+        # numpy_repr = self.get_list_numpy()
+        # numpy_repr = np.apply_along_axis(self.param_config_to_tensor, 1, numpy_repr)
+        # self._tensorspace = torch.from_numpy(numpy_repr.astype(self.tensor_dtype)).to(self.tensor_device)
+        self._tensorspace = torch.stack(tuple(map(self.param_config_to_tensor, self.list)))
 
         # set the bounds in the correct format (one array for the min, one for the max)
-        bounds = torch.from_numpy(np.array(bounds))
+        bounds = torch.tensor(bounds, **self.tensor_kwargs)
         self._tensorspace_bounds = torch.cat([bounds[:,0], bounds[:,1]]).reshape((2, bounds.shape[0]))
     
     def get_tensorspace(self):
-        """Get the searchspace encoded in a Tensor."""
+        """Get the searchspace encoded in a Tensor. To use a non-default dtype or device, call `initialize_tensorspace` first."""
         if self._tensorspace is None:
             self.initialize_tensorspace()
         return self._tensorspace
@@ -668,7 +678,7 @@ def param_config_to_tensor(self, param_config: tuple):
                 except (KeyError, ValueError) as e:
                     if c == conversions[-1]:
                         raise KeyError(f"No variant of {param} could be found in {mapping}") from e
-        return torch.from_numpy(np.array(array))
+        return torch.tensor(array, **self.tensor_kwargs)
     
     def tensor_to_param_config(self, tensor: Tensor):
         """Convert from a Tensor to a parameter configuration."""
@@ -681,7 +691,7 @@ def tensor_to_param_config(self, tensor: Tensor):
             if param is not None:
                 skip_counter += 1
             else:
-                value = float(tensor[i-skip_counter])
+                value = tensor[i-skip_counter].item()
                 config[i] = self._map_tensor_to_param[i][value]
         return tuple(config)
     
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 6375caa62..18c7264a5 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -58,12 +58,16 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.tuning_options = tuning_options
         self.cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, return_invalid=True)
 
+        # select the device to use (CUDA or Apple Silicon MPS if available)
+        self.tensor_device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.mps.is_available() else "cpu")
+
         # set up conversion to tensors
         self.searchspace = searchspace
+        self.searchspace.initialize_tensorspace(dtype=torch.float32, device=self.tensor_device)
         self.searchspace_tensors = searchspace.get_tensorspace()
         self.bounds, self.bounds_indices = self.searchspace.get_tensorspace_bounds()
-        self.train_X = torch.empty(0)
-        self.train_Y = torch.empty(0)
+        self.train_X = torch.empty(0, **self.searchspace.tensor_kwargs)
+        self.train_Y = torch.empty(0, **self.searchspace.tensor_kwargs)
 
     def run_config(self, config: tuple):
         """Run a single configuration. Returns the result and whether it is valid."""
@@ -95,14 +99,14 @@ def evaluate_configs(self, X: Tensor):
 
             # add valid results to the training set
             if len(valid_configs) > 0 and len(valid_results) > 0:
-                self.train_X = torch.cat([self.train_X, torch.from_numpy(np.array(valid_configs))])
-                self.train_Y = torch.cat([self.train_Y, torch.from_numpy(np.array(valid_results))])
+                self.train_X = torch.cat([self.train_X, torch.stack(valid_configs)])
+                self.train_Y = torch.cat([self.train_Y, torch.tensor(valid_results, **self.searchspace.tensor_kwargs)])
         else:
             raise NotImplementedError(f"Evaluation has not been implemented for type {type(X)}")
         
     def initial_sample(self):
         """Take an initial sample."""
-        sample_indices = torch.from_numpy(self.searchspace.get_random_sample_indices(self.initial_sample_size))
+        sample_indices = torch.from_numpy(self.searchspace.get_random_sample_indices(self.initial_sample_size)).to(self.tensor_device)
         sample_configs = self.searchspace_tensors.index_select(0, sample_indices)
         self.evaluate_configs(sample_configs)
         self.initial_sample_taken = True

From c391428de491cc7d97ae711686eae44f1afaf933 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 11 Nov 2024 20:56:46 -0800
Subject: [PATCH 067/168] Removed Apple Silicon MPS support as cholesky
 operation is not yet implemented

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 18c7264a5..3f0bf6fee 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -59,7 +59,8 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, return_invalid=True)
 
         # select the device to use (CUDA or Apple Silicon MPS if available)
-        self.tensor_device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.mps.is_available() else "cpu")
+        # TODO keep an eye on Apple Silicon support. Currently `linalg_cholesky` is not yet implemented for MPS.
+        self.tensor_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
         # set up conversion to tensors
         self.searchspace = searchspace

From 07925c5732fe4ad083bfd01cc7b5381047826316 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 12 Nov 2024 00:42:05 -0800
Subject: [PATCH 068/168] Implemented discrete local search for cases where the
 tensorspace isn't split

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 28 ++++++++++++++------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 3f0bf6fee..f15415a2e 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -16,7 +16,7 @@
     )
     from botorch.models import MixedSingleTaskGP, SingleTaskGP, SingleTaskVariationalGP
     from botorch.models.transforms import Normalize, Standardize
-    from botorch.optim import optimize_acqf_discrete
+    from botorch.optim import optimize_acqf_discrete, optimize_acqf_discrete_local_search
     from botorch.optim.fit import fit_gpytorch_mll_torch
     from gpytorch.mlls import ExactMarginalLogLikelihood, VariationalELBO
     from torch import Tensor
@@ -140,7 +140,7 @@ def initialize_model(self, state_dict=None, exact=True):
             mll = VariationalELBO(model.likelihood, model.model, num_data=train_Y.size(0))
         return mll, model
 
-    def run(self, max_fevals: int, feval_per_loop=5, max_batch_size=2048):
+    def run(self, max_fevals: int, feval_per_loop=10, max_batch_size=2048):
         """Run the Bayesian Optimization loop for at most `max_fevals`."""
         try:
             if not self.initial_sample_taken:
@@ -175,12 +175,24 @@ def run(self, max_fevals: int, feval_per_loop=5, max_batch_size=2048):
                 
                 # optimize acquisition function to find the next evaluation point
                 for optimization_space in optimization_spaces:
-                    candidate, _ = optimize_acqf_discrete(
-                        acqf, 
-                        q=1, 
-                        choices=optimization_space,
-                        max_batch_size=max_batch_size
-                    )
+
+                    # optimize over a lattice if the space is too large
+                    if max_batch_size < optimization_space.size(0):
+                        candidate, _ = optimize_acqf_discrete_local_search(
+                            acqf, 
+                            q=1,
+                            discrete_choices=optimization_space, 
+                            max_batch_size=max_batch_size,
+                            num_restarts=5,
+                            raw_samples=1024
+                        )
+                    else:
+                        candidate, _ = optimize_acqf_discrete(
+                            acqf, 
+                            q=1, 
+                            choices=optimization_space,
+                            max_batch_size=max_batch_size
+                        )
                     
                     # evaluate the new candidate
                     self.evaluate_configs(candidate)

From 4113513cfef60ab0eead6c30cf67ddaec3ee0d4d Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 12 Nov 2024 15:11:33 -0800
Subject: [PATCH 069/168] Implemented standardization of output

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index f15415a2e..fa4fc44e6 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -116,8 +116,10 @@ def initialize_model(self, state_dict=None, exact=True):
         """Initialize the model and likelihood, possibly with a state dict for faster fitting."""
         train_X = self.train_X
         train_Y = self.train_Y
-        # transforms = dict(input_transform=Normalize(train_X.dim()), outcome_transform=Standardize(train_Y.dim()))
-        transforms = dict(input_transform=Normalize(d=train_X.shape[-1], indices=self.bounds_indices, bounds=self.bounds))
+        transforms = dict(
+            input_transform=Normalize(d=train_X.shape[-1], indices=self.bounds_indices, bounds=self.bounds),
+            outcome_transform=Standardize(m=train_Y.size(-1))
+        )
 
         # initialize the model
         if exact:
@@ -179,9 +181,9 @@ def run(self, max_fevals: int, feval_per_loop=10, max_batch_size=2048):
                     # optimize over a lattice if the space is too large
                     if max_batch_size < optimization_space.size(0):
                         candidate, _ = optimize_acqf_discrete_local_search(
-                            acqf, 
+                            acqf,
                             q=1,
-                            discrete_choices=optimization_space, 
+                            discrete_choices=optimization_space,
                             max_batch_size=max_batch_size,
                             num_restarts=5,
                             raw_samples=1024

From ed12b5a1b3dd402691a7e398b05005e1cb0ea03f Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 12 Nov 2024 15:27:02 -0800
Subject: [PATCH 070/168] Implemented unified optimization direction

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index fa4fc44e6..ac2fa3ea3 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -57,6 +57,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.initial_sample_size: int = tuning_options.strategy_options.get("popsize", 20)
         self.tuning_options = tuning_options
         self.cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, return_invalid=True)
+        self.maximize = tuning_options['objective_higher_is_better']
 
         # select the device to use (CUDA or Apple Silicon MPS if available)
         # TODO keep an eye on Apple Silicon support. Currently `linalg_cholesky` is not yet implemented for MPS.
@@ -76,6 +77,8 @@ def run_config(self, config: tuple):
         valid = not isinstance(result, util.ErrorConfig) and not np.isnan(result)
         if not valid:
             result = np.nan
+        elif not self.maximize:
+            result = -result
         return [result], valid
 
     def evaluate_configs(self, X: Tensor):
@@ -157,11 +160,11 @@ def run(self, max_fevals: int, feval_per_loop=10, max_batch_size=2048):
                 fit_gpytorch_mll(mll, optimizer=fit_gpytorch_mll_torch)
                 
                 # define the acquisition function
-                acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
-                # acqf = NoisyExpectedImprovement(model=model, , maximize=False)
-                # acqf = ProbabilityOfImprovement(model=model, best_f=self.train_Y.min(), maximize=False)
-                # acqf = qLowerBoundMaxValueEntropy(model=model, candidate_set=self.searchspace_tensors, maximize=False)
-                # acqf = qLogExpectedImprovement(model=model, best_f=self.train_Y.min())
+                acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
+                # acqf = NoisyExpectedImprovement(model=model, , maximize=True)
+                # acqf = ProbabilityOfImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
+                # acqf = qLowerBoundMaxValueEntropy(model=model, candidate_set=self.searchspace_tensors, maximize=True)
+                # acqf = qLogExpectedImprovement(model=model, best_f=self.train_Y.max())
                 # acqf = qExpectedUtilityOfBestOption(pref_model=model)
 
                 # divide the optimization space into random chuncks

From d62c9410f68bac31daa02557edf1f1f8ed99cdbc Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 12 Nov 2024 15:31:24 -0800
Subject: [PATCH 071/168] Updated outcome standardization

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index ac2fa3ea3..1031448bb 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -121,7 +121,7 @@ def initialize_model(self, state_dict=None, exact=True):
         train_Y = self.train_Y
         transforms = dict(
             input_transform=Normalize(d=train_X.shape[-1], indices=self.bounds_indices, bounds=self.bounds),
-            outcome_transform=Standardize(m=train_Y.size(-1))
+            outcome_transform=Standardize(m=train_Y.shape[-1], batch_shape=train_X.shape[:-2])
         )
 
         # initialize the model

From 1c015cb579172139feb72e263d5114e1d2701c67 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 12 Nov 2024 17:35:33 -0800
Subject: [PATCH 072/168] Using extra information from variance in BO for
 better fits

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 28 +++++++++++++-------
 kernel_tuner/strategies/common.py            | 13 ++++++++-
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 1031448bb..1443a5a09 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -56,11 +56,11 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.initial_sample_taken = False
         self.initial_sample_size: int = tuning_options.strategy_options.get("popsize", 20)
         self.tuning_options = tuning_options
-        self.cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, return_invalid=True)
+        self.cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, return_invalid=True, return_raw=True)
         self.maximize = tuning_options['objective_higher_is_better']
 
         # select the device to use (CUDA or Apple Silicon MPS if available)
-        # TODO keep an eye on Apple Silicon support. Currently `linalg_cholesky` is not yet implemented for MPS.
+        # TODO keep an eye on Apple Silicon support. Currently `linalg_cholesky` is not yet implemented for MPS (issue reported: https://github.com/pytorch/pytorch/issues/77764).
         self.tensor_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
         # set up conversion to tensors
@@ -70,31 +70,39 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.bounds, self.bounds_indices = self.searchspace.get_tensorspace_bounds()
         self.train_X = torch.empty(0, **self.searchspace.tensor_kwargs)
         self.train_Y = torch.empty(0, **self.searchspace.tensor_kwargs)
+        self.train_Yvar = torch.empty(0, **self.searchspace.tensor_kwargs)
 
     def run_config(self, config: tuple):
         """Run a single configuration. Returns the result and whether it is valid."""
-        result = self.cost_func(config)
-        valid = not isinstance(result, util.ErrorConfig) and not np.isnan(result)
+        result, results = self.cost_func(config)
+        results = np.array(results)
+        var = np.nan
+        valid = not isinstance(result, util.ErrorConfig) and not np.isnan(result) and not any(np.isnan(results))
         if not valid:
             result = np.nan
         elif not self.maximize:
             result = -result
-        return [result], valid
+            results = -results
+        if valid:
+            var = np.var(results)
+        return [result], [var], valid
 
     def evaluate_configs(self, X: Tensor):
         """Evaluate a tensor of one or multiple configurations. Modifies train_X and train_Y accordingly."""
         if isinstance(X, Tensor):
             valid_configs = []
             valid_results = []
+            valid_vars = []
             if X.dim() == 1:
                 X = [X]
             for config in X:
                 assert isinstance(config, Tensor), f"Config must be a Tensor, but is of type {type(config)} ({config})"
                 param_config = self.searchspace.tensor_to_param_config(config)
-                res, valid = self.run_config(param_config)
+                res, var, valid = self.run_config(param_config)
                 if valid:
                     valid_configs.append(config)
                     valid_results.append(res)
+                    valid_vars.append(var)
                 
                 # remove evaluated configurations from the full searchspace
                 index = self.searchspace.get_param_config_index(param_config)
@@ -102,9 +110,10 @@ def evaluate_configs(self, X: Tensor):
                                                       self.searchspace_tensors[index+1:]))
 
             # add valid results to the training set
-            if len(valid_configs) > 0 and len(valid_results) > 0:
+            if len(valid_configs) > 0 and len(valid_results) > 0 and len(valid_vars) > 0:
                 self.train_X = torch.cat([self.train_X, torch.stack(valid_configs)])
                 self.train_Y = torch.cat([self.train_Y, torch.tensor(valid_results, **self.searchspace.tensor_kwargs)])
+                self.train_Yvar = torch.cat([self.train_Yvar, torch.tensor(valid_vars, **self.searchspace.tensor_kwargs)])
         else:
             raise NotImplementedError(f"Evaluation has not been implemented for type {type(X)}")
         
@@ -119,6 +128,7 @@ def initialize_model(self, state_dict=None, exact=True):
         """Initialize the model and likelihood, possibly with a state dict for faster fitting."""
         train_X = self.train_X
         train_Y = self.train_Y
+        train_Yvar = self.train_Yvar
         transforms = dict(
             input_transform=Normalize(d=train_X.shape[-1], indices=self.bounds_indices, bounds=self.bounds),
             outcome_transform=Standardize(m=train_Y.shape[-1], batch_shape=train_X.shape[:-2])
@@ -128,9 +138,9 @@ def initialize_model(self, state_dict=None, exact=True):
         if exact:
             catdims = self.searchspace.get_tensorspace_categorical_dimensions()
             if len(catdims) == 0:
-                model = SingleTaskGP(train_X, train_Y, **transforms)
+                model = SingleTaskGP(train_X, train_Y, train_Yvar=train_Yvar, **transforms)
             else:
-                model = MixedSingleTaskGP(train_X, train_Y, cat_dims=catdims, **transforms)
+                model = MixedSingleTaskGP(train_X, train_Y, train_Yvar=train_Yvar, cat_dims=catdims, **transforms)
         else:
             model = SingleTaskVariationalGP(train_X, train_Y, **transforms)
 
diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index 7901f97a0..eb0b81e27 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -62,7 +62,7 @@ class CostFunc:
 
     def __init__(
         self, searchspace: Searchspace, tuning_options, runner, *, 
-        scaling=False, snap=True, encode_non_numeric=False, return_invalid=False
+        scaling=False, snap=True, encode_non_numeric=False, return_invalid=False, return_raw=None
     ):
         """An abstract method to handle evaluation of configurations.
 
@@ -74,12 +74,16 @@ def __init__(
             snap: whether to snap given configurations to their closests equivalent in the space. Defaults to True.
             encode_non_numeric: whether to externally encode non-numeric parameter values. Defaults to False.
             return_invalid: whether to return the util.ErrorConfig of an invalid configuration. Defaults to False.
+            return_raw: returns (result, results[raw]). Key inferred from objective if set to True. Defaults to None.
         """        
         self.runner = runner
         self.snap = snap
         self.scaling = scaling
         self.encode_non_numeric = encode_non_numeric
         self.return_invalid = return_invalid
+        self.return_raw = return_raw
+        if return_raw is True:
+            self.return_raw = f"{tuning_options['objective']}s"
         self.searchspace = searchspace
         self.tuning_options = tuning_options
         if isinstance(self.tuning_options, dict):
@@ -157,6 +161,13 @@ def __call__(self, x, check_restrictions=True):
             return_value = result[self.tuning_options.objective] or sys.float_info.max
         return_value = -return_value if self.tuning_options.objective_higher_is_better else return_value
 
+        # include raw data in return if requested
+        if self.return_raw is not None:
+            try:
+                return return_value, result[self.return_raw]
+            except KeyError:
+                return return_value, [np.nan]
+
         return return_value
 
     def get_bounds_x0_eps(self):

From cad10f8ec6a81f878eb0933965682f59b0dda59b Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 12 Nov 2024 20:00:21 -0800
Subject: [PATCH 073/168] Implemented gradual cooldown on multi-feval depending
 on number of fevals left

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 63 ++++++++++++--------
 1 file changed, 39 insertions(+), 24 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 1443a5a09..d4127d43c 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -1,6 +1,6 @@
 """Bayesian Optimization implementation using BO Torch."""
 
-from math import ceil
+from math import ceil, sqrt
 
 import numpy as np
 
@@ -155,17 +155,32 @@ def initialize_model(self, state_dict=None, exact=True):
             mll = VariationalELBO(model.likelihood, model.model, num_data=train_Y.size(0))
         return mll, model
 
-    def run(self, max_fevals: int, feval_per_loop=10, max_batch_size=2048):
+    def run(self, max_fevals: int, max_batch_size=2048):
         """Run the Bayesian Optimization loop for at most `max_fevals`."""
         try:
             if not self.initial_sample_taken:
                 self.initial_sample()
             mll, model = self.initialize_model()
-            num_fevals = self.initial_sample_size
+            fevals_left = max_fevals - self.initial_sample_size
+
+            # create array to gradually reduce number of optimization spaces as fewer fevals are left
+            tensorspace_size = self.searchspace_tensors.size(0)
+            reserve_final_loops = min(3, fevals_left)   # reserve some loops at the end that are never split
+            fevals_left -= reserve_final_loops
+            num_loops = min(max(round(sqrt(fevals_left)), 3), fevals_left)  # set the number of loops for the array
+            avg_optimization_spaces = round(tensorspace_size / max_batch_size)  # set the average number of optimization spaces
+            numspace = np.geomspace(start=avg_optimization_spaces, stop=0.1, num=num_loops)
+            nums_optimization_spaces = np.clip(np.round(numspace * (fevals_left / numspace.sum())), a_min=1, a_max=None)
+            # if there's a discrepency, add or subtract the difference from the first number
+            if np.sum(nums_optimization_spaces) != fevals_left:
+                nums_optimization_spaces[0] += fevals_left - np.sum(nums_optimization_spaces)
+            nums_optimization_spaces = np.concatenate([nums_optimization_spaces, np.full(reserve_final_loops, 1)])
+            fevals_left += reserve_final_loops
 
             # Bayesian optimization loop
-            max_loops = ceil(max_fevals/feval_per_loop)
-            for f in range(max_loops):
+            for loop_i, num_optimization_spaces in enumerate(nums_optimization_spaces):
+                num_optimization_spaces = min(num_optimization_spaces, fevals_left)
+
                 # fit a Gaussian Process model
                 fit_gpytorch_mll(mll, optimizer=fit_gpytorch_mll_torch)
                 
@@ -179,7 +194,6 @@ def run(self, max_fevals: int, feval_per_loop=10, max_batch_size=2048):
 
                 # divide the optimization space into random chuncks
                 tensorspace_size = self.searchspace_tensors.size(0)
-                num_optimization_spaces = max(min(feval_per_loop, max_fevals-num_fevals), ceil(tensorspace_size / max_batch_size))
                 if num_optimization_spaces <= 1:
                     optimization_spaces = [self.searchspace_tensors]
                 else:
@@ -191,30 +205,31 @@ def run(self, max_fevals: int, feval_per_loop=10, max_batch_size=2048):
                 # optimize acquisition function to find the next evaluation point
                 for optimization_space in optimization_spaces:
 
+                    # NOTE optimize_acqf_discrete_local_search does not work with variable optimization_space size
                     # optimize over a lattice if the space is too large
-                    if max_batch_size < optimization_space.size(0):
-                        candidate, _ = optimize_acqf_discrete_local_search(
-                            acqf,
-                            q=1,
-                            discrete_choices=optimization_space,
-                            max_batch_size=max_batch_size,
-                            num_restarts=5,
-                            raw_samples=1024
-                        )
-                    else:
-                        candidate, _ = optimize_acqf_discrete(
-                            acqf, 
-                            q=1, 
-                            choices=optimization_space,
-                            max_batch_size=max_batch_size
-                        )
+                    # if len(optimization_spaces) == 1 and max_batch_size < optimization_space.size(0):
+                    #     candidate, _ = optimize_acqf_discrete_local_search(
+                    #         acqf,
+                    #         q=1,
+                    #         discrete_choices=optimization_space,
+                    #         max_batch_size=max_batch_size,
+                    #         num_restarts=5,
+                    #         raw_samples=1024
+                    #     )
+                    # else:
+                    candidate, _ = optimize_acqf_discrete(
+                        acqf, 
+                        q=1, 
+                        choices=optimization_space,
+                        max_batch_size=max_batch_size
+                    )
                     
                     # evaluate the new candidate
                     self.evaluate_configs(candidate)
-                    num_fevals += 1
+                    fevals_left -= 1
 
                 # reinitialize the models so they are ready for fitting on next iteration
-                if f < max_loops - 1:
+                if loop_i < len(nums_optimization_spaces) - 1:
                     mll, model = self.initialize_model(model.state_dict())
         except util.StopCriterionReached as e:
             if self.tuning_options.verbose:

From 1ed0352e88f0f64694fd2ebd51286dd74fca23d7 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 18 Nov 2024 18:59:01 -0800
Subject: [PATCH 074/168] Adjusted the calculation of number of optimization
 spaces to be more gradual

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index d4127d43c..949fdb459 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -167,8 +167,8 @@ def run(self, max_fevals: int, max_batch_size=2048):
             tensorspace_size = self.searchspace_tensors.size(0)
             reserve_final_loops = min(3, fevals_left)   # reserve some loops at the end that are never split
             fevals_left -= reserve_final_loops
-            num_loops = min(max(round(sqrt(fevals_left)), 3), fevals_left)  # set the number of loops for the array
-            avg_optimization_spaces = round(tensorspace_size / max_batch_size)  # set the average number of optimization spaces
+            num_loops = min(max(round(sqrt(fevals_left*2)), 3), fevals_left)  # set the number of loops for the array
+            avg_optimization_spaces = max(round(sqrt(tensorspace_size / max_batch_size)), 1)  # set the average number of optimization spaces
             numspace = np.geomspace(start=avg_optimization_spaces, stop=0.1, num=num_loops)
             nums_optimization_spaces = np.clip(np.round(numspace * (fevals_left / numspace.sum())), a_min=1, a_max=None)
             # if there's a discrepency, add or subtract the difference from the first number

From 38f084ceca9154c68311d6fb0339658ebc892650 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 18 Nov 2024 22:43:37 -0800
Subject: [PATCH 075/168] Two different kernels as test files for BO

---
 tune_bo.py => tune_bo_conv.py |  2 +-
 tune_bo_dedisp.py             | 88 +++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+), 1 deletion(-)
 rename tune_bo.py => tune_bo_conv.py (99%)
 create mode 100644 tune_bo_dedisp.py

diff --git a/tune_bo.py b/tune_bo_conv.py
similarity index 99%
rename from tune_bo.py
rename to tune_bo_conv.py
index 81f1fe999..03ee7f2fa 100644
--- a/tune_bo.py
+++ b/tune_bo_conv.py
@@ -30,7 +30,7 @@ def tune(
     quiet=False,
     simulation_mode=True,
     lang="CUDA",
-    profiling=True,
+    profiling=False,
 ):  
     directory = Path(__file__).parent / "../autotuning_methodology/cached_data_used/"
     assert directory.exists()
diff --git a/tune_bo_dedisp.py b/tune_bo_dedisp.py
new file mode 100644
index 000000000..2cfb3b58b
--- /dev/null
+++ b/tune_bo_dedisp.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+import os
+from collections import OrderedDict
+from pathlib import Path
+
+import kernel_tuner as kt
+
+nr_dms = 2048
+nr_samples = 25000
+nr_channels = 1536
+max_shift = 650
+nr_samples_per_channel = (nr_samples+max_shift)
+down_sampling = 1
+dm_first = 0.0
+dm_step = 0.02
+
+channel_bandwidth = 0.1953125
+sampling_time = 0.00004096
+min_freq = 1425.0
+max_freq = min_freq + (nr_channels-1) * channel_bandwidth
+
+
+def tune(device, strategy="bayes_opt_BOTorch", strategy_options={ 'max_fevals': 1500 }, lang='HIP', verbose=True, quiet=False, simulation_mode=True, profiling=True):
+
+    args = []
+
+    answer = [None, None, None]
+
+    problem_size = (nr_samples, nr_dms, 1)
+    tune_params = OrderedDict()
+    tune_params["block_size_x"] = [1, 2, 4, 8] + [16*i for i in range(1,3)]
+    tune_params["block_size_y"] = [8*i for i in range(4,33)]
+    tune_params["block_size_z"] = [1]
+    tune_params["tile_size_x"] = [i for i in range(1,5)]
+    tune_params["tile_size_y"] = [i for i in range(1,9)]
+    tune_params["tile_stride_x"] = [0, 1]
+    tune_params["tile_stride_y"] = [0, 1]
+    tune_params["loop_unroll_factor_channel"] = [0] #+ [i for i in range(1,nr_channels+1) if nr_channels % i == 0] #[i for i in range(nr_channels+1)]
+
+    cp = [f"-I{os.path.dirname(os.path.realpath(__file__))}"]
+
+
+    check_block_size = "32 <= block_size_x * block_size_y <= 1024"
+    check_loop_x = "loop_unroll_factor_x <= tile_size_x and tile_size_x % loop_unroll_factor_x == 0"
+    check_loop_y = "loop_unroll_factor_y <= tile_size_y and tile_size_y % loop_unroll_factor_y == 0"
+    check_loop_channel = f"loop_unroll_factor_channel <= {nr_channels} and loop_unroll_factor_channel and {nr_channels} % loop_unroll_factor_channel == 0"
+
+    check_tile_stride_x = "tile_size_x > 1 or tile_stride_x == 0"
+    check_tile_stride_y = "tile_size_y > 1 or tile_stride_y == 0"
+
+    config_valid = [check_block_size, check_tile_stride_x, check_tile_stride_y]
+
+    metrics = OrderedDict()
+    gbytes = (nr_dms * nr_samples * nr_channels)/1e9
+    metrics["GB/s"] = lambda p: gbytes / (p['time'] / 1e3)
+
+    directory = Path(__file__).parent / "../autotuning_methodology/cached_data_used/"
+    cachefile = directory / f"cachefiles/dedispersion_milo/{device}.json"
+    assert directory.exists()
+    if lang == "CUDA":
+        kernel_file = directory / "kernels/dedisp_milo/dedispersion.cu"
+    elif lang == "HIP":
+        kernel_file = directory / "kernels/dedisp_milo/dedispersion.cu.hip"
+    else:
+        raise ValueError(f"Invalid {lang=}")
+
+    def run():
+        return kt.tune_kernel("dedispersion_kernel", kernel_file, problem_size, args, tune_params,
+                                answer=answer, compiler_options=cp, restrictions=config_valid, device=0,
+                                cache=cachefile, lang=lang, iterations=32, metrics=metrics, 
+                                simulation_mode=simulation_mode, verbose=verbose, quiet=quiet, strategy=strategy, strategy_options=strategy_options)
+    
+    # start tuning
+    if profiling:
+        import cProfile
+
+        with cProfile.Profile() as pr:
+            results, env = run()
+            if profiling:
+                pr.dump_stats('bo_prof_torchfit_2.prof')
+    else:
+        results, env = run()
+
+    return results, env
+
+if __name__ == "__main__":
+
+    tune("A100")

From c447dc27f372a70cece027ef474799e27600e314 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 20 Nov 2024 22:19:25 -0800
Subject: [PATCH 076/168] Set up structure for BOTorch transfer learning
 strategy as a separate strategy

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py  |  8 ++--
 .../strategies/bayes_opt_BOTorch_transfer.py  | 38 +++++++++++++++++++
 2 files changed, 41 insertions(+), 5 deletions(-)
 create mode 100644 kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 949fdb459..689d64183 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -16,7 +16,7 @@
     )
     from botorch.models import MixedSingleTaskGP, SingleTaskGP, SingleTaskVariationalGP
     from botorch.models.transforms import Normalize, Standardize
-    from botorch.optim import optimize_acqf_discrete, optimize_acqf_discrete_local_search
+    from botorch.optim import optimize_acqf_discrete
     from botorch.optim.fit import fit_gpytorch_mll_torch
     from gpytorch.mlls import ExactMarginalLogLikelihood, VariationalELBO
     from torch import Tensor
@@ -29,9 +29,7 @@
 
 from kernel_tuner import util
 from kernel_tuner.searchspace import Searchspace
-from kernel_tuner.strategies.common import (
-    CostFunc,
-)
+from kernel_tuner.strategies.common import CostFunc
 
 # set gpytorch to approximate mode for faster fitting
 linop_settings._fast_covar_root_decomposition._default = True
@@ -235,4 +233,4 @@ def run(self, max_fevals: int, max_batch_size=2048):
             if self.tuning_options.verbose:
                 print(e)
 
-        return self.cost_func.results 
+        return self.cost_func.results
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
new file mode 100644
index 000000000..627d37a75
--- /dev/null
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
@@ -0,0 +1,38 @@
+"""Bayesian Optimization implementation using BO Torch."""
+
+try:
+    from torch import Tensor
+    bayes_opt_present = True
+except ImportError:
+    bayes_opt_present = False
+
+from kernel_tuner.searchspace import Searchspace
+from kernel_tuner.strategies.bayes_opt_BOTorch import BayesianOptimization
+
+
+def tune(searchspace: Searchspace, runner, tuning_options):
+    """The entry function for tuning a searchspace using this algorithm."""
+    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
+    bo = BayesianOptimization(searchspace, runner, tuning_options)
+    return bo.run(max_fevals)
+
+class BayesianOptimizationTransfer(BayesianOptimization):
+    """Bayesian Optimization class with transfer learning."""
+
+    def __init__(self, searchspace: Searchspace, runner, tuning_options):
+        super().__init__(searchspace, runner, tuning_options)
+
+    def run_config(self, config: tuple):
+        return super().run_config(config)
+    
+    def evaluate_configs(self, X: Tensor):
+        return super().evaluate_configs(X)
+    
+    def initial_sample(self):
+        return super().initial_sample()
+    
+    def initialize_model(self, state_dict=None, exact=True):
+        return super().initialize_model(state_dict, exact)
+    
+    def run(self, max_fevals: int, max_batch_size=2048):
+        return super().run(max_fevals, max_batch_size)
\ No newline at end of file

From 7c2fd5112c53dea03a8262a11bceb137acfd8714 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 20 Nov 2024 22:43:41 -0800
Subject: [PATCH 077/168] Implemented Rank-Weighted GP Ensemble for
 transfer learning

---
 .../strategies/bayes_opt_BOTorch_transfer.py  | 365 +++++++++++++++++-
 1 file changed, 364 insertions(+), 1 deletion(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
index 627d37a75..885bc6708 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
@@ -1,7 +1,22 @@
 """Bayesian Optimization implementation using BO Torch."""
 
 try:
+    import torch
+    from botorch.acquisition.logei import qLogNoisyExpectedImprovement
+    from botorch.fit import fit_gpytorch_mll
+    from botorch.models import SingleTaskGP
+    from botorch.models.gpytorch import GPyTorchModel
+    from botorch.optim.optimize import optimize_acqf
+    from botorch.sampling.normal import SobolQMCNormalSampler
+    from botorch.utils.sampling import draw_sobol_samples
+    from botorch.utils.transforms import normalize, unnormalize
+    from gpytorch.distributions import MultivariateNormal
+    from gpytorch.lazy import PsdSumLazyTensor
+    from gpytorch.likelihoods import LikelihoodList
+    from gpytorch.mlls import ExactMarginalLogLikelihood
+    from gpytorch.models import GP
     from torch import Tensor
+    from torch.nn import ModuleList
     bayes_opt_present = True
 except ImportError:
     bayes_opt_present = False
@@ -9,6 +24,17 @@
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies.bayes_opt_BOTorch import BayesianOptimization
 
+# settings
+NUM_BASE_TASKS = 5
+N_BATCH = 10
+NUM_POSTERIOR_SAMPLES = 256
+RANDOM_INITIALIZATION_SIZE = 3
+N_TRIALS = 10
+MC_SAMPLES = 512
+N_RESTART_CANDIDATES = 512
+N_RESTARTS = 10
+Q_BATCH_SIZE = 1
+
 
 def tune(searchspace: Searchspace, runner, tuning_options):
     """The entry function for tuning a searchspace using this algorithm."""
@@ -22,6 +48,45 @@ class BayesianOptimizationTransfer(BayesianOptimization):
     def __init__(self, searchspace: Searchspace, runner, tuning_options):
         super().__init__(searchspace, runner, tuning_options)
 
+        self.best_rgpe_all = []
+        self.best_random_all = []
+        self.best_vanilla_nei_all = []
+        self.noise_std = 0.05
+
+        # Sample data for each base task
+        data_by_task = {}
+        for task in range(NUM_BASE_TASKS):
+            num_training_points = 20
+            # draw points from a sobol sequence
+            raw_x = draw_sobol_samples(
+                bounds=BOUNDS,
+                n=num_training_points,
+                q=1,
+                seed=task + 5397923,
+            ).squeeze(1)
+            # get observed values
+            f_x = f(raw_x, task_shift(task + 1))
+            train_y = f_x + noise_std * torch.randn_like(f_x)
+            train_yvar = torch.full_like(train_y, noise_std**2)
+            # store training data
+            data_by_task[task] = {
+                # scale x to [0, 1]
+                "train_x": normalize(raw_x, bounds=BOUNDS),
+                "train_y": train_y,
+                "train_yvar": train_yvar,
+            }
+
+        # Fit base model
+        base_model_list = []
+        for task in range(NUM_BASE_TASKS):
+            print(f"Fitting base model {task}")
+            model = self.get_fitted_model(
+                data_by_task[task]["train_x"],
+                data_by_task[task]["train_y"],
+                data_by_task[task]["train_yvar"],
+            )
+            base_model_list.append(model)
+
     def run_config(self, config: tuple):
         return super().run_config(config)
     
@@ -34,5 +99,303 @@ def initial_sample(self):
     def initialize_model(self, state_dict=None, exact=True):
         return super().initialize_model(state_dict, exact)
     
+    def get_fitted_model(self, train_X, train_Y, train_Yvar, state_dict=None):
+        """Get a single task GP. The model will be fit unless a state_dict with model hyperparameters is provided."""
+        model = SingleTaskGP(train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar)
+        if state_dict is None:
+            mll = ExactMarginalLogLikelihood(model.likelihood, model).to(train_X)
+            fit_gpytorch_mll(mll)
+        else:
+            model.load_state_dict(state_dict)
+        return model
+    
+    def roll_col(self, X, shift):
+        """Rotate columns to right by shift."""
+        return torch.cat((X[..., -shift:], X[..., :-shift]), dim=-1)
+    
+    def compute_ranking_loss(self, f_samps, target_y):
+        """Compute ranking loss for each sample from the posterior over target points.
+
+        Args:
+            f_samps: `n_samples x (n) x n`-dim tensor of samples
+            target_y: `n x 1`-dim tensor of targets
+        Returns:
+            Tensor: `n_samples`-dim tensor containing the ranking loss across each sample
+        """
+        n = target_y.shape[0]
+        if f_samps.ndim == 3:
+            # Compute ranking loss for target model
+            # take cartesian product of target_y
+            cartesian_y = torch.cartesian_prod(
+                target_y.squeeze(-1),
+                target_y.squeeze(-1),
+            ).view(n, n, 2)
+            # the diagonal of f_samps are the out-of-sample predictions
+            # for each LOO model, compare the out of sample predictions to each in-sample prediction
+            rank_loss = (
+                (
+                    (f_samps.diagonal(dim1=1, dim2=2).unsqueeze(-1) < f_samps)
+                    ^ (cartesian_y[..., 0] < cartesian_y[..., 1])
+                )
+                .sum(dim=-1)
+                .sum(dim=-1)
+            )
+        else:
+            rank_loss = torch.zeros(
+                f_samps.shape[0], dtype=torch.long, device=target_y.device
+            )
+            y_stack = target_y.squeeze(-1).expand(f_samps.shape)
+            for i in range(1, target_y.shape[0]):
+                rank_loss += (
+                    (self.roll_col(f_samps, i) < f_samps) ^ (self.roll_col(y_stack, i) < y_stack)
+                ).sum(dim=-1)
+        return rank_loss
+    
+    def get_target_model_loocv_sample_preds(self, train_x, train_y, train_yvar, target_model, num_samples):
+        """Create a batch-mode LOOCV GP and draw a joint sample across all points from the target task.
+
+        Args:
+            train_x: `n x d` tensor of training points
+            train_y: `n x 1` tensor of training targets
+            target_model: fitted target model
+            num_samples: number of mc samples to draw
+
+        Return: `num_samples x n x n`-dim tensor of samples, where dim=1 represents the `n` LOO models,
+            and dim=2 represents the `n` training points.
+        """
+        batch_size = len(train_x)
+        masks = torch.eye(len(train_x), dtype=torch.uint8, device=self.tensor_device).bool()
+        train_x_cv = torch.stack([train_x[~m] for m in masks])
+        train_y_cv = torch.stack([train_y[~m] for m in masks])
+        train_yvar_cv = torch.stack([train_yvar[~m] for m in masks])
+        state_dict = target_model.state_dict()
+        # expand to batch size of batch_mode LOOCV model
+        state_dict_expanded = {
+            name: t.expand(batch_size, *[-1 for _ in range(t.ndim)])
+            for name, t in state_dict.items()
+        }
+        model = self.get_fitted_model(
+            train_x_cv, train_y_cv, train_yvar_cv, state_dict=state_dict_expanded
+        )
+        with torch.no_grad():
+            posterior = model.posterior(train_x)
+            # Since we have a batch mode gp and model.posterior always returns an output dimension,
+            # the output from `posterior.sample()` here `num_samples x n x n x 1`, so let's squeeze
+            # the last dimension.
+            sampler = SobolQMCNormalSampler(sample_shape=torch.Size([num_samples]))
+            return sampler(posterior).squeeze(-1)
+    
+    def compute_rank_weights(self, train_x, train_y, base_models, target_model, num_samples):
+        """Compute ranking weights for each base model and the target model (using LOOCV for the target model).
+        
+        Note: This implementation does not currently address weight dilution, since we only have a small number of base models.
+
+        Args:
+            train_x: `n x d` tensor of training points (for target task)
+            train_y: `n` tensor of training targets (for target task)
+            base_models: list of base models
+            target_model: target model
+            num_samples: number of mc samples
+
+        Returns:
+            Tensor: `n_t`-dim tensor with the ranking weight for each model
+        """
+        ranking_losses = []
+        # compute ranking loss for each base model
+        for task in range(len(base_models)):
+            model = base_models[task]
+            # compute posterior over training points for target task
+            posterior = model.posterior(train_x)
+            sampler = SobolQMCNormalSampler(sample_shape=torch.Size([num_samples]))
+            base_f_samps = sampler(posterior).squeeze(-1).squeeze(-1)
+            # compute and save ranking loss
+            ranking_losses.append(self.compute_ranking_loss(base_f_samps, train_y))
+        # compute ranking loss for target model using LOOCV
+        # f_samps
+        target_f_samps = self.get_target_model_loocv_sample_preds(
+            train_x,
+            train_y,
+            train_yvar,
+            target_model,
+            num_samples,
+        )
+        ranking_losses.append(self.compute_ranking_loss(target_f_samps, train_y))
+        ranking_loss_tensor = torch.stack(ranking_losses)
+        # compute best model (minimum ranking loss) for each sample
+        best_models = torch.argmin(ranking_loss_tensor, dim=0)
+        # compute proportion of samples for which each model is best
+        rank_weights = (
+            best_models.bincount(minlength=len(ranking_losses)).type_as(train_x)
+            / num_samples
+        )
+        return rank_weights
+    
     def run(self, max_fevals: int, max_batch_size=2048):
-        return super().run(max_fevals, max_batch_size)
\ No newline at end of file
+        # Average over multiple trials
+        for trial in range(N_TRIALS):
+            print(f"Trial {trial + 1} of {N_TRIALS}")
+            best_rgpe = []
+            best_random = []
+            best_vanilla_nei = []
+            # Initial random observations
+            raw_x = draw_sobol_samples(
+                bounds=BOUNDS, n=RANDOM_INITIALIZATION_SIZE, q=1, seed=trial
+            ).squeeze(1)
+            train_x = normalize(raw_x, bounds=BOUNDS)
+            train_y_noiseless = f(raw_x)
+            train_y = train_y_noiseless + noise_std * torch.randn_like(train_y_noiseless)
+            train_yvar = torch.full_like(train_y, noise_std**2)
+            vanilla_nei_train_x = train_x.clone()
+            vanilla_nei_train_y = train_y.clone()
+            vanilla_nei_train_yvar = train_yvar.clone()
+            # keep track of the best observed point at each iteration
+            best_value = train_y.max().item()
+            best_rgpe.append(best_value)
+            best_random.append(best_value)
+            vanilla_nei_best_value = best_value
+            best_vanilla_nei.append(vanilla_nei_best_value)
+
+            # Run N_BATCH rounds of BayesOpt after the initial random batch
+            for iteration in range(N_BATCH):
+                target_model = self.get_fitted_model(train_x, train_y, train_yvar)
+                model_list = base_model_list + [target_model]
+                rank_weights = self.compute_rank_weights(
+                    train_x,
+                    train_y,
+                    base_model_list,
+                    target_model,
+                    NUM_POSTERIOR_SAMPLES,
+                )
+
+                # create model and acquisition function
+                rgpe_model = RGPE(model_list, rank_weights)
+                sampler_qnei = SobolQMCNormalSampler(sample_shape=torch.Size([MC_SAMPLES]))
+                qNEI = qLogNoisyExpectedImprovement(
+                    model=rgpe_model,
+                    X_baseline=train_x,
+                    sampler=sampler_qnei,
+                    prune_baseline=False,
+                )
+
+                # optimize
+                candidate, _ = optimize_acqf(
+                    acq_function=qNEI,
+                    bounds=torch.tensor([[0.0], [1.0]], **self.searchspace.tensor_kwargs),
+                    q=Q_BATCH_SIZE,
+                    num_restarts=N_RESTARTS,
+                    raw_samples=N_RESTART_CANDIDATES,
+                )
+
+                # fetch the new values
+                new_x = candidate.detach()
+                new_y_noiseless = f(unnormalize(new_x, bounds=BOUNDS))
+                new_y = new_y_noiseless + noise_std * torch.randn_like(new_y_noiseless)
+                new_yvar = torch.full_like(new_y, noise_std**2)
+
+                # update training points
+                train_x = torch.cat((train_x, new_x))
+                train_y = torch.cat((train_y, new_y))
+                train_yvar = torch.cat((train_yvar, new_yvar))
+                random_candidate = torch.rand(1, **self.searchspace.tensor_kwargs)
+                next_random_noiseless = f(unnormalize(random_candidate, bounds=BOUNDS))
+                next_random = next_random_noiseless + noise_std * torch.randn_like(
+                    next_random_noiseless
+                )
+                next_random_best = next_random.max().item()
+                best_random.append(max(best_random[-1], next_random_best))
+
+                # get the new best observed value
+                best_value = train_y.max().item()
+                best_rgpe.append(best_value)
+
+                # Run Vanilla NEI for comparison
+                vanilla_nei_model = self.get_fitted_model(
+                    vanilla_nei_train_x,
+                    vanilla_nei_train_y,
+                    vanilla_nei_train_yvar,
+                )
+                vanilla_nei_sampler = SobolQMCNormalSampler(
+                    sample_shape=torch.Size([MC_SAMPLES])
+                )
+                vanilla_qNEI = qLogNoisyExpectedImprovement(
+                    model=vanilla_nei_model,
+                    X_baseline=vanilla_nei_train_x,
+                    sampler=vanilla_nei_sampler,
+                )
+                vanilla_nei_candidate, _ = optimize_acqf(
+                    acq_function=vanilla_qNEI,
+                    bounds=torch.tensor([[0.0], [1.0]], **self.searchspace.tensor_kwargs),
+                    q=Q_BATCH_SIZE,
+                    num_restarts=N_RESTARTS,
+                    raw_samples=N_RESTART_CANDIDATES,
+                )
+                # fetch the new values
+                vanilla_nei_new_x = vanilla_nei_candidate.detach()
+                vanilla_nei_new_y_noiseless = f(unnormalize(vanilla_nei_new_x, bounds=BOUNDS))
+                vanilla_nei_new_y = vanilla_nei_new_y_noiseless + noise_std * torch.randn_like(
+                    new_y_noiseless
+                )
+                vanilla_nei_new_yvar = torch.full_like(vanilla_nei_new_y, noise_std**2)
+
+                # update training points
+                vanilla_nei_train_x = torch.cat([vanilla_nei_train_x, vanilla_nei_new_x])
+                vanilla_nei_train_y = torch.cat([vanilla_nei_train_y, vanilla_nei_new_y])
+                vanilla_nei_train_yvar = torch.cat(
+                    [vanilla_nei_train_yvar, vanilla_nei_new_yvar]
+                )
+
+                # get the new best observed value
+                vanilla_nei_best_value = vanilla_nei_train_y.max().item()
+                best_vanilla_nei.append(vanilla_nei_best_value)
+
+            self.best_rgpe_all.append(best_rgpe)
+            self.best_random_all.append(best_random)
+            self.best_vanilla_nei_all.append(best_vanilla_nei)
+
+
+class RGPE(GP, GPyTorchModel):
+    """Rank-weighted GP ensemble.
+    
+    Note: this class inherits from GPyTorchModel which provides an interface for GPyTorch models in botorch.
+    """
+
+    _num_outputs = 1  # metadata for botorch
+
+    def __init__(self, models, weights):
+        super().__init__()
+        self.models = ModuleList(models)
+        for m in models:
+            if not hasattr(m, "likelihood"):
+                raise ValueError(
+                    "RGPE currently only supports models that have a likelihood (e.g. ExactGPs)"
+                )
+        self.likelihood = LikelihoodList(*[m.likelihood for m in models])
+        self.weights = weights
+        self.to(weights)
+
+    def forward(self, x):
+        weighted_means = []
+        weighted_covars = []
+        # filter model with zero weights
+        # weights on covariance matrices are weight**2
+        non_zero_weight_indices = (self.weights**2 > 0).nonzero()
+        non_zero_weights = self.weights[non_zero_weight_indices]
+        # re-normalize
+        non_zero_weights /= non_zero_weights.sum()
+
+        for non_zero_weight_idx in range(non_zero_weight_indices.shape[0]):
+            raw_idx = non_zero_weight_indices[non_zero_weight_idx].item()
+            model = self.models[raw_idx]
+            posterior = model.posterior(x)
+            # unstandardize predictions
+            posterior_mean = posterior.mean.squeeze(-1)
+            posterior_cov = posterior.mvn.lazy_covariance_matrix
+            # apply weight
+            weight = non_zero_weights[non_zero_weight_idx]
+            weighted_means.append(weight * posterior_mean)
+            weighted_covars.append(posterior_cov * weight**2)
+        # set mean and covariance to be the rank-weighted sum the means and covariances of the
+        # base models and target model
+        mean_x = torch.stack(weighted_means).sum(dim=0)
+        covar_x = PsdSumLazyTensor(*weighted_covars)
+        return MultivariateNormal(mean_x, covar_x)
\ No newline at end of file

From fec0e65d706a66269a1a17d156908ab615f0c6fd Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 20 Nov 2024 23:02:03 -0800
Subject: [PATCH 078/168] Avoided import of whole util submodule

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 689d64183..45ac275a9 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -27,9 +27,9 @@
 import gpytorch.settings as gp_settings
 import linear_operator.settings as linop_settings
 
-from kernel_tuner import util
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies.common import CostFunc
+from kernel_tuner.util import ErrorConfig, StopCriterionReached
 
 # set gpytorch to approximate mode for faster fitting
 linop_settings._fast_covar_root_decomposition._default = True
@@ -75,7 +75,7 @@ def run_config(self, config: tuple):
         result, results = self.cost_func(config)
         results = np.array(results)
         var = np.nan
-        valid = not isinstance(result, util.ErrorConfig) and not np.isnan(result) and not any(np.isnan(results))
+        valid = not isinstance(result, ErrorConfig) and not np.isnan(result) and not any(np.isnan(results))
         if not valid:
             result = np.nan
         elif not self.maximize:
@@ -229,7 +229,7 @@ def run(self, max_fevals: int, max_batch_size=2048):
                 # reinitialize the models so they are ready for fitting on next iteration
                 if loop_i < len(nums_optimization_spaces) - 1:
                     mll, model = self.initialize_model(model.state_dict())
-        except util.StopCriterionReached as e:
+        except StopCriterionReached as e:
             if self.tuning_options.verbose:
                 print(e)
 

From 091ef47ac0dc361a60429633730a79a390076f45 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 20 Nov 2024 23:55:06 -0800
Subject: [PATCH 079/168] Simplified BO transfer run loop

---
 .../strategies/bayes_opt_BOTorch_transfer.py  | 168 ++++++------------
 1 file changed, 54 insertions(+), 114 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
index 885bc6708..f8106658a 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
@@ -2,11 +2,11 @@
 
 try:
     import torch
-    from botorch.acquisition.logei import qLogNoisyExpectedImprovement
-    from botorch.fit import fit_gpytorch_mll
+    from botorch.acquisition import LogExpectedImprovement
+    from botorch.fit import fit_gpytorch_mll, fit_gpytorch_mll_torch
     from botorch.models import SingleTaskGP
     from botorch.models.gpytorch import GPyTorchModel
-    from botorch.optim.optimize import optimize_acqf
+    from botorch.optim.optimize import optimize_acqf_discrete, optimize_acqf_discrete_local_search
     from botorch.sampling.normal import SobolQMCNormalSampler
     from botorch.utils.sampling import draw_sobol_samples
     from botorch.utils.transforms import normalize, unnormalize
@@ -23,6 +23,7 @@
 
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies.bayes_opt_BOTorch import BayesianOptimization
+from kernel_tuner.util import StopCriterionReached
 
 # settings
 NUM_BASE_TASKS = 5
@@ -201,6 +202,7 @@ def compute_rank_weights(self, train_x, train_y, base_models, target_model, num_
             Tensor: `n_t`-dim tensor with the ranking weight for each model
         """
         ranking_losses = []
+        
         # compute ranking loss for each base model
         for task in range(len(base_models)):
             model = base_models[task]
@@ -210,6 +212,7 @@ def compute_rank_weights(self, train_x, train_y, base_models, target_model, num_
             base_f_samps = sampler(posterior).squeeze(-1).squeeze(-1)
             # compute and save ranking loss
             ranking_losses.append(self.compute_ranking_loss(base_f_samps, train_y))
+
         # compute ranking loss for target model using LOOCV
         # f_samps
         target_f_samps = self.get_target_model_loocv_sample_preds(
@@ -231,35 +234,19 @@ def compute_rank_weights(self, train_x, train_y, base_models, target_model, num_
         return rank_weights
     
     def run(self, max_fevals: int, max_batch_size=2048):
-        # Average over multiple trials
-        for trial in range(N_TRIALS):
-            print(f"Trial {trial + 1} of {N_TRIALS}")
-            best_rgpe = []
-            best_random = []
-            best_vanilla_nei = []
-            # Initial random observations
-            raw_x = draw_sobol_samples(
-                bounds=BOUNDS, n=RANDOM_INITIALIZATION_SIZE, q=1, seed=trial
-            ).squeeze(1)
-            train_x = normalize(raw_x, bounds=BOUNDS)
-            train_y_noiseless = f(raw_x)
-            train_y = train_y_noiseless + noise_std * torch.randn_like(train_y_noiseless)
-            train_yvar = torch.full_like(train_y, noise_std**2)
-            vanilla_nei_train_x = train_x.clone()
-            vanilla_nei_train_y = train_y.clone()
-            vanilla_nei_train_yvar = train_yvar.clone()
-            # keep track of the best observed point at each iteration
-            best_value = train_y.max().item()
-            best_rgpe.append(best_value)
-            best_random.append(best_value)
-            vanilla_nei_best_value = best_value
-            best_vanilla_nei.append(vanilla_nei_best_value)
+        """Run the Bayesian Optimization loop for at most `max_fevals`."""
+        try:
+            if not self.initial_sample_taken:
+                self.initial_sample()
+            mll, model = self.initialize_model()
+            fevals_left = max_fevals - self.initial_sample_size
+
+            # Bayesian optimization loop
+            for _ in range(fevals_left):
 
-            # Run N_BATCH rounds of BayesOpt after the initial random batch
-            for iteration in range(N_BATCH):
-                target_model = self.get_fitted_model(train_x, train_y, train_yvar)
+                target_model = get_fitted_model(train_x, train_y, train_yvar)
                 model_list = base_model_list + [target_model]
-                rank_weights = self.compute_rank_weights(
+                rank_weights = compute_rank_weights(
                     train_x,
                     train_y,
                     base_model_list,
@@ -267,90 +254,43 @@ def run(self, max_fevals: int, max_batch_size=2048):
                     NUM_POSTERIOR_SAMPLES,
                 )
 
-                # create model and acquisition function
-                rgpe_model = RGPE(model_list, rank_weights)
-                sampler_qnei = SobolQMCNormalSampler(sample_shape=torch.Size([MC_SAMPLES]))
-                qNEI = qLogNoisyExpectedImprovement(
-                    model=rgpe_model,
-                    X_baseline=train_x,
-                    sampler=sampler_qnei,
-                    prune_baseline=False,
-                )
-
-                # optimize
-                candidate, _ = optimize_acqf(
-                    acq_function=qNEI,
-                    bounds=torch.tensor([[0.0], [1.0]], **self.searchspace.tensor_kwargs),
-                    q=Q_BATCH_SIZE,
-                    num_restarts=N_RESTARTS,
-                    raw_samples=N_RESTART_CANDIDATES,
-                )
-
-                # fetch the new values
-                new_x = candidate.detach()
-                new_y_noiseless = f(unnormalize(new_x, bounds=BOUNDS))
-                new_y = new_y_noiseless + noise_std * torch.randn_like(new_y_noiseless)
-                new_yvar = torch.full_like(new_y, noise_std**2)
-
-                # update training points
-                train_x = torch.cat((train_x, new_x))
-                train_y = torch.cat((train_y, new_y))
-                train_yvar = torch.cat((train_yvar, new_yvar))
-                random_candidate = torch.rand(1, **self.searchspace.tensor_kwargs)
-                next_random_noiseless = f(unnormalize(random_candidate, bounds=BOUNDS))
-                next_random = next_random_noiseless + noise_std * torch.randn_like(
-                    next_random_noiseless
-                )
-                next_random_best = next_random.max().item()
-                best_random.append(max(best_random[-1], next_random_best))
-
-                # get the new best observed value
-                best_value = train_y.max().item()
-                best_rgpe.append(best_value)
-
-                # Run Vanilla NEI for comparison
-                vanilla_nei_model = self.get_fitted_model(
-                    vanilla_nei_train_x,
-                    vanilla_nei_train_y,
-                    vanilla_nei_train_yvar,
-                )
-                vanilla_nei_sampler = SobolQMCNormalSampler(
-                    sample_shape=torch.Size([MC_SAMPLES])
-                )
-                vanilla_qNEI = qLogNoisyExpectedImprovement(
-                    model=vanilla_nei_model,
-                    X_baseline=vanilla_nei_train_x,
-                    sampler=vanilla_nei_sampler,
-                )
-                vanilla_nei_candidate, _ = optimize_acqf(
-                    acq_function=vanilla_qNEI,
-                    bounds=torch.tensor([[0.0], [1.0]], **self.searchspace.tensor_kwargs),
-                    q=Q_BATCH_SIZE,
-                    num_restarts=N_RESTARTS,
-                    raw_samples=N_RESTART_CANDIDATES,
-                )
-                # fetch the new values
-                vanilla_nei_new_x = vanilla_nei_candidate.detach()
-                vanilla_nei_new_y_noiseless = f(unnormalize(vanilla_nei_new_x, bounds=BOUNDS))
-                vanilla_nei_new_y = vanilla_nei_new_y_noiseless + noise_std * torch.randn_like(
-                    new_y_noiseless
-                )
-                vanilla_nei_new_yvar = torch.full_like(vanilla_nei_new_y, noise_std**2)
-
-                # update training points
-                vanilla_nei_train_x = torch.cat([vanilla_nei_train_x, vanilla_nei_new_x])
-                vanilla_nei_train_y = torch.cat([vanilla_nei_train_y, vanilla_nei_new_y])
-                vanilla_nei_train_yvar = torch.cat(
-                    [vanilla_nei_train_yvar, vanilla_nei_new_yvar]
-                )
-
-                # get the new best observed value
-                vanilla_nei_best_value = vanilla_nei_train_y.max().item()
-                best_vanilla_nei.append(vanilla_nei_best_value)
-
-            self.best_rgpe_all.append(best_rgpe)
-            self.best_random_all.append(best_random)
-            self.best_vanilla_nei_all.append(best_vanilla_nei)
+                # fit a Gaussian Process model
+                fit_gpytorch_mll(mll, optimizer=fit_gpytorch_mll_torch)
+                
+                # define the acquisition function
+                acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
+                
+                # optimize acquisition function to find the next evaluation point
+                if max_batch_size < self.searchspace_tensors.size(0):
+                    # optimize over a lattice if the space is too large
+                    candidate, _ = optimize_acqf_discrete_local_search(
+                        acqf,
+                        q=1,
+                        discrete_choices=self.searchspace_tensors,
+                        max_batch_size=max_batch_size,
+                        num_restarts=5,
+                        raw_samples=1024
+                    )
+                else:
+                    candidate, _ = optimize_acqf_discrete(
+                        acqf, 
+                        q=1, 
+                        choices=self.searchspace_tensors,
+                        max_batch_size=max_batch_size
+                    )
+                    
+                    # evaluate the new candidate
+                    self.evaluate_configs(candidate)
+                    fevals_left -= 1
+
+                # reinitialize the models so they are ready for fitting on next iteration
+                if fevals_left > 0:
+                    mll, model = self.initialize_model(model.state_dict())
+        except StopCriterionReached as e:
+            if self.tuning_options.verbose:
+                print(e)
+
+        return self.cost_func.results
 
 
 class RGPE(GP, GPyTorchModel):

From ee11757760d947bfe49e06d7e7c75bd6b578e9d4 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 20 Nov 2024 23:56:05 -0800
Subject: [PATCH 080/168] Implemented transfer learning caches in interface to
 be read and passed through tuning_options

---
 kernel_tuner/interface.py | 32 +++++++++++++++++++++++++++-----
 tune_bo_conv.py           |  7 ++++++-
 tune_bo_dedisp.py         | 12 ++++++++----
 3 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index f48d105dc..56fd7d883 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -477,6 +477,15 @@ def __deepcopy__(self, _):
                 "string",
             ),
         ),
+        (
+            "transfer_learning_caches",
+            (
+                """Array of filepaths to caches to use for transfer learning.
+        Filename uses suffix ".json", which is appended if missing.
+        """,
+                "list(string) or list(Path)",
+            ),
+        ),
         ("metrics", ("specifies user-defined metrics, please see :ref:`metrics`.", "dict")),
         ("simulation_mode", ("Simulate an auto-tuning search from an existing cachefile", "bool")),
         ("observers", ("""A list of Observers to use during tuning, please see :ref:`observers`.""", "list")),
@@ -593,6 +602,7 @@ def tune_kernel(
     observers=None,
     objective=None,
     objective_higher_is_better=None,
+    transfer_learning_caches=[],
 ):
     start_overhead_time = perf_counter()
     if log:
@@ -679,18 +689,30 @@ def tune_kernel(
     # we normalize it so that it always accepts atol.
     tuning_options.verify = util.normalize_verify_function(tuning_options.verify)
 
+    def preprocess_cache(filepath):
+        if isinstance(filepath, Path):
+            filepath = str(filepath.resolve())
+        if filepath[-5:] != ".json":
+            filepath += ".json"
+        return filepath
+
     # process cache
     if cache:
-        if isinstance(cache, Path):
-            cache = str(cache.resolve())
-        if cache[-5:] != ".json":
-            cache += ".json"
-
+        cache = preprocess_cache(cache)
         util.process_cache(cache, kernel_options, tuning_options, runner)
     else:
         tuning_options.cache = {}
         tuning_options.cachefile = None
 
+    # process transfer learning caches
+    tuning_options.transfer_learning_caches = []
+    if transfer_learning_caches and len(transfer_learning_caches) > 0:
+        for transfer_learning_cache in transfer_learning_caches:
+            cache = preprocess_cache(transfer_learning_cache)
+            assert cache != tuning_options.cache, "Transfer learning cache can not be the same as current cache"
+            cache_data = util.read_cache(cache, open_cache=False)
+            tuning_options.transfer_learning_caches.append(cache_data)
+
     # create search space
     searchspace = Searchspace(tune_params, restrictions, runner.dev.max_threads)
     restrictions = searchspace._modified_restrictions
diff --git a/tune_bo_conv.py b/tune_bo_conv.py
index 03ee7f2fa..86a64ac8b 100644
--- a/tune_bo_conv.py
+++ b/tune_bo_conv.py
@@ -101,6 +101,10 @@ def tune(
     metrics = OrderedDict()
     metrics["GFLOP/s"] = lambda p: total_flops / (p["time"] / 1000.0)
 
+    cache_dir = directory / "cachefiles/convolution_milo"
+    cache_filename = f"{device_name}.json"
+    transfer_learning_caches = [p for p in cache_dir.iterdir() if not p.stem.endswith("_T4") and p.name != cache_filename]
+
     def run():
         return kernel_tuner.tune_kernel(
             "convolution_kernel",
@@ -112,7 +116,7 @@ def run():
             grid_div_x=grid_div_x,
             cmem_args=cmem_args,
             restrictions=restrict,
-            cache=directory / f"cachefiles/convolution_milo/{device_name}.json",
+            cache=cache_dir / cache_filename,
             metrics=metrics,
             lang=lang,
             iterations=32,
@@ -122,6 +126,7 @@ def run():
             strategy=strategy,
             strategy_options=strategy_options,
             simulation_mode=simulation_mode,
+            transfer_learning_caches=transfer_learning_caches
         )
 
     # start tuning
diff --git a/tune_bo_dedisp.py b/tune_bo_dedisp.py
index 2cfb3b58b..ed41d729f 100644
--- a/tune_bo_dedisp.py
+++ b/tune_bo_dedisp.py
@@ -20,7 +20,7 @@
 max_freq = min_freq + (nr_channels-1) * channel_bandwidth
 
 
-def tune(device, strategy="bayes_opt_BOTorch", strategy_options={ 'max_fevals': 1500 }, lang='HIP', verbose=True, quiet=False, simulation_mode=True, profiling=True):
+def tune(device_name, strategy="bayes_opt_BOTorch", strategy_options={ 'max_fevals': 1500 }, lang='HIP', verbose=True, quiet=False, simulation_mode=True, profiling=True):
 
     args = []
 
@@ -55,7 +55,10 @@ def tune(device, strategy="bayes_opt_BOTorch", strategy_options={ 'max_fevals':
     metrics["GB/s"] = lambda p: gbytes / (p['time'] / 1e3)
 
     directory = Path(__file__).parent / "../autotuning_methodology/cached_data_used/"
-    cachefile = directory / f"cachefiles/dedispersion_milo/{device}.json"
+    cache_dir = directory / "cachefiles/dedispersion_milo"
+    cache_filename = f"{device_name}.json"
+    transfer_learning_caches = [p for p in cache_dir.iterdir() if not p.stem.endswith("_T4") and p.name != cache_filename]
+
     assert directory.exists()
     if lang == "CUDA":
         kernel_file = directory / "kernels/dedisp_milo/dedispersion.cu"
@@ -67,8 +70,9 @@ def tune(device, strategy="bayes_opt_BOTorch", strategy_options={ 'max_fevals':
     def run():
         return kt.tune_kernel("dedispersion_kernel", kernel_file, problem_size, args, tune_params,
                                 answer=answer, compiler_options=cp, restrictions=config_valid, device=0,
-                                cache=cachefile, lang=lang, iterations=32, metrics=metrics, 
-                                simulation_mode=simulation_mode, verbose=verbose, quiet=quiet, strategy=strategy, strategy_options=strategy_options)
+                                cache=cache_dir / cache_filename, lang=lang, iterations=32, metrics=metrics, 
+                                simulation_mode=simulation_mode, verbose=verbose, quiet=quiet, strategy=strategy, 
+                                strategy_options=strategy_options, transfer_learning_caches=transfer_learning_caches)
     
     # start tuning
     if profiling:

From 1162ece8c0f12edc9d2e156b234bfd7cefd28075 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 21 Nov 2024 19:26:29 -0800
Subject: [PATCH 081/168] Added BO transfer learning strategy

---
 kernel_tuner/interface.py                            |  2 ++
 .../strategies/bayes_opt_BOTorch_transfer.py         | 12 +++++++++---
 tune_bo_conv.py                                      |  3 +--
 tune_bo_dedisp.py                                    |  2 +-
 4 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 56fd7d883..54aa737f6 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -52,6 +52,7 @@
     bayes_opt,
     bayes_opt_alt_BOTorch,
     bayes_opt_BOTorch,
+    bayes_opt_BOTorch_transfer,
     bayes_opt_GPyTorch,
     bayes_opt_GPyTorch_lean,
     bayes_opt_old,
@@ -91,6 +92,7 @@
     "bayes_opt_GPyTorch_lean": bayes_opt_GPyTorch_lean,
     "bayes_opt_BOTorch": bayes_opt_BOTorch,
     "bayes_opt_BOTorch_alt": bayes_opt_alt_BOTorch,
+    "bayes_opt_BOTorch_transfer": bayes_opt_BOTorch_transfer,
 }
 
 
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
index f8106658a..3457a3a4e 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
@@ -40,7 +40,7 @@
 def tune(searchspace: Searchspace, runner, tuning_options):
     """The entry function for tuning a searchspace using this algorithm."""
     max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
-    bo = BayesianOptimization(searchspace, runner, tuning_options)
+    bo = BayesianOptimizationTransfer(searchspace, runner, tuning_options)
     return bo.run(max_fevals)
 
 class BayesianOptimizationTransfer(BayesianOptimization):
@@ -49,6 +49,12 @@ class BayesianOptimizationTransfer(BayesianOptimization):
     def __init__(self, searchspace: Searchspace, runner, tuning_options):
         super().__init__(searchspace, runner, tuning_options)
 
+        self.searchspaces_transfer_learning = []
+        for tl_cache in tuning_options.transfer_learning_caches:
+            self.searchspaces_transfer_learning.append(Searchspace(None, None, None, from_cache=tl_cache))
+
+        raise ValueError(self.searchspaces_transfer_learning)
+
         self.best_rgpe_all = []
         self.best_random_all = []
         self.best_vanilla_nei_all = []
@@ -202,7 +208,7 @@ def compute_rank_weights(self, train_x, train_y, base_models, target_model, num_
             Tensor: `n_t`-dim tensor with the ranking weight for each model
         """
         ranking_losses = []
-        
+
         # compute ranking loss for each base model
         for task in range(len(base_models)):
             model = base_models[task]
@@ -244,7 +250,7 @@ def run(self, max_fevals: int, max_batch_size=2048):
             # Bayesian optimization loop
             for _ in range(fevals_left):
 
-                target_model = get_fitted_model(train_x, train_y, train_yvar)
+                target_model = self.get_fitted_model(train_x, train_y, train_yvar)
                 model_list = base_model_list + [target_model]
                 rank_weights = compute_rank_weights(
                     train_x,
diff --git a/tune_bo_conv.py b/tune_bo_conv.py
index 86a64ac8b..61635c51f 100644
--- a/tune_bo_conv.py
+++ b/tune_bo_conv.py
@@ -21,10 +21,9 @@ def ops(w, h, fw, fh):
 total_flops = ops(w, h, fw, fh)
 
 
-# def tune(inputs, lang, strategy):
 def tune(
     device_name: str,
-    strategy="bayes_opt_BOTorch",
+    strategy="bayes_opt_BOTorch_transfer",
     strategy_options={ 'max_fevals': 150 },
     verbose=True,
     quiet=False,
diff --git a/tune_bo_dedisp.py b/tune_bo_dedisp.py
index ed41d729f..67a56c17e 100644
--- a/tune_bo_dedisp.py
+++ b/tune_bo_dedisp.py
@@ -20,7 +20,7 @@
 max_freq = min_freq + (nr_channels-1) * channel_bandwidth
 
 
-def tune(device_name, strategy="bayes_opt_BOTorch", strategy_options={ 'max_fevals': 1500 }, lang='HIP', verbose=True, quiet=False, simulation_mode=True, profiling=True):
+def tune(device_name, strategy="bayes_opt_BOTorch_transfer", strategy_options={ 'max_fevals': 1500 }, lang='HIP', verbose=True, quiet=False, simulation_mode=True, profiling=True):
 
     args = []
 

From 62fa13587309fe5acbd1e71f869f5c693833fd24 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 21 Nov 2024 19:27:01 -0800
Subject: [PATCH 082/168] Implemented optionally constructing a searchspace
 from a cache dictionary

---
 kernel_tuner/searchspace.py | 73 +++++++++++++++++++++++--------------
 1 file changed, 45 insertions(+), 28 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 201052e8d..281bd2009 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -42,6 +42,7 @@ def __init__(
         block_size_names=default_block_size_names,
         build_neighbors_index=False,
         neighbor_method=None,
+        from_cache: dict=None,
         framework="PythonConstraint",
         solver_method="PC_OptimizedBacktrackingSolver",
         path_to_ATF_cache: Path = None,
@@ -53,7 +54,15 @@ def __init__(
             adjacent: picks closest parameter value in both directions for each parameter
             Hamming: any parameter config with 1 different parameter value is a neighbor
         Optionally sort the searchspace by the order in which the parameter values were specified. By default, sort goes from first to last parameter, to reverse this use sort_last_param_first.
+        Optionally an imported cache can be used instead with `from_cache`, in which case the `tune_params`, `restrictions` and `max_threads` arguments must be set to None, and construction is skipped.
         """
+        # check the arguments
+        if from_cache is not None:
+            assert tune_params is None and restrictions is None and max_threads is None, "When `from_cache` is used, the positional arguments must be set to None."
+            tune_params = from_cache["tune_params"]
+        if from_cache is None:
+            assert tune_params is not None and restrictions is not None and max_threads is not None, "Must specify positional arugments ."
+
         # set the object attributes using the arguments
         framework_l = framework.lower()
         restrictions = restrictions if restrictions is not None else []
@@ -96,36 +105,44 @@ def __init__(
                 try_to_constraint=framework_l == "pythonconstraint",
             )
 
-        # get the framework given the framework argument
-        if framework_l == "pythonconstraint":
-            searchspace_builder = self.__build_searchspace
-        elif framework_l == "pysmt":
-            searchspace_builder = self.__build_searchspace_pysmt
-        elif framework_l == "pyatf":
-            searchspace_builder = self.__build_searchspace_pyATF
-        elif framework_l == "atf_cache":
-            searchspace_builder = self.__build_searchspace_ATF_cache
-            self.path_to_ATF_cache = path_to_ATF_cache
-        elif framework_l == "bruteforce":
-            searchspace_builder = self.__build_searchspace_bruteforce
-        else:
-            raise ValueError(f"Invalid framework parameter {framework}")
-
-        # get the solver given the solver method argument
-        solver = ""
-        if solver_method.lower() == "pc_backtrackingsolver":
-            solver = BacktrackingSolver()
-        elif solver_method.lower() == "pc_optimizedbacktrackingsolver":
-            solver = OptimizedBacktrackingSolver(forwardcheck=False)
-        elif solver_method.lower() == "pc_recursivebacktrackingsolver":
-            solver = RecursiveBacktrackingSolver()
-        elif solver_method.lower() == "pc_minconflictssolver":
-            solver = MinConflictsSolver()
+        # if an imported cache, skip building and set the values directly
+        if from_cache is not None:
+            configs = list(dict(from_cache["cache"]).keys())
+            self.list, self.__dict, self.size = None, None, len(configs)    # TODO
+            raise ValueError(configs)
         else:
-            raise ValueError(f"Solver method {solver_method} not recognized.")
+            # get the framework given the framework argument
+            if framework_l == "pythonconstraint":
+                searchspace_builder = self.__build_searchspace
+            elif framework_l == "pysmt":
+                searchspace_builder = self.__build_searchspace_pysmt
+            elif framework_l == "pyatf":
+                searchspace_builder = self.__build_searchspace_pyATF
+            elif framework_l == "atf_cache":
+                searchspace_builder = self.__build_searchspace_ATF_cache
+                self.path_to_ATF_cache = path_to_ATF_cache
+            elif framework_l == "bruteforce":
+                searchspace_builder = self.__build_searchspace_bruteforce
+            else:
+                raise ValueError(f"Invalid framework parameter {framework}")
+
+            # get the solver given the solver method argument
+            solver = ""
+            if solver_method.lower() == "pc_backtrackingsolver":
+                solver = BacktrackingSolver()
+            elif solver_method.lower() == "pc_optimizedbacktrackingsolver":
+                solver = OptimizedBacktrackingSolver(forwardcheck=False)
+            elif solver_method.lower() == "pc_recursivebacktrackingsolver":
+                solver = RecursiveBacktrackingSolver()
+            elif solver_method.lower() == "pc_minconflictssolver":
+                solver = MinConflictsSolver()
+            else:
+                raise ValueError(f"Solver method {solver_method} not recognized.")
+
+            # build the search space
+            self.list, self.__dict, self.size = searchspace_builder(block_size_names, max_threads, solver)
 
-        # build the search space
-        self.list, self.__dict, self.size = searchspace_builder(block_size_names, max_threads, solver)
+        # finalize construction
         self.__numpy = None
         self.num_params = len(self.tune_params)
         self.indices = np.arange(self.size)

From 57a262f6cafdb62ca7239bc619fcad0b896244fc Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 21 Nov 2024 21:25:58 -0800
Subject: [PATCH 083/168] Implemented construction of Searchspaces from caches

---
 kernel_tuner/searchspace.py                           | 7 ++++---
 kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py | 4 +---
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 281bd2009..0cc444717 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -107,9 +107,10 @@ def __init__(
 
         # if an imported cache, skip building and set the values directly
         if from_cache is not None:
-            configs = list(dict(from_cache["cache"]).keys())
-            self.list, self.__dict, self.size = None, None, len(configs)    # TODO
-            raise ValueError(configs)
+            configs = dict(from_cache["cache"]).values()
+            self.list = list(tuple([v for p, v in c.items() if p in self.tune_params]) for c in configs)
+            self.size = len(self.list)
+            self.__dict = dict(zip(self.list, range(self.size)))
         else:
             # get the framework given the framework argument
             if framework_l == "pythonconstraint":
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
index 3457a3a4e..95ffc9670 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
@@ -49,12 +49,10 @@ class BayesianOptimizationTransfer(BayesianOptimization):
     def __init__(self, searchspace: Searchspace, runner, tuning_options):
         super().__init__(searchspace, runner, tuning_options)
 
-        self.searchspaces_transfer_learning = []
+        self.searchspaces_transfer_learning: list[Searchspace] = []
         for tl_cache in tuning_options.transfer_learning_caches:
             self.searchspaces_transfer_learning.append(Searchspace(None, None, None, from_cache=tl_cache))
 
-        raise ValueError(self.searchspaces_transfer_learning)
-
         self.best_rgpe_all = []
         self.best_random_all = []
         self.best_vanilla_nei_all = []

From 964a6ee80bb2890e0887c8aa6e14642aea0c764a Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 21 Nov 2024 22:07:56 -0800
Subject: [PATCH 084/168] Transfer learning inputs and outcomes are represented
 in Tensors

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py        |  2 +-
 .../strategies/bayes_opt_BOTorch_transfer.py        | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 45ac275a9..85e877a4a 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -66,7 +66,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.searchspace.initialize_tensorspace(dtype=torch.float32, device=self.tensor_device)
         self.searchspace_tensors = searchspace.get_tensorspace()
         self.bounds, self.bounds_indices = self.searchspace.get_tensorspace_bounds()
-        self.train_X = torch.empty(0, **self.searchspace.tensor_kwargs)
+        self.train_X = torch.empty(0, **self.searchspace.tensor_kwargs) # TODO implement continuing from cache
         self.train_Y = torch.empty(0, **self.searchspace.tensor_kwargs)
         self.train_Yvar = torch.empty(0, **self.searchspace.tensor_kwargs)
 
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
index 95ffc9670..34f1e0c82 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
@@ -21,6 +21,7 @@
 except ImportError:
     bayes_opt_present = False
 
+
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies.bayes_opt_BOTorch import BayesianOptimization
 from kernel_tuner.util import StopCriterionReached
@@ -49,9 +50,19 @@ class BayesianOptimizationTransfer(BayesianOptimization):
     def __init__(self, searchspace: Searchspace, runner, tuning_options):
         super().__init__(searchspace, runner, tuning_options)
 
+        # get input and outcome data for each task
         self.searchspaces_transfer_learning: list[Searchspace] = []
+        self.inputs_transfer_learning: list[Tensor] = []
+        self.outcomes_transfer_learning: list[Tensor] = []
         for tl_cache in tuning_options.transfer_learning_caches:
-            self.searchspaces_transfer_learning.append(Searchspace(None, None, None, from_cache=tl_cache))
+            tensor_kwargs = searchspace.tensor_kwargs
+            tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)
+            tl_searchspace.initialize_tensorspace(**tensor_kwargs)
+            self.searchspaces_transfer_learning.append(tl_searchspace)
+            self.inputs_transfer_learning.append(tl_searchspace.get_tensorspace())
+            tl_outcomes = [c[tuning_options.objective] for c in tl_cache["cache"].values()]
+            self.outcomes_transfer_learning.append(torch.tensor(tl_outcomes, **tensor_kwargs))
+            assert self.inputs_transfer_learning[-1].shape[0] == self.outcomes_transfer_learning[-1].shape[0]
 
         self.best_rgpe_all = []
         self.best_random_all = []

From 24c67670ffd8bfd17fc6bc99b3aa7913e2a64a99 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 21 Nov 2024 22:30:19 -0800
Subject: [PATCH 085/168] More general approach to model and likelihood
 initialization to make it suitable for transfer learning

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 85e877a4a..1c3fea975 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -65,7 +65,6 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.searchspace = searchspace
         self.searchspace.initialize_tensorspace(dtype=torch.float32, device=self.tensor_device)
         self.searchspace_tensors = searchspace.get_tensorspace()
-        self.bounds, self.bounds_indices = self.searchspace.get_tensorspace_bounds()
         self.train_X = torch.empty(0, **self.searchspace.tensor_kwargs) # TODO implement continuing from cache
         self.train_Y = torch.empty(0, **self.searchspace.tensor_kwargs)
         self.train_Yvar = torch.empty(0, **self.searchspace.tensor_kwargs)
@@ -122,19 +121,17 @@ def initial_sample(self):
         self.evaluate_configs(sample_configs)
         self.initial_sample_taken = True
 
-    def initialize_model(self, state_dict=None, exact=True):
-        """Initialize the model and likelihood, possibly with a state dict for faster fitting."""
-        train_X = self.train_X
-        train_Y = self.train_Y
-        train_Yvar = self.train_Yvar
+    def get_model_and_likelihood(self, searchspace: Searchspace, train_X: Tensor, train_Y: Tensor, train_Yvar: Tensor=None, state_dict=None, exact=True):
+        """Initialize a model and likelihood, possibly with a state dict for faster fitting."""
+        bounds, bounds_indices = searchspace.get_tensorspace_bounds()
         transforms = dict(
-            input_transform=Normalize(d=train_X.shape[-1], indices=self.bounds_indices, bounds=self.bounds),
+            input_transform=Normalize(d=train_X.shape[-1], indices=bounds_indices, bounds=bounds),
             outcome_transform=Standardize(m=train_Y.shape[-1], batch_shape=train_X.shape[:-2])
         )
 
         # initialize the model
         if exact:
-            catdims = self.searchspace.get_tensorspace_categorical_dimensions()
+            catdims = searchspace.get_tensorspace_categorical_dimensions()
             if len(catdims) == 0:
                 model = SingleTaskGP(train_X, train_Y, train_Yvar=train_Yvar, **transforms)
             else:
@@ -151,14 +148,14 @@ def initialize_model(self, state_dict=None, exact=True):
             mll = ExactMarginalLogLikelihood(model.likelihood, model)
         else:
             mll = VariationalELBO(model.likelihood, model.model, num_data=train_Y.size(0))
-        return mll, model
+        return model, mll
 
     def run(self, max_fevals: int, max_batch_size=2048):
         """Run the Bayesian Optimization loop for at most `max_fevals`."""
         try:
             if not self.initial_sample_taken:
                 self.initial_sample()
-            mll, model = self.initialize_model()
+            model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
             fevals_left = max_fevals - self.initial_sample_size
 
             # create array to gradually reduce number of optimization spaces as fewer fevals are left
@@ -228,7 +225,7 @@ def run(self, max_fevals: int, max_batch_size=2048):
 
                 # reinitialize the models so they are ready for fitting on next iteration
                 if loop_i < len(nums_optimization_spaces) - 1:
-                    mll, model = self.initialize_model(model.state_dict())
+                    model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar, state_dict=model.state_dict())
         except StopCriterionReached as e:
             if self.tuning_options.verbose:
                 print(e)

From dc4b4c78723f41fe7ec6b13361f7bc71bd4f58b4 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 21 Nov 2024 22:58:13 -0800
Subject: [PATCH 086/168] Fitting a model for each base transfer learning task

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py  |  8 ++-
 .../strategies/bayes_opt_BOTorch_transfer.py  | 68 ++++---------------
 2 files changed, 20 insertions(+), 56 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index 1c3fea975..e250eafbb 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -149,6 +149,10 @@ def get_model_and_likelihood(self, searchspace: Searchspace, train_X: Tensor, tr
         else:
             mll = VariationalELBO(model.likelihood, model.model, num_data=train_Y.size(0))
         return model, mll
+    
+    def fit(self, mll):
+        """Fit the given marginal log likelihood with `fit_gpytorch_mll`, using `fit_gpytorch_mll_torch` as optimizer, and return the result."""
+        return fit_gpytorch_mll(mll, optimizer=fit_gpytorch_mll_torch)
 
     def run(self, max_fevals: int, max_batch_size=2048):
         """Run the Bayesian Optimization loop for at most `max_fevals`."""
@@ -176,8 +180,8 @@ def run(self, max_fevals: int, max_batch_size=2048):
             for loop_i, num_optimization_spaces in enumerate(nums_optimization_spaces):
                 num_optimization_spaces = min(num_optimization_spaces, fevals_left)
 
-                # fit a Gaussian Process model
-                fit_gpytorch_mll(mll, optimizer=fit_gpytorch_mll_torch)
+                # fit on a Gaussian Process model
+                mll = self.fit(mll)
                 
                 # define the acquisition function
                 acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
index 34f1e0c82..527dcd409 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
@@ -50,70 +50,30 @@ class BayesianOptimizationTransfer(BayesianOptimization):
     def __init__(self, searchspace: Searchspace, runner, tuning_options):
         super().__init__(searchspace, runner, tuning_options)
 
-        # get input and outcome data for each task
+        # set up the data and model for each transfer learning base task
         self.searchspaces_transfer_learning: list[Searchspace] = []
         self.inputs_transfer_learning: list[Tensor] = []
         self.outcomes_transfer_learning: list[Tensor] = []
+        self.models_mlls_transfer_learning: list[tuple] = []
         for tl_cache in tuning_options.transfer_learning_caches:
+            # construct the searchspace for this task
             tensor_kwargs = searchspace.tensor_kwargs
             tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)
             tl_searchspace.initialize_tensorspace(**tensor_kwargs)
             self.searchspaces_transfer_learning.append(tl_searchspace)
-            self.inputs_transfer_learning.append(tl_searchspace.get_tensorspace())
-            tl_outcomes = [c[tuning_options.objective] for c in tl_cache["cache"].values()]
-            self.outcomes_transfer_learning.append(torch.tensor(tl_outcomes, **tensor_kwargs))
-            assert self.inputs_transfer_learning[-1].shape[0] == self.outcomes_transfer_learning[-1].shape[0]
-
-        self.best_rgpe_all = []
-        self.best_random_all = []
-        self.best_vanilla_nei_all = []
-        self.noise_std = 0.05
 
-        # Sample data for each base task
-        data_by_task = {}
-        for task in range(NUM_BASE_TASKS):
-            num_training_points = 20
-            # draw points from a sobol sequence
-            raw_x = draw_sobol_samples(
-                bounds=BOUNDS,
-                n=num_training_points,
-                q=1,
-                seed=task + 5397923,
-            ).squeeze(1)
-            # get observed values
-            f_x = f(raw_x, task_shift(task + 1))
-            train_y = f_x + noise_std * torch.randn_like(f_x)
-            train_yvar = torch.full_like(train_y, noise_std**2)
-            # store training data
-            data_by_task[task] = {
-                # scale x to [0, 1]
-                "train_x": normalize(raw_x, bounds=BOUNDS),
-                "train_y": train_y,
-                "train_yvar": train_yvar,
-            }
-
-        # Fit base model
-        base_model_list = []
-        for task in range(NUM_BASE_TASKS):
-            print(f"Fitting base model {task}")
-            model = self.get_fitted_model(
-                data_by_task[task]["train_x"],
-                data_by_task[task]["train_y"],
-                data_by_task[task]["train_yvar"],
-            )
-            base_model_list.append(model)
+            # get the inputs and outcomes for this task
+            tl_inputs = tl_searchspace.get_tensorspace()
+            self.inputs_transfer_learning.append(tl_inputs)
+            tl_outcomes = torch.tensor([c[tuning_options.objective] for c in tl_cache["cache"].values()], **tensor_kwargs).unsqueeze(-1)
+            self.outcomes_transfer_learning.append(tl_outcomes)
+            assert self.inputs_transfer_learning[-1].shape[0] == self.outcomes_transfer_learning[-1].shape[0]
 
-    def run_config(self, config: tuple):
-        return super().run_config(config)
-    
-    def evaluate_configs(self, X: Tensor):
-        return super().evaluate_configs(X)
-    
-    def initial_sample(self):
-        return super().initial_sample()
-    
-    def initialize_model(self, state_dict=None, exact=True):
-        return super().initialize_model(state_dict, exact)
+            # fit a model and likelihood for this task
+            model, mll = self.get_model_and_likelihood(tl_searchspace, tl_inputs, tl_outcomes)
+            mll = self.fit(mll)
+            self.models_mlls_transfer_learning.append((model, mll))
+        raise ValueError(self.models_mlls_transfer_learning)
     
     def get_fitted_model(self, train_X, train_Y, train_Yvar, state_dict=None):
         """Get a single task GP. The model will be fit unless a state_dict with model hyperparameters is provided."""

From e21a605694fc4c896e31b59afa75c0f18d6ee09c Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 21 Nov 2024 23:48:43 -0800
Subject: [PATCH 087/168] Account for invalid configurations in base task
 caches

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py     |  8 +++++++-
 .../strategies/bayes_opt_BOTorch_transfer.py     | 16 ++++++++++++----
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index e250eafbb..fd558feea 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -69,12 +69,18 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.train_Y = torch.empty(0, **self.searchspace.tensor_kwargs)
         self.train_Yvar = torch.empty(0, **self.searchspace.tensor_kwargs)
 
+    def is_valid_result(self, result, results=None):
+        """Return True if `result` is neither an `ErrorConfig` nor NaN and `results` (empty if omitted) contains no NaN."""
+        if results is None:
+            results = []
+        return not isinstance(result, ErrorConfig) and not np.isnan(result) and not any(np.isnan(results))
+
     def run_config(self, config: tuple):
         """Run a single configuration. Returns the result and whether it is valid."""
         result, results = self.cost_func(config)
         results = np.array(results)
         var = np.nan
-        valid = not isinstance(result, ErrorConfig) and not np.isnan(result) and not any(np.isnan(results))
+        valid = self.is_valid_result(result, results)
         if not valid:
             result = np.nan
         elif not self.maximize:
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
index 527dcd409..e30a3a4db 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
@@ -56,6 +56,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.outcomes_transfer_learning: list[Tensor] = []
         self.models_mlls_transfer_learning: list[tuple] = []
         for tl_cache in tuning_options.transfer_learning_caches:
+            print(f"Importing transfer learning for {tl_cache["kernel_name"]}-{tl_cache['device_name']}")
             # construct the searchspace for this task
             tensor_kwargs = searchspace.tensor_kwargs
             tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)
@@ -63,17 +64,24 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
             self.searchspaces_transfer_learning.append(tl_searchspace)
 
             # get the inputs and outcomes for this task
-            tl_inputs = tl_searchspace.get_tensorspace()
+            inputs = []
+            outcomes = []
+            for c in tl_cache["cache"].values():
+                result = c[tuning_options.objective]
+                if self.is_valid_result(result):
+                    config = tuple(c[p] for p in tl_searchspace.tune_params.keys())
+                    inputs.append(tl_searchspace.param_config_to_tensor(config))
+                    outcomes.append(result)
+            tl_inputs = torch.stack(inputs).to(tl_searchspace.tensor_device)
+            tl_outcomes = torch.tensor(outcomes, **tensor_kwargs).unsqueeze(-1)
+            assert tl_inputs.shape[0] == tl_outcomes.shape[0]
             self.inputs_transfer_learning.append(tl_inputs)
-            tl_outcomes = torch.tensor([c[tuning_options.objective] for c in tl_cache["cache"].values()], **tensor_kwargs).unsqueeze(-1)
             self.outcomes_transfer_learning.append(tl_outcomes)
-            assert self.inputs_transfer_learning[-1].shape[0] == self.outcomes_transfer_learning[-1].shape[0]
 
             # fit a model and likelihood for this task
             model, mll = self.get_model_and_likelihood(tl_searchspace, tl_inputs, tl_outcomes)
             mll = self.fit(mll)
             self.models_mlls_transfer_learning.append((model, mll))
-        raise ValueError(self.models_mlls_transfer_learning)
     
     def get_fitted_model(self, train_X, train_Y, train_Yvar, state_dict=None):
         """Get a single task GP. The model will be fit unless a state_dict with model hyperparameters is provided."""

From e3cfe912759536afb21b140c3acb91d0f4e74f89 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 22 Nov 2024 00:44:53 -0800
Subject: [PATCH 088/168] Implement main RGPE BO loop

---
 .../strategies/bayes_opt_BOTorch_transfer.py  | 87 ++++++++-----------
 1 file changed, 38 insertions(+), 49 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
index e30a3a4db..4030dbad2 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
@@ -2,7 +2,7 @@
 
 try:
     import torch
-    from botorch.acquisition import LogExpectedImprovement
+    from botorch.acquisition import LogExpectedImprovement, qLogNoisyExpectedImprovement
     from botorch.fit import fit_gpytorch_mll, fit_gpytorch_mll_torch
     from botorch.models import SingleTaskGP
     from botorch.models.gpytorch import GPyTorchModel
@@ -54,7 +54,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.searchspaces_transfer_learning: list[Searchspace] = []
         self.inputs_transfer_learning: list[Tensor] = []
         self.outcomes_transfer_learning: list[Tensor] = []
-        self.models_mlls_transfer_learning: list[tuple] = []
+        self.models_transfer_learning: list = []
         for tl_cache in tuning_options.transfer_learning_caches:
             print(f"Importing transfer learning for {tl_cache["kernel_name"]}-{tl_cache['device_name']}")
             # construct the searchspace for this task
@@ -81,17 +81,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
             # fit a model and likelihood for this task
             model, mll = self.get_model_and_likelihood(tl_searchspace, tl_inputs, tl_outcomes)
             mll = self.fit(mll)
-            self.models_mlls_transfer_learning.append((model, mll))
-    
-    def get_fitted_model(self, train_X, train_Y, train_Yvar, state_dict=None):
-        """Get a single task GP. The model will be fit unless a state_dict with model hyperparameters is provided."""
-        model = SingleTaskGP(train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar)
-        if state_dict is None:
-            mll = ExactMarginalLogLikelihood(model.likelihood, model).to(train_X)
-            fit_gpytorch_mll(mll)
-        else:
-            model.load_state_dict(state_dict)
-        return model
+            self.models_transfer_learning.append(model)
     
     def roll_col(self, X, shift):
         """Rotate columns to right by shift."""
@@ -221,54 +211,53 @@ def run(self, max_fevals: int, max_batch_size=2048):
         try:
             if not self.initial_sample_taken:
                 self.initial_sample()
-            mll, model = self.initialize_model()
+            model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
             fevals_left = max_fevals - self.initial_sample_size
 
             # Bayesian optimization loop
             for _ in range(fevals_left):
 
-                target_model = self.get_fitted_model(train_x, train_y, train_yvar)
-                model_list = base_model_list + [target_model]
-                rank_weights = compute_rank_weights(
-                    train_x,
-                    train_y,
-                    base_model_list,
-                    target_model,
+                # fit a Gaussian Process model
+                fit_gpytorch_mll(mll, optimizer=fit_gpytorch_mll_torch)
+
+                # calculate the rank weights
+                model_list = self.models_transfer_learning + [model]
+                rank_weights = self.compute_rank_weights(
+                    self.train_X,
+                    self.train_Y,
+                    self.models_transfer_learning,
+                    model,
                     NUM_POSTERIOR_SAMPLES,
                 )
 
-                # fit a Gaussian Process model
-                fit_gpytorch_mll(mll, optimizer=fit_gpytorch_mll_torch)
-                
-                # define the acquisition function
-                acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
-                
-                # optimize acquisition function to find the next evaluation point
-                if max_batch_size < self.searchspace_tensors.size(0):
-                    # optimize over a lattice if the space is too large
-                    candidate, _ = optimize_acqf_discrete_local_search(
-                        acqf,
-                        q=1,
-                        discrete_choices=self.searchspace_tensors,
-                        max_batch_size=max_batch_size,
-                        num_restarts=5,
-                        raw_samples=1024
-                    )
-                else:
-                    candidate, _ = optimize_acqf_discrete(
-                        acqf, 
-                        q=1, 
-                        choices=self.searchspace_tensors,
-                        max_batch_size=max_batch_size
-                    )
+                # create rank model and acquisition function
+                rgpe_model = RGPE(model_list, rank_weights)
+                # acqf = LogExpectedImprovement(model=rgpe_model, best_f=self.train_Y.max(), maximize=True)
+                sampler_qnei = SobolQMCNormalSampler(sample_shape=torch.Size([MC_SAMPLES]))
+                qNEI = qLogNoisyExpectedImprovement(
+                    model=rgpe_model,
+                    X_baseline=self.train_X,
+                    sampler=sampler_qnei,
+                    prune_baseline=False,
+                )
+
+                # optimize
+                candidate, _ = optimize_acqf_discrete_local_search(
+                    acq_function=qNEI,
+                    discrete_choices=self.searchspace_tensors,
+                    q=Q_BATCH_SIZE,
+                    num_restarts=N_RESTARTS,
+                    raw_samples=N_RESTART_CANDIDATES,
+                    max_batch_size=max_batch_size
+                )
                     
-                    # evaluate the new candidate
-                    self.evaluate_configs(candidate)
-                    fevals_left -= 1
+                # evaluate the new candidate
+                self.evaluate_configs(candidate)
+                fevals_left -= 1
 
                 # reinitialize the models so they are ready for fitting on next iteration
                 if fevals_left > 0:
-                    mll, model = self.initialize_model(model.state_dict())
+                    model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
         except StopCriterionReached as e:
             if self.tuning_options.verbose:
                 print(e)

From 2334214e22a764c6a467653d2846301a1aea6a6d Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 22 Nov 2024 01:24:56 -0800
Subject: [PATCH 089/168] Improved the efficiency of taking initial sample

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index fd558feea..cd496120e 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -122,10 +122,11 @@ def evaluate_configs(self, X: Tensor):
         
     def initial_sample(self):
         """Take an initial sample."""
-        sample_indices = torch.from_numpy(self.searchspace.get_random_sample_indices(self.initial_sample_size)).to(self.tensor_device)
-        sample_configs = self.searchspace_tensors.index_select(0, sample_indices)
-        self.evaluate_configs(sample_configs)
         self.initial_sample_taken = True
+        if self.initial_sample_size > 0:
+            sample_indices = torch.from_numpy(self.searchspace.get_random_sample_indices(self.initial_sample_size)).to(self.tensor_device)
+            sample_configs = self.searchspace_tensors.index_select(0, sample_indices)
+            self.evaluate_configs(sample_configs)
 
     def get_model_and_likelihood(self, searchspace: Searchspace, train_X: Tensor, train_Y: Tensor, train_Yvar: Tensor=None, state_dict=None, exact=True):
         """Initialize a model and likelihood, possibly with a state dict for faster fitting."""

From c78a18c29c726e4f4022135603fd32bf1cdc2eb9 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 22 Nov 2024 01:27:48 -0800
Subject: [PATCH 090/168] Use of state dictionary is made optional

---
 .../strategies/bayes_opt_BOTorch_transfer.py  | 39 ++++++++++++-------
 1 file changed, 25 insertions(+), 14 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
index 4030dbad2..790f0f0c9 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
@@ -125,7 +125,7 @@ def compute_ranking_loss(self, f_samps, target_y):
                 ).sum(dim=-1)
         return rank_loss
     
-    def get_target_model_loocv_sample_preds(self, train_x, train_y, train_yvar, target_model, num_samples):
+    def get_target_model_loocv_sample_preds(self, train_x, train_y, train_yvar, target_model, num_samples, no_state=False):
         """Create a batch-mode LOOCV GP and draw a joint sample across all points from the target task.
 
         Args:
@@ -141,15 +141,22 @@ def get_target_model_loocv_sample_preds(self, train_x, train_y, train_yvar, targ
         masks = torch.eye(len(train_x), dtype=torch.uint8, device=self.tensor_device).bool()
         train_x_cv = torch.stack([train_x[~m] for m in masks])
         train_y_cv = torch.stack([train_y[~m] for m in masks])
-        train_yvar_cv = torch.stack([train_yvar[~m] for m in masks])
-        state_dict = target_model.state_dict()
-        # expand to batch size of batch_mode LOOCV model
-        state_dict_expanded = {
-            name: t.expand(batch_size, *[-1 for _ in range(t.ndim)])
-            for name, t in state_dict.items()
-        }
-        model = self.get_fitted_model(
-            train_x_cv, train_y_cv, train_yvar_cv, state_dict=state_dict_expanded
+        train_yvar_cv = torch.stack([train_yvar[~m] for m in masks]) if train_yvar is not None else None
+
+        # use a state dictionary for fast updates
+        if no_state:
+            state_dict_expanded = None
+        else:
+            state_dict = target_model.state_dict()
+
+            # expand to batch size of batch_mode LOOCV model
+            state_dict_expanded = {
+                name: t.expand(batch_size, *[-1 for _ in range(t.ndim)])
+                for name, t in state_dict.items()
+            }
+        
+        model, _ = self.get_model_and_likelihood(
+            self.searchspace, train_x_cv, train_y_cv, train_yvar_cv, state_dict=state_dict_expanded
         )
         with torch.no_grad():
             posterior = model.posterior(train_x)
@@ -159,7 +166,7 @@ def get_target_model_loocv_sample_preds(self, train_x, train_y, train_yvar, targ
             sampler = SobolQMCNormalSampler(sample_shape=torch.Size([num_samples]))
             return sampler(posterior).squeeze(-1)
     
-    def compute_rank_weights(self, train_x, train_y, base_models, target_model, num_samples):
+    def compute_rank_weights(self, train_x, train_y, train_yvar, base_models, target_model, num_samples, no_state=False):
         """Compute ranking weights for each base model and the target model (using LOOCV for the target model).
         
         Note: This implementation does not currently address weight dilution, since we only have a small number of base models.
@@ -177,8 +184,7 @@ def compute_rank_weights(self, train_x, train_y, base_models, target_model, num_
         ranking_losses = []
 
         # compute ranking loss for each base model
-        for task in range(len(base_models)):
-            model = base_models[task]
+        for model in base_models:
             # compute posterior over training points for target task
             posterior = model.posterior(train_x)
             sampler = SobolQMCNormalSampler(sample_shape=torch.Size([num_samples]))
@@ -194,6 +200,7 @@ def compute_rank_weights(self, train_x, train_y, base_models, target_model, num_
             train_yvar,
             target_model,
             num_samples,
+            no_state=no_state,
         )
         ranking_losses.append(self.compute_ranking_loss(target_f_samps, train_y))
         ranking_loss_tensor = torch.stack(ranking_losses)
@@ -213,6 +220,7 @@ def run(self, max_fevals: int, max_batch_size=2048):
                 self.initial_sample()
             model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
             fevals_left = max_fevals - self.initial_sample_size
+            first_loop = self.initial_sample_size > 0
 
             # Bayesian optimization loop
             for _ in range(fevals_left):
@@ -225,9 +233,11 @@ def run(self, max_fevals: int, max_batch_size=2048):
                 rank_weights = self.compute_rank_weights(
                     self.train_X,
                     self.train_Y,
+                    self.train_Yvar,
                     self.models_transfer_learning,
                     model,
                     NUM_POSTERIOR_SAMPLES,
+                    no_state=first_loop,
                 )
 
                 # create rank model and acquisition function
@@ -258,6 +268,7 @@ def run(self, max_fevals: int, max_batch_size=2048):
                 # reinitialize the models so they are ready for fitting on next iteration
                 if fevals_left > 0:
                     model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
+                    first_loop = False
         except StopCriterionReached as e:
             if self.tuning_options.verbose:
                 print(e)
@@ -310,4 +321,4 @@ def forward(self, x):
         # base models and target model
         mean_x = torch.stack(weighted_means).sum(dim=0)
         covar_x = PsdSumLazyTensor(*weighted_covars)
-        return MultivariateNormal(mean_x, covar_x)
\ No newline at end of file
+        return MultivariateNormal(mean_x, covar_x)

From 8416098b111de19c954a7f8079512419492ef380 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 22 Nov 2024 02:44:59 -0800
Subject: [PATCH 091/168] Renamed RGPE strategy

---
 kernel_tuner/interface.py                              |  6 ++++--
 ..._transfer.py => bayes_opt_BOTorch_transfer_RGPE.py} | 10 +++-------
 2 files changed, 7 insertions(+), 9 deletions(-)
 rename kernel_tuner/strategies/{bayes_opt_BOTorch_transfer.py => bayes_opt_BOTorch_transfer_RGPE.py} (96%)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 54aa737f6..225439d30 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -52,7 +52,8 @@
     bayes_opt,
     bayes_opt_alt_BOTorch,
     bayes_opt_BOTorch,
-    bayes_opt_BOTorch_transfer,
+    bayes_opt_BOTorch_transfer_RGPE,
+    bayes_opt_BOTorch_transfer_weighted,
     bayes_opt_GPyTorch,
     bayes_opt_GPyTorch_lean,
     bayes_opt_old,
@@ -92,7 +93,8 @@
     "bayes_opt_GPyTorch_lean": bayes_opt_GPyTorch_lean,
     "bayes_opt_BOTorch": bayes_opt_BOTorch,
     "bayes_opt_BOTorch_alt": bayes_opt_alt_BOTorch,
-    "bayes_opt_BOTorch_transfer": bayes_opt_BOTorch_transfer,
+    "bayes_opt_BOTorch_transfer": bayes_opt_BOTorch_transfer_weighted,
+    "bayes_opt_BOTorch_transfer_RGPE": bayes_opt_BOTorch_transfer_RGPE,
 }
 
 
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
similarity index 96%
rename from kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
rename to kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
index 790f0f0c9..9b42fc77e 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
@@ -1,19 +1,15 @@
-"""Bayesian Optimization implementation using BO Torch."""
+"""Bayesian Optimization implementation using BO Torch and transfer learning with RGPE."""
 
 try:
     import torch
-    from botorch.acquisition import LogExpectedImprovement, qLogNoisyExpectedImprovement
+    from botorch.acquisition import qLogNoisyExpectedImprovement
     from botorch.fit import fit_gpytorch_mll, fit_gpytorch_mll_torch
-    from botorch.models import SingleTaskGP
     from botorch.models.gpytorch import GPyTorchModel
-    from botorch.optim.optimize import optimize_acqf_discrete, optimize_acqf_discrete_local_search
+    from botorch.optim.optimize import optimize_acqf_discrete_local_search
     from botorch.sampling.normal import SobolQMCNormalSampler
-    from botorch.utils.sampling import draw_sobol_samples
-    from botorch.utils.transforms import normalize, unnormalize
     from gpytorch.distributions import MultivariateNormal
     from gpytorch.lazy import PsdSumLazyTensor
     from gpytorch.likelihoods import LikelihoodList
-    from gpytorch.mlls import ExactMarginalLogLikelihood
     from gpytorch.models import GP
     from torch import Tensor
     from torch.nn import ModuleList

From dc000b70abeb35de3bbaaf1656a1b2de7350590f Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 22 Nov 2024 03:43:45 -0800
Subject: [PATCH 092/168] Implemented new transfer learning strategy with
 multiple independent GPs

---
 kernel_tuner/strategies/bayes_opt_BOTorch.py  |   1 +
 .../bayes_opt_BOTorch_transfer_weighted.py    | 156 ++++++++++++++++++
 2 files changed, 157 insertions(+)
 create mode 100644 kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
index cd496120e..5ee2854dc 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch.py
@@ -117,6 +117,7 @@ def evaluate_configs(self, X: Tensor):
                 self.train_X = torch.cat([self.train_X, torch.stack(valid_configs)])
                 self.train_Y = torch.cat([self.train_Y, torch.tensor(valid_results, **self.searchspace.tensor_kwargs)])
                 self.train_Yvar = torch.cat([self.train_Yvar, torch.tensor(valid_vars, **self.searchspace.tensor_kwargs)])
+            return valid_results
         else:
             raise NotImplementedError(f"Evaluation has not been implemented for type {type(X)}")
         
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
new file mode 100644
index 000000000..e76c0793c
--- /dev/null
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
@@ -0,0 +1,156 @@
+"""Bayesian Optimization implementation using BO Torch and transfer learning with multiple independent GPs."""
+
+try:
+    import torch
+    from botorch.acquisition import LogExpectedImprovement
+    from botorch.optim.optimize import optimize_acqf_discrete
+    from torch import Tensor
+    bayes_opt_present = True
+except ImportError:
+    bayes_opt_present = False
+
+from math import ceil, sqrt
+
+import numpy as np
+
+from kernel_tuner.searchspace import Searchspace
+from kernel_tuner.strategies.bayes_opt_BOTorch import BayesianOptimization
+from kernel_tuner.util import StopCriterionReached
+
+
+def tune(searchspace: Searchspace, runner, tuning_options):
+    """The entry function for tuning a searchspace using this algorithm."""
+    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
+    bo = BayesianOptimizationTransfer(searchspace, runner, tuning_options)
+    return bo.run(max_fevals)
+
+class BayesianOptimizationTransfer(BayesianOptimization):
+    """Bayesian Optimization class with transfer learning."""
+
+    def __init__(self, searchspace: Searchspace, runner, tuning_options):
+        super().__init__(searchspace, runner, tuning_options)
+
+        # set up the data and model for each transfer learning base task
+        self.searchspaces_transfer_learning: list[Searchspace] = []
+        self.inputs_transfer_learning: list[Tensor] = []
+        self.outcomes_transfer_learning: list[Tensor] = []
+        self.models_transfer_learning: list = []
+        for tl_cache in tuning_options.transfer_learning_caches:
+            print(f"Importing transfer learning for {tl_cache["kernel_name"]}-{tl_cache['device_name']}")
+            # construct the searchspace for this task
+            tensor_kwargs = searchspace.tensor_kwargs
+            tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)
+            tl_searchspace.initialize_tensorspace(**tensor_kwargs)
+            self.searchspaces_transfer_learning.append(tl_searchspace)
+
+            # get the inputs and outcomes for this task
+            inputs = []
+            outcomes = []
+            for c in tl_cache["cache"].values():
+                result = c[tuning_options.objective]
+                if self.is_valid_result(result):
+                    config = tuple(c[p] for p in tl_searchspace.tune_params.keys())
+                    inputs.append(tl_searchspace.param_config_to_tensor(config))
+                    outcomes.append(result)
+            tl_inputs = torch.stack(inputs).to(tl_searchspace.tensor_device)
+            tl_outcomes = torch.tensor(outcomes, **tensor_kwargs).unsqueeze(-1)
+            assert tl_inputs.shape[0] == tl_outcomes.shape[0]
+            self.inputs_transfer_learning.append(tl_inputs)
+            self.outcomes_transfer_learning.append(tl_outcomes)
+
+            # fit a model and likelihood for this task
+            model, mll = self.get_model_and_likelihood(tl_searchspace, tl_inputs, tl_outcomes)
+            mll = self.fit(mll)
+            self.models_transfer_learning.append(model)
+    
+    def run(self, max_fevals: int, max_batch_size=2048):
+        """Run the Bayesian Optimization loop for at most `max_fevals`."""
+        try:
+            if not self.initial_sample_taken:
+                self.initial_sample()
+            model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
+            fevals_left = max_fevals - self.initial_sample_size
+
+            # create array to gradually reduce number of optimization spaces as fewer fevals are left
+            tensorspace_size = self.searchspace_tensors.size(0)
+            reserve_final_loops = min(3, fevals_left)   # reserve some loops at the end that are never split
+            fevals_left -= reserve_final_loops
+            num_loops = min(max(round(sqrt(fevals_left*2)), 3), fevals_left)  # set the number of loops for the array
+            avg_optimization_spaces = max(round(sqrt(tensorspace_size / max_batch_size)), 1)  # set the average number of optimization spaces
+            numspace = np.geomspace(start=avg_optimization_spaces, stop=0.1, num=num_loops)
+            nums_optimization_spaces = np.clip(np.round(numspace * (fevals_left / numspace.sum())), a_min=1, a_max=None)
+            # if there's a discrepancy, add or subtract the difference from the first number
+            if np.sum(nums_optimization_spaces) != fevals_left:
+                nums_optimization_spaces[0] += fevals_left - np.sum(nums_optimization_spaces)
+            nums_optimization_spaces = np.concatenate([nums_optimization_spaces, np.full(reserve_final_loops, 1)])
+            fevals_left += reserve_final_loops
+
+            # create the acquisition functions for the transferred GPs
+            acqfs = [LogExpectedImprovement(model=m, best_f=self.outcomes_transfer_learning[i].max(), maximize=True) for i, m in enumerate(self.models_transfer_learning)]
+            acqfs_results = [list() for _ in acqfs]
+
+            # Bayesian optimization loop
+            for loop_i, num_optimization_spaces in enumerate(nums_optimization_spaces):
+                num_optimization_spaces = round(min(num_optimization_spaces, fevals_left))
+
+                # fit on a Gaussian Process model
+                mll = self.fit(mll)
+
+                # divide the optimization space into random chunks
+                tensorspace_size = self.searchspace_tensors.size(0)
+                if num_optimization_spaces <= 1:
+                    optimization_spaces = [self.searchspace_tensors]
+                else:
+                    # shuffle the searchspace
+                    shuffled_indices = torch.randperm(tensorspace_size)
+                    tensorspace = self.searchspace_tensors[shuffled_indices]
+                    optimization_spaces = tensorspace.split(ceil(tensorspace_size / num_optimization_spaces))
+
+                # set which acquisition function is used at each point of the optimization space loop
+                if num_optimization_spaces > len(self.models_transfer_learning):
+                    # all models get a proportional turn
+                    selected_acqfs = np.linspace(start=0, stop=len(acqfs), num=num_optimization_spaces)
+                    selected_acqfs = selected_acqfs.round(0).astype(int)
+                    selected_acqfs = selected_acqfs.clip(0, len(acqfs)-1)
+                elif num_optimization_spaces == len(self.models_transfer_learning):
+                    # all models get one turn
+                    selected_acqfs = list(range(num_optimization_spaces))
+                elif num_optimization_spaces == 1:
+                    # only the target model is used
+                    selected_acqfs = [0]
+                else:
+                    # only select the target + best performing models (can include target as well)
+                    acqfs_means = np.array([np.mean(r) for r in acqfs_results])
+                    if not self.tuning_options["objective_higher_is_better"]:
+                        acqfs_means = -acqfs_means
+                    selected_acqfs = [0] + np.argpartition(acqfs_means, -num_optimization_spaces-1)[-num_optimization_spaces-1:]
+                    selected_acqfs = selected_acqfs.round(0).astype(int).clip(0, num_optimization_spaces-1)
+
+                # define the acquisition functions
+                acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
+                current_acqfs = [acqf] + acqfs
+                
+                # optimize acquisition function to find the next evaluation point
+                for i, optimization_space in enumerate(optimization_spaces):
+                    acqfs_index = selected_acqfs[i]
+                    candidate, _ = optimize_acqf_discrete(
+                        current_acqfs[acqfs_index], 
+                        q=1, 
+                        choices=optimization_space,
+                        max_batch_size=max_batch_size
+                    )
+                    
+                    # evaluate the new candidate
+                    result = self.evaluate_configs(candidate)
+                    if len(result) == 1:
+                        acqfs_results[acqfs_index].append(result[0])
+                    fevals_left -= 1
+
+                # reinitialize the models so they are ready for fitting on next iteration
+                if loop_i < len(nums_optimization_spaces) - 1:
+                    model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar, state_dict=model.state_dict())
+        except StopCriterionReached as e:
+            if self.tuning_options.verbose:
+                print(e)
+
+        return self.cost_func.results
\ No newline at end of file

From aa30ec2eceb7fde4e2b45d7967d6a6fd9a1ab299 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 22 Nov 2024 16:26:30 -0800
Subject: [PATCH 093/168] Removed redundant min/max results adjustment

---
 kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
index e76c0793c..f424f41b6 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
@@ -121,8 +121,6 @@ def run(self, max_fevals: int, max_batch_size=2048):
                 else:
                     # only select the target + best performing models (can include target as well)
                     acqfs_means = np.array([np.mean(r) for r in acqfs_results])
-                    if not self.tuning_options["objective_higher_is_better"]:
-                        acqfs_means = -acqfs_means
                     selected_acqfs = [0] + np.argpartition(acqfs_means, -num_optimization_spaces-1)[-num_optimization_spaces-1:]
                     selected_acqfs = selected_acqfs.round(0).astype(int).clip(0, num_optimization_spaces-1)
 

From fd6f95ec51707a5b165b5e8fce4dd75a0189e592 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 26 Nov 2024 13:16:56 -0800
Subject: [PATCH 094/168] Result registration must be optimization direction
 dependent

---
 .../strategies/bayes_opt_BOTorch_transfer_weighted.py         | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
index f424f41b6..1778d3ac2 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
@@ -51,6 +51,8 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
                 if self.is_valid_result(result):
                     config = tuple(c[p] for p in tl_searchspace.tune_params.keys())
                     inputs.append(tl_searchspace.param_config_to_tensor(config))
+                    if not self.maximize:
+                        result = -result
                     outcomes.append(result)
             tl_inputs = torch.stack(inputs).to(tl_searchspace.tensor_device)
             tl_outcomes = torch.tensor(outcomes, **tensor_kwargs).unsqueeze(-1)
@@ -151,4 +153,4 @@ def run(self, max_fevals: int, max_batch_size=2048):
             if self.tuning_options.verbose:
                 print(e)
 
-        return self.cost_func.results
\ No newline at end of file
+        return self.cost_func.results

From 6963febd552d6d0438cfc7cd79fb52ce6eb39265 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 26 Nov 2024 13:17:27 -0800
Subject: [PATCH 095/168] Transfer learning by direct transfer of best
 configurations

---
 .../bayes_opt_BOTorch_transfer_direct.py      | 141 ++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py
new file mode 100644
index 000000000..ee552dcc7
--- /dev/null
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py
@@ -0,0 +1,141 @@
+"""Bayesian Optimization implementation using BO Torch and transfer learning by direct transfer of best configurations."""
+
+try:
+    import torch
+    from botorch.acquisition import LogExpectedImprovement
+    from botorch.optim.optimize import optimize_acqf_discrete
+    from torch import Tensor
+    bayes_opt_present = True
+except ImportError:
+    bayes_opt_present = False
+
+from math import ceil, sqrt
+
+import numpy as np
+
+from kernel_tuner.searchspace import Searchspace
+from kernel_tuner.strategies.bayes_opt_BOTorch import BayesianOptimization
+from kernel_tuner.util import StopCriterionReached
+
+
+def tune(searchspace: Searchspace, runner, tuning_options):
+    """The entry function for tuning a searchspace using this algorithm."""
+    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
+    bo = BayesianOptimizationTransfer(searchspace, runner, tuning_options)
+    return bo.run(max_fevals)
+
+class BayesianOptimizationTransfer(BayesianOptimization):
+    """Bayesian Optimization class with transfer learning."""
+
+    def __init__(self, searchspace: Searchspace, runner, tuning_options):
+        super().__init__(searchspace, runner, tuning_options)
+
+        # set up the data and model for each transfer learning base task
+        self.searchspaces_transfer_learning: list[Searchspace] = []
+        self.inputs_transfer_learning: list[Tensor] = []
+        self.outcomes_transfer_learning: list[Tensor] = []
+        for tl_cache in tuning_options.transfer_learning_caches:
+            print(f"Importing transfer learning for {tl_cache["kernel_name"]}-{tl_cache['device_name']}")
+            # construct the searchspace for this task
+            tensor_kwargs = searchspace.tensor_kwargs
+            tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)
+            tl_searchspace.initialize_tensorspace(**tensor_kwargs)
+            self.searchspaces_transfer_learning.append(tl_searchspace)
+
+            # get the inputs and outcomes for this task
+            inputs = []
+            outcomes = []
+            for c in tl_cache["cache"].values():
+                result = c[tuning_options.objective]
+                if self.is_valid_result(result):
+                    config = tuple(c[p] for p in tl_searchspace.tune_params.keys())
+                    inputs.append(tl_searchspace.param_config_to_tensor(config))
+                    if not self.maximize:
+                        result = -result
+                    outcomes.append(result)
+            tl_inputs = torch.stack(inputs).to(tl_searchspace.tensor_device)
+            tl_outcomes = torch.tensor(outcomes, **tensor_kwargs).unsqueeze(-1)
+            assert tl_inputs.shape[0] == tl_outcomes.shape[0]
+            self.inputs_transfer_learning.append(tl_inputs)
+            self.outcomes_transfer_learning.append(tl_outcomes)
+    
+    def run(self, max_fevals: int, max_batch_size=2048):
+        """Run the Bayesian Optimization loop for at most `max_fevals`."""
+        try:
+            if not self.initial_sample_taken:
+                self.initial_sample()
+            model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
+            fevals_left = max_fevals - self.initial_sample_size
+
+            # create array to gradually reduce number of optimization spaces as fewer fevals are left
+            tensorspace_size = self.searchspace_tensors.size(0)
+            reserve_final_loops = min(3, fevals_left)   # reserve some loops at the end that are never split
+            fevals_left -= reserve_final_loops
+            num_loops = min(max(round(sqrt(fevals_left*2)), 3), fevals_left)  # set the number of loops for the array
+            avg_optimization_spaces = max(round(sqrt(tensorspace_size / max_batch_size)), 1)  # set the average number of optimization spaces
+            numspace = np.geomspace(start=avg_optimization_spaces, stop=0.1, num=num_loops)
+            nums_optimization_spaces = np.clip(np.round(numspace * (fevals_left / numspace.sum())), a_min=1, a_max=None)
+            # if there's a discrepancy, add or subtract the difference from the first number
+            if np.sum(nums_optimization_spaces) != fevals_left:
+                nums_optimization_spaces[0] += fevals_left - np.sum(nums_optimization_spaces)
+            nums_optimization_spaces = np.concatenate([nums_optimization_spaces, np.full(reserve_final_loops, 1)])
+            fevals_left += reserve_final_loops
+
+            # get the number of top configurations to select from transfer learning caches
+            num_tl_caches = len(self.outcomes_transfer_learning)
+            use_top_configs_until_loop = np.argmax(nums_optimization_spaces < num_tl_caches+1)  # stop after we have fewer num_optimization_spaces than caches - because with more caches the ACQF will not be used as much
+            num_top_configs = sum([ceil(n/(num_tl_caches+1)) for n in nums_optimization_spaces[:use_top_configs_until_loop]])
+
+            # select the top configurations for each of the transfer learning caches
+            top_configs = [list() for _ in self.outcomes_transfer_learning]
+            for tl_index, tl_outcomes in enumerate(self.outcomes_transfer_learning):
+                top_configs[tl_index] = self.inputs_transfer_learning[tl_outcomes.argmax()[:num_top_configs]] # TODO check if correct
+
+            # # if there are duplicate configurations, move them up and make sure there are only unique configs
+            # duplicate configurations are inserted at the num_configs-(sum of indices)th index
+            # # TODO
+
+            # Bayesian optimization loop
+            for loop_i, num_optimization_spaces in enumerate(nums_optimization_spaces):
+                num_optimization_spaces = round(min(num_optimization_spaces, fevals_left))
+
+                # calculate how many of the optimization spaces to optimize using GP
+                optimize_with_GP = max(round(num_optimization_spaces/(num_tl_caches+1)), 1)
+
+                # divide the optimization space into random chunks
+                tensorspace_size = self.searchspace_tensors.size(0)
+                if num_optimization_spaces <= 1:
+                    optimization_spaces = [self.searchspace_tensors]
+                else:
+                    # shuffle the searchspace
+                    shuffled_indices = torch.randperm(tensorspace_size)
+                    tensorspace = self.searchspace_tensors[shuffled_indices]
+                    optimization_spaces = tensorspace.split(ceil(tensorspace_size / num_optimization_spaces))
+
+                # fit on a Gaussian Process model
+                mll = self.fit(mll)
+
+                # define the acquisition functions
+                acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
+                
+                # optimize acquisition function to find the next evaluation point
+                for i, optimization_space in enumerate(optimization_spaces):
+                    candidate, _ = optimize_acqf_discrete(
+                        acqf, 
+                        q=1, 
+                        choices=optimization_space,
+                        max_batch_size=max_batch_size
+                    )
+                    
+                    # evaluate the new candidate
+                    self.evaluate_configs(candidate)
+                    fevals_left -= 1
+
+                # reinitialize the models so they are ready for fitting on next iteration
+                if loop_i < len(nums_optimization_spaces) - 1:
+                    model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar, state_dict=model.state_dict())
+        except StopCriterionReached as e:
+            if self.tuning_options.verbose:
+                print(e)
+
+        return self.cost_func.results

From a08953e6b9854f200dd84c0b0234fc98f190eddc Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 5 Mar 2025 22:11:50 +0100
Subject: [PATCH 096/168] BO update

---
 kernel_tuner/interface.py                             | 11 ++++++++++-
 .../strategies/bayes_opt_BOTorch_transfer_RGPE.py     |  2 ++
 tune_bo_conv.py                                       |  6 +++---
 tune_bo_dedisp.py                                     |  2 +-
 4 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 225439d30..16af99250 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -52,6 +52,7 @@
     bayes_opt,
     bayes_opt_alt_BOTorch,
     bayes_opt_BOTorch,
+    bayes_opt_BOTorch_transfer_direct,
     bayes_opt_BOTorch_transfer_RGPE,
     bayes_opt_BOTorch_transfer_weighted,
     bayes_opt_GPyTorch,
@@ -93,7 +94,8 @@
     "bayes_opt_GPyTorch_lean": bayes_opt_GPyTorch_lean,
     "bayes_opt_BOTorch": bayes_opt_BOTorch,
     "bayes_opt_BOTorch_alt": bayes_opt_alt_BOTorch,
-    "bayes_opt_BOTorch_transfer": bayes_opt_BOTorch_transfer_weighted,
+    "bayes_opt_BOTorch_transfer_direct": bayes_opt_BOTorch_transfer_direct,
+    "bayes_opt_BOTorch_transfer_weighted": bayes_opt_BOTorch_transfer_weighted,
     "bayes_opt_BOTorch_transfer_RGPE": bayes_opt_BOTorch_transfer_RGPE,
 }
 
@@ -910,8 +912,14 @@ def tune_kernel_T1(
     device = kernelspec["Device"]["Name"]
     strategy = inputs["Search"]["Name"]
 
+    # set the cache and transfer learning cache paths
     if cache_filepath is None and "SimulationInput" in kernelspec:
         cache_filepath = Path(kernelspec["SimulationInput"])
+    cache_dir = Path(cache_filepath).parent
+    # TODO remove in production!
+    transfer_learning_caches = [
+        p for p in cache_dir.iterdir() if not p.stem.endswith("_T4") and p.name != cache_filepath.name
+    ]
 
     # get the grid divisions
     grid_divs = {}
@@ -996,6 +1004,7 @@ def tune_kernel_T1(
         strategy_options=strategy_options,
         objective=objective,
         objective_higher_is_better=objective_higher_is_better,
+        transfer_learning_caches=transfer_learning_caches,
     )
     if output_T4:
         return get_t4_metadata(), get_t4_results(results, tune_params, objective=objective)
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
index 9b42fc77e..c371eb889 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
@@ -67,6 +67,8 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
                 if self.is_valid_result(result):
                     config = tuple(c[p] for p in tl_searchspace.tune_params.keys())
                     inputs.append(tl_searchspace.param_config_to_tensor(config))
+                    if not self.maximize:
+                        result = -result
                     outcomes.append(result)
             tl_inputs = torch.stack(inputs).to(tl_searchspace.tensor_device)
             tl_outcomes = torch.tensor(outcomes, **tensor_kwargs).unsqueeze(-1)
diff --git a/tune_bo_conv.py b/tune_bo_conv.py
index 61635c51f..ec37fbf67 100644
--- a/tune_bo_conv.py
+++ b/tune_bo_conv.py
@@ -23,13 +23,13 @@ def ops(w, h, fw, fh):
 
 def tune(
     device_name: str,
-    strategy="bayes_opt_BOTorch_transfer",
+    strategy="bayes_opt_BOTorch_transfer_weighted",
     strategy_options={ 'max_fevals': 150 },
     verbose=True,
     quiet=False,
     simulation_mode=True,
     lang="CUDA",
-    profiling=False,
+    profiling=True,
 ):  
     directory = Path(__file__).parent / "../autotuning_methodology/cached_data_used/"
     assert directory.exists()
@@ -135,7 +135,7 @@ def run():
         with cProfile.Profile() as pr:
             results, env = run()
             if profiling:
-                pr.dump_stats('bo_prof.prof')
+                pr.dump_stats('bo_prof_tl2.prof')
     else:
         results, env = run()
 
diff --git a/tune_bo_dedisp.py b/tune_bo_dedisp.py
index 67a56c17e..78b4b0474 100644
--- a/tune_bo_dedisp.py
+++ b/tune_bo_dedisp.py
@@ -20,7 +20,7 @@
 max_freq = min_freq + (nr_channels-1) * channel_bandwidth
 
 
-def tune(device_name, strategy="bayes_opt_BOTorch_transfer", strategy_options={ 'max_fevals': 1500 }, lang='HIP', verbose=True, quiet=False, simulation_mode=True, profiling=True):
+def tune(device_name, strategy="bayes_opt_BOTorch_transfer_weighted", strategy_options={ 'max_fevals': 1500 }, lang='HIP', verbose=True, quiet=False, simulation_mode=True, profiling=True):
 
     args = []
 

From ecd78021afb3596730d56d0adb7dc1478aa3171f Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Wed, 5 Mar 2025 22:27:36 +0100
Subject: [PATCH 097/168] Improved conversion of tunable parameter

---
 kernel_tuner/interface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 16af99250..8d83377f6 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -934,7 +934,7 @@ def tune_kernel_T1(
         tune_param = None
         if param["Type"] in ["int", "float"]:
             vals = param["Values"]
-            if vals[:5] == "list(" or (vals[0] == "[" and vals[-1] == "]"):
+            if "list(" in vals or "range(" in vals or (vals[0] == "[" and vals[-1] == "]"):
                 tune_param = eval(vals)
             else:
                 tune_param = literal_eval(vals)

From b7cda3619afb98706f8b6ea749662221cf5abb80 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 6 Mar 2025 08:19:53 +0100
Subject: [PATCH 098/168] Extended and improved conversion of T1 arguments,
 improved error reporting on problem size differences

---
 kernel_tuner/interface.py | 11 ++++++++++-
 kernel_tuner/util.py      | 21 +++++++++++++++------
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 8d83377f6..9dd964859 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -959,7 +959,11 @@ def tune_kernel_T1(
     cmem_arguments = {}
     for arg in kernelspec["Arguments"]:
         argument = None
-        if arg["Type"] == "float" and arg["MemoryType"] == "Vector":
+        if arg["MemoryType"] == "Vector":
+            if arg["Type"] != "float":
+                raise NotImplementedError(
+                    f"Conversion for vector type '{arg['Type']}' has not yet been implemented: {arg}"
+                )
             size = arg["Size"]
             if isinstance(size, str):
                 args = tune_params.copy()
@@ -973,6 +977,11 @@ def tune_kernel_T1(
                 argument = numpy.random.randn(size).astype(numpy.float32)
             else:
                 raise NotImplementedError(f"Conversion for fill type '{arg['FillType']}' has not yet been implemented")
+        elif arg["MemoryType"] == "Scalar":
+            if arg["Type"] == "float":
+                argument = numpy.float32(arg["FillValue"])
+            else:
+                raise NotImplementedError()
         if argument is not None:
             arguments.append(argument)
             if "MemType" in arg and arg["MemType"] == "Constant":
diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index dac5d6de4..c8acfffde 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -85,7 +85,6 @@ def __init__(self):
         self.Tensor = Exception  # using Exception here as a type that will never be among kernel arguments
 
 
-
 class SkippableFailure(Exception):
     """Exception used to raise when compiling or launching a kernel fails for a reason that can be expected."""
 
@@ -889,7 +888,7 @@ def to_numeric_constraint(
         if len(comparators_found) != 1:
             return None
         comparator = comparators_found[0]
-    
+
         # split the string on the comparison and remove leading and trailing whitespace
         left, right = tuple(s.strip() for s in restriction.split(comparator))
 
@@ -1032,7 +1031,10 @@ def to_equality_constraint(
                 ):
                     parsed_restriction = parsed_restriction[1:-1]
                 # check if we can turn this into the built-in numeric comparison constraint
-                if all(all(isinstance(v, (int, float)) and type(v) is not type(True) for v in tune_params[param]) for param in params_used):
+                if all(
+                    all(isinstance(v, (int, float)) and type(v) is not type(True) for v in tune_params[param])
+                    for param in params_used
+                ):
                     finalized_constraint = to_numeric_constraint(parsed_restriction, params_used)
                 if finalized_constraint is None:
                     # check if we can turn this into the built-in equality comparison constraint
@@ -1080,7 +1082,10 @@ def compile_restrictions(
 ) -> list[tuple[Union[str, Constraint, FunctionType], list[str]]]:
     """Parses restrictions from a list of strings into a list of strings, Functions, or Constraints (if `try_to_constraint`) and parameters used, or a single Function if monolithic is true."""
     # change tuples consisting of strings and tunable parameters to only strings to compile
-    restrictions = [r[0] if isinstance(r, tuple) and len(r) == 2 and isinstance(r[0], str) and isinstance(r[1], list) else r for r in restrictions]
+    restrictions = [
+        r[0] if isinstance(r, tuple) and len(r) == 2 and isinstance(r[0], str) and isinstance(r[1], list) else r
+        for r in restrictions
+    ]
     # filter the restrictions to get only the strings
     restrictions_str, restrictions_ignore = [], []
     for r in restrictions:
@@ -1176,7 +1181,9 @@ def process_cache(cache, kernel_options, tuning_options, runner):
 
     # if file exists
     else:
-        cached_data = read_cache(cache, not tuning_options.simulation_mode)    # don't open the cache in (parallel) simulation mode to avoid race conditions
+        cached_data = read_cache(
+            cache, not tuning_options.simulation_mode
+        )  # don't open the cache in (parallel) simulation mode to avoid race conditions
 
         # if in simulation mode, use the device name from the cache file as the runner device name
         if runner.simulation_mode:
@@ -1203,7 +1210,9 @@ def process_cache(cache, kernel_options, tuning_options, runner):
             # cache returns list, problem_size is likely a tuple. Therefore, the next check
             # checks the equality of all items in the list/tuples individually
             elif not all([i == j for i, j in zip(cached_data["problem_size"], kernel_options.problem_size)]):
-                raise ValueError("Cannot load cache which contains results for different problem_size")
+                raise ValueError(
+                    f"Cannot load cache which contains results for different problem_size ({cached_data["problem_size"]=} != {kernel_options.problem_size=})"
+                )
         if cached_data["tune_params_keys"] != list(tuning_options.tune_params.keys()):
             if all(key in tuning_options.tune_params for key in cached_data["tune_params_keys"]):
                 raise ValueError(

From 8836ce20ebd5a941495a8e5b01df38bd7a340d0a Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 13:20:04 +0100
Subject: [PATCH 099/168] Improved selection of transfer learning caches

---
 kernel_tuner/interface.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 9dd964859..38855dd90 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -918,7 +918,12 @@ def tune_kernel_T1(
     cache_dir = Path(cache_filepath).parent
     # TODO remove in production!
     transfer_learning_caches = [
-        p for p in cache_dir.iterdir() if not p.stem.endswith("_T4") and p.name != cache_filepath.name
+        p
+        for p in cache_dir.iterdir()
+        if len(p.suffixes) > 0
+        and p.suffixes[-1].endswith(".json")
+        and not p.stem.endswith("_T4")
+        and p.name != cache_filepath.name
     ]
 
     # get the grid divisions

From 539aed3e6f54d042c84339fd44fe4ae5110bedc2 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 13:25:46 +0100
Subject: [PATCH 100/168] Fixed an error with quotes in an f-string

---
 kernel_tuner/util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index c8acfffde..9f7916dd7 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -1211,7 +1211,7 @@ def process_cache(cache, kernel_options, tuning_options, runner):
             # checks the equality of all items in the list/tuples individually
             elif not all([i == j for i, j in zip(cached_data["problem_size"], kernel_options.problem_size)]):
                 raise ValueError(
-                    f"Cannot load cache which contains results for different problem_size ({cached_data["problem_size"]=} != {kernel_options.problem_size=})"
+                    f"Cannot load cache which contains results for different problem_size ({cached_data['problem_size']=} != {kernel_options.problem_size=})"
                 )
         if cached_data["tune_params_keys"] != list(tuning_options.tune_params.keys()):
             if all(key in tuning_options.tune_params for key in cached_data["tune_params_keys"]):

From 435b56bbd9ed45a9303668f0f3340d058edc2f87 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 13:28:34 +0100
Subject: [PATCH 101/168] Fixed torch import error due to Tensor type hint

---
 kernel_tuner/searchspace.py | 65 ++++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 26 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 0cc444717..2c9f87633 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -21,6 +21,7 @@
 try:
     import torch
     from torch import Tensor
+
     torch_available = True
 except ImportError:
     torch_available = False
@@ -42,7 +43,7 @@ def __init__(
         block_size_names=default_block_size_names,
         build_neighbors_index=False,
         neighbor_method=None,
-        from_cache: dict=None,
+        from_cache: dict = None,
         framework="PythonConstraint",
         solver_method="PC_OptimizedBacktrackingSolver",
         path_to_ATF_cache: Path = None,
@@ -58,10 +59,14 @@ def __init__(
         """
         # check the arguments
         if from_cache is not None:
-            assert tune_params is None and restrictions is None and max_threads is None, "When `from_cache` is used, the positional arguments must be set to None."
+            assert (
+                tune_params is None and restrictions is None and max_threads is None
+            ), "When `from_cache` is used, the positional arguments must be set to None."
             tune_params = from_cache["tune_params"]
         if from_cache is None:
-            assert tune_params is not None and restrictions is not None and max_threads is not None, "Must specify positional arugments ."
+            assert (
+                tune_params is not None and restrictions is not None and max_threads is not None
+            ), "Must specify positional arugments ."
 
         # set the object attributes using the arguments
         framework_l = framework.lower()
@@ -77,9 +82,9 @@ def __init__(
         self._tensorspace_param_config_structure = []
         self._map_tensor_to_param = {}
         self._map_param_to_tensor = {}
-        self.restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
+        self.restrictions = restrictions.copy() if hasattr(restrictions, "copy") else restrictions
         # the searchspace can add commonly used constraints (e.g. maxprod(blocks) <= maxthreads)
-        self._modified_restrictions = restrictions.copy() if hasattr(restrictions, 'copy') else restrictions
+        self._modified_restrictions = restrictions.copy() if hasattr(restrictions, "copy") else restrictions
         self.param_names = list(self.tune_params.keys())
         self.params_values = tuple(tuple(param_vals) for param_vals in self.tune_params.values())
         self.params_values_indices = None
@@ -93,8 +98,12 @@ def __init__(
         restrictions = [restrictions] if not isinstance(restrictions, list) else restrictions
         if (
             len(restrictions) > 0
-            and (any(isinstance(restriction, str) for restriction in restrictions)
-            or any(isinstance(restriction[0], str) for restriction in restrictions if isinstance(restriction, tuple)))
+            and (
+                any(isinstance(restriction, str) for restriction in restrictions)
+                or any(
+                    isinstance(restriction[0], str) for restriction in restrictions if isinstance(restriction, tuple)
+                )
+            )
             and not (framework_l == "pysmt" or framework_l == "bruteforce")
         ):
             self.restrictions = compile_restrictions(
@@ -609,14 +618,14 @@ def get_param_configs_at_indices(self, indices: List[int]) -> List[tuple]:
         # map(get) is ~40% faster than numpy[indices] (average based on six searchspaces with 10000, 100000 and 1000000 configs and 10 or 100 random indices)
         return list(map(self.list.__getitem__, indices))
 
-    def get_param_config_index(self, param_config: Union[tuple, Tensor]):
+    def get_param_config_index(self, param_config: Union[tuple, any]):
         """Lookup the index for a parameter configuration, returns None if not found."""
         if torch_available and isinstance(param_config, Tensor):
             param_config = self.tensor_to_param_config(param_config)
         # constant time O(1) access - much faster than any other method, but needs a shadow dict of the search space
         return self.__dict.get(param_config, None)
-    
-    def initialize_tensorspace(self, dtype = None, device = None):
+
+    def initialize_tensorspace(self, dtype=None, device=None):
         """Encode the searchspace in a Tensor. Save the mapping. Call this function directly to control the precision or device used."""
         assert self._tensorspace is None, "Tensorspace is already initialized"
         skipped_count = 0
@@ -642,16 +651,16 @@ def initialize_tensorspace(self, dtype = None, device = None):
             if all(isinstance(v, numbers.Real) for v in param_values):
                 tensor_values = torch.tensor(param_values, dtype=self.tensor_dtype)
             else:
-                self._tensorspace_categorical_dimensions.append(index-skipped_count)
+                self._tensorspace_categorical_dimensions.append(index - skipped_count)
                 # tensor_values = np.arange(len(param_values))
                 tensor_values = torch.arange(len(param_values), dtype=self.tensor_dtype)
 
             # write the mappings to the object
-            self._map_param_to_tensor[index] = (dict(zip(param_values, tensor_values.tolist())))
-            self._map_tensor_to_param[index] = (dict(zip(tensor_values.tolist(), param_values)))
+            self._map_param_to_tensor[index] = dict(zip(param_values, tensor_values.tolist()))
+            self._map_tensor_to_param[index] = dict(zip(tensor_values.tolist(), param_values))
             bounds.append((tensor_values.min(), tensor_values.max()))
             if tensor_values.min() < tensor_values.max():
-                self._tensorspace_bounds_indices.append(index-skipped_count)
+                self._tensorspace_bounds_indices.append(index - skipped_count)
 
         # do some checks
         assert len(self.params_values) == len(self._tensorspace_param_config_structure)
@@ -666,18 +675,18 @@ def initialize_tensorspace(self, dtype = None, device = None):
 
         # set the bounds in the correct format (one array for the min, one for the max)
         bounds = torch.tensor(bounds, **self.tensor_kwargs)
-        self._tensorspace_bounds = torch.cat([bounds[:,0], bounds[:,1]]).reshape((2, bounds.shape[0]))
-    
+        self._tensorspace_bounds = torch.cat([bounds[:, 0], bounds[:, 1]]).reshape((2, bounds.shape[0]))
+
     def get_tensorspace(self):
         """Get the searchspace encoded in a Tensor. To use a non-default dtype or device, call `initialize_tensorspace` first."""
         if self._tensorspace is None:
             self.initialize_tensorspace()
         return self._tensorspace
-    
+
     def get_tensorspace_categorical_dimensions(self):
         """Get the a list of the categorical dimensions in the tensorspace."""
         return self._tensorspace_categorical_dimensions
-    
+
     def param_config_to_tensor(self, param_config: tuple):
         """Convert from a parameter configuration to a Tensor."""
         if len(self._map_param_to_tensor) == 0:
@@ -685,7 +694,7 @@ def param_config_to_tensor(self, param_config: tuple):
         array = []
         for i, param in enumerate(param_config):
             if self._tensorspace_param_config_structure[i] is not None:
-                continue    # skip over parameters not in the tensorspace
+                continue  # skip over parameters not in the tensorspace
             mapping = self._map_param_to_tensor[i]
             conversions = [None, str, float, int, bool]
             for c in conversions:
@@ -697,7 +706,7 @@ def param_config_to_tensor(self, param_config: tuple):
                     if c == conversions[-1]:
                         raise KeyError(f"No variant of {param} could be found in {mapping}") from e
         return torch.tensor(array, **self.tensor_kwargs)
-    
+
     def tensor_to_param_config(self, tensor: Tensor):
         """Convert from a Tensor to a parameter configuration."""
         assert tensor.dim() == 1, f"Parameter configuration tensor must be 1-dimensional, is {tensor.dim()} ({tensor})"
@@ -709,10 +718,10 @@ def tensor_to_param_config(self, tensor: Tensor):
             if param is not None:
                 skip_counter += 1
             else:
-                value = tensor[i-skip_counter].item()
+                value = tensor[i - skip_counter].item()
                 config[i] = self._map_tensor_to_param[i][value]
         return tuple(config)
-    
+
     def get_tensorspace_bounds(self):
         """Get the bounds to the tensorspace parameters, returned as a 2 x d dimensional tensor, and the indices of the parameters."""
         if self._tensorspace is None:
@@ -929,7 +938,7 @@ def order_param_configs(
                 f"The number of ordered parameter configurations ({len(ordered_param_configs)}) differs from the original number of parameter configurations ({len(param_configs)})"
             )
         return ordered_param_configs
-    
+
     def to_ax_searchspace(self):
         """Convert this searchspace to an Ax SearchSpace."""
         from ax import ChoiceParameter, FixedParameter, ParameterType, SearchSpace
@@ -943,12 +952,14 @@ def to_ax_searchspace(self):
                 continue
 
             # convert the types
-            assert all(isinstance(param_values[0], type(v)) for v in param_values), f"Parameter values of mixed types are not supported: {param_values}"
+            assert all(
+                isinstance(param_values[0], type(v)) for v in param_values
+            ), f"Parameter values of mixed types are not supported: {param_values}"
             param_type_mapping = {
                 str: ParameterType.STRING,
                 int: ParameterType.INT,
                 float: ParameterType.FLOAT,
-                bool: ParameterType.BOOL
+                bool: ParameterType.BOOL,
             }
             param_type = param_type_mapping[type(param_values[0])]
 
@@ -959,6 +970,8 @@ def to_ax_searchspace(self):
                 ax_searchspace.add_parameter(ChoiceParameter(param_name, param_type, param_values))
 
         # add the constraints
-        raise NotImplementedError("Conversion to Ax SearchSpace has not been fully implemented as Ax Searchspaces can't capture full complexity.")
+        raise NotImplementedError(
+            "Conversion to Ax SearchSpace has not been fully implemented as Ax Searchspaces can't capture full complexity."
+        )
 
         return ax_searchspace

From 3c48b499d9726ce9d0ae71a1a0805a27f6a67f4a Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 13:30:25 +0100
Subject: [PATCH 102/168] Fixed torch import error due to Tensor type hint

---
 kernel_tuner/searchspace.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 2c9f87633..befc2232e 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -707,7 +707,7 @@ def param_config_to_tensor(self, param_config: tuple):
                         raise KeyError(f"No variant of {param} could be found in {mapping}") from e
         return torch.tensor(array, **self.tensor_kwargs)
 
-    def tensor_to_param_config(self, tensor: Tensor):
+    def tensor_to_param_config(self, tensor):
         """Convert from a Tensor to a parameter configuration."""
         assert tensor.dim() == 1, f"Parameter configuration tensor must be 1-dimensional, is {tensor.dim()} ({tensor})"
         if len(self._map_tensor_to_param) == 0:

From 388f3253d8f7497de0e6c7609bf1eb7be77c28fa Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 13:37:10 +0100
Subject: [PATCH 103/168] Fixed nested double quotes in an f-string

---
 kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py
index ee552dcc7..df1b3fbb0 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py
@@ -35,7 +35,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.inputs_transfer_learning: list[Tensor] = []
         self.outcomes_transfer_learning: list[Tensor] = []
         for tl_cache in tuning_options.transfer_learning_caches:
-            print(f"Importing transfer learning for {tl_cache["kernel_name"]}-{tl_cache['device_name']}")
+            print(f"Importing transfer learning for {tl_cache['kernel_name']}-{tl_cache['device_name']}")
             # construct the searchspace for this task
             tensor_kwargs = searchspace.tensor_kwargs
             tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)

From 373782f4826d90a0dfa88cf609690080c7c9bdfa Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 13:39:24 +0100
Subject: [PATCH 104/168] Fixed nested double quotes in f-strings

---
 kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py     | 2 +-
 kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
index c371eb889..88101be2b 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
@@ -52,7 +52,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.outcomes_transfer_learning: list[Tensor] = []
         self.models_transfer_learning: list = []
         for tl_cache in tuning_options.transfer_learning_caches:
-            print(f"Importing transfer learning for {tl_cache["kernel_name"]}-{tl_cache['device_name']}")
+            print(f"Importing transfer learning for {tl_cache['kernel_name']}-{tl_cache['device_name']}")
             # construct the searchspace for this task
             tensor_kwargs = searchspace.tensor_kwargs
             tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
index 1778d3ac2..814d5fcd0 100644
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
+++ b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
@@ -36,7 +36,7 @@ def __init__(self, searchspace: Searchspace, runner, tuning_options):
         self.outcomes_transfer_learning: list[Tensor] = []
         self.models_transfer_learning: list = []
         for tl_cache in tuning_options.transfer_learning_caches:
-            print(f"Importing transfer learning for {tl_cache["kernel_name"]}-{tl_cache['device_name']}")
+            print(f"Importing transfer learning for {tl_cache['kernel_name']}-{tl_cache['device_name']}")
             # construct the searchspace for this task
             tensor_kwargs = searchspace.tensor_kwargs
             tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)

From db3abb39b135532a6e9a562fde58ad0ddde7e4aa Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 14:22:33 +0100
Subject: [PATCH 105/168] Merge with searchspace_experiments

---
 kernel_tuner/searchspace.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 6c718c33d..9fd08afd9 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -144,21 +144,21 @@ def __init__(
             else:
                 raise ValueError(f"Invalid framework parameter {framework}")
 
-        # get the solver given the solver method argument
-        solver = ""
-        if solver_method.lower() == "pc_backtrackingsolver":
-            solver = BacktrackingSolver()
-        elif solver_method.lower() == "pc_optimizedbacktrackingsolver":
-            solver = OptimizedBacktrackingSolver(forwardcheck=False)
-        elif solver_method.lower() == "pc_parallelsolver":
-            raise NotImplementedError("ParallelSolver is not yet implemented")
-            # solver = ParallelSolver()
-        elif solver_method.lower() == "pc_recursivebacktrackingsolver":
-            solver = RecursiveBacktrackingSolver()
-        elif solver_method.lower() == "pc_minconflictssolver":
-            solver = MinConflictsSolver()
-        else:
-            raise ValueError(f"Solver method {solver_method} not recognized.")
+            # get the solver given the solver method argument
+            solver = ""
+            if solver_method.lower() == "pc_backtrackingsolver":
+                solver = BacktrackingSolver()
+            elif solver_method.lower() == "pc_optimizedbacktrackingsolver":
+                solver = OptimizedBacktrackingSolver(forwardcheck=False)
+            elif solver_method.lower() == "pc_parallelsolver":
+                raise NotImplementedError("ParallelSolver is not yet implemented")
+                # solver = ParallelSolver()
+            elif solver_method.lower() == "pc_recursivebacktrackingsolver":
+                solver = RecursiveBacktrackingSolver()
+            elif solver_method.lower() == "pc_minconflictssolver":
+                solver = MinConflictsSolver()
+            else:
+                raise ValueError(f"Solver method {solver_method} not recognized.")
 
             # build the search space
             self.list, self.__dict, self.size = searchspace_builder(block_size_names, max_threads, solver)

From c692ba6cfa725cc96bb7b0ef7f495eef8d289bbf Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 14:45:04 +0100
Subject: [PATCH 106/168] Loosened required positional arguments

---
 kernel_tuner/searchspace.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 9fd08afd9..8265c44ab 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -70,8 +70,8 @@ def __init__(
             tune_params = from_cache["tune_params"]
         if from_cache is None:
             assert (
-                tune_params is not None and restrictions is not None and max_threads is not None
-            ), "Must specify positional arugments ."
+                tune_params is not None and max_threads is not None
+            ), "Must specify positional arguments."
 
         # set the object attributes using the arguments
         framework_l = framework.lower()

From fe113e6f534761136bea82acffe8283a34d833a8 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 14:56:34 +0100
Subject: [PATCH 107/168] Changed benchmarks location for hypertuner

---
 kernel_tuner/backends/hypertuner.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index 0b1c69adb..15867fbd9 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -68,12 +68,12 @@ def compile(self, kernel_instance):
         applications = [
             {
                 "name": "dedispersion_milo",
-                "folder": "../autotuning_methodology/cached_data_used/kernels",
+                "folder": "../autotuning_methodology/benchmark_hub/kernels",
                 "input_file": "dedispersion_milo.json"
             },
             {
                 "name": "convolution_milo",
-                "folder": "../autotuning_methodology/cached_data_used/kernels",
+                "folder": "../autotuning_methodology/benchmark_hub/kernels",
                 "input_file": "convolution_milo.json"
             }
         ]

From 5e65abdfbe3408b844dfa112ac14a8002b8fd6e1 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 15:03:02 +0100
Subject: [PATCH 108/168] Used hip-python-fork package as hip-python is not
 available

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 717fa9bdd..4b2d721ab 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -59,7 +59,7 @@ kernel_tuner = "kernel_tuner.interface:entry_point"
 
 # ATTENTION: if anything is changed here, run `poetry update`
 [tool.poetry.dependencies]
-python = ">=3.10"          # NOTE when changing the supported Python versions, also change the test versions in the noxfile
+python = ">=3.10,<3.15"    # NOTE when changing the supported Python versions, also change the test versions in the noxfile
 numpy = "^1.26.0"          # Python 3.12 requires numpy at least 1.26
 scipy = ">=1.14.1"
 botorch = ">=0.12.0"
@@ -83,7 +83,7 @@ pynvml = { version = "^11.4.1", optional = true }
 # OpenCL
 pyopencl = { version = "*", optional = true } # Attention: if pyopencl is changed here, also change `session.install("pyopencl")` in the Noxfile
 # HIP
-hip-python = { version = "*", optional = true }
+hip-python-fork = { version = "*", optional = true }
 # Tutorial (for the notebooks used in the examples)
 jupyter = { version = "^1.0.0", optional = true }
 matplotlib = { version = "^3.5.0", optional = true }

From 0ba00a0403b82913eaa9db3ede946e16dd124702 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 15:09:50 +0100
Subject: [PATCH 109/168] Removed transfer learning references

---
 kernel_tuner/interface.py                     |  38 +--
 .../bayes_opt_BOTorch_transfer_RGPE.py        | 322 ------------------
 .../bayes_opt_BOTorch_transfer_direct.py      | 141 --------
 .../bayes_opt_BOTorch_transfer_weighted.py    | 156 ---------
 4 files changed, 1 insertion(+), 656 deletions(-)
 delete mode 100644 kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
 delete mode 100644 kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py
 delete mode 100644 kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 38855dd90..45a4bfef9 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -52,9 +52,6 @@
     bayes_opt,
     bayes_opt_alt_BOTorch,
     bayes_opt_BOTorch,
-    bayes_opt_BOTorch_transfer_direct,
-    bayes_opt_BOTorch_transfer_RGPE,
-    bayes_opt_BOTorch_transfer_weighted,
     bayes_opt_GPyTorch,
     bayes_opt_GPyTorch_lean,
     bayes_opt_old,
@@ -94,9 +91,6 @@
     "bayes_opt_GPyTorch_lean": bayes_opt_GPyTorch_lean,
     "bayes_opt_BOTorch": bayes_opt_BOTorch,
     "bayes_opt_BOTorch_alt": bayes_opt_alt_BOTorch,
-    "bayes_opt_BOTorch_transfer_direct": bayes_opt_BOTorch_transfer_direct,
-    "bayes_opt_BOTorch_transfer_weighted": bayes_opt_BOTorch_transfer_weighted,
-    "bayes_opt_BOTorch_transfer_RGPE": bayes_opt_BOTorch_transfer_RGPE,
 }
 
 
@@ -483,15 +477,6 @@ def __deepcopy__(self, _):
                 "string",
             ),
         ),
-        (
-            "transfer_learning_caches",
-            (
-                """Array of filepaths to caches to use for transfer learning.
-        Filename uses suffix ".json", which is appended if missing.
-        """,
-                "list(string) or list(Path)",
-            ),
-        ),
         ("metrics", ("specifies user-defined metrics, please see :ref:`metrics`.", "dict")),
         ("simulation_mode", ("Simulate an auto-tuning search from an existing cachefile", "bool")),
         ("observers", ("""A list of Observers to use during tuning, please see :ref:`observers`.""", "list")),
@@ -608,7 +593,6 @@ def tune_kernel(
     observers=None,
     objective=None,
     objective_higher_is_better=None,
-    transfer_learning_caches=[],
 ):
     start_overhead_time = perf_counter()
     if log:
@@ -710,15 +694,6 @@ def preprocess_cache(filepath):
         tuning_options.cache = {}
         tuning_options.cachefile = None
 
-    # process transfer learning caches
-    tuning_options.transfer_learning_caches = []
-    if transfer_learning_caches and len(transfer_learning_caches) > 0:
-        for transfer_learning_cache in transfer_learning_caches:
-            cache = preprocess_cache(transfer_learning_cache)
-            assert cache != tuning_options.cache, "Transfer learning cache can not be the same as current cache"
-            cache_data = util.read_cache(cache, open_cache=False)
-            tuning_options.transfer_learning_caches.append(cache_data)
-
     # create search space
     searchspace = Searchspace(tune_params, restrictions, runner.dev.max_threads)
     restrictions = searchspace._modified_restrictions
@@ -912,19 +887,9 @@ def tune_kernel_T1(
     device = kernelspec["Device"]["Name"]
     strategy = inputs["Search"]["Name"]
 
-    # set the cache and transfer learning cache paths
+    # set the cache path
     if cache_filepath is None and "SimulationInput" in kernelspec:
         cache_filepath = Path(kernelspec["SimulationInput"])
-    cache_dir = Path(cache_filepath).parent
-    # TODO remove in production!
-    transfer_learning_caches = [
-        p
-        for p in cache_dir.iterdir()
-        if len(p.suffixes) > 0
-        and p.suffixes[-1].endswith(".json")
-        and not p.stem.endswith("_T4")
-        and p.name != cache_filepath.name
-    ]
 
     # get the grid divisions
     grid_divs = {}
@@ -1018,7 +983,6 @@ def tune_kernel_T1(
         strategy_options=strategy_options,
         objective=objective,
         objective_higher_is_better=objective_higher_is_better,
-        transfer_learning_caches=transfer_learning_caches,
     )
     if output_T4:
         return get_t4_metadata(), get_t4_results(results, tune_params, objective=objective)
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
deleted file mode 100644
index 88101be2b..000000000
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_RGPE.py
+++ /dev/null
@@ -1,322 +0,0 @@
-"""Bayesian Optimization implementation using BO Torch and transfer learning with RGPE."""
-
-try:
-    import torch
-    from botorch.acquisition import qLogNoisyExpectedImprovement
-    from botorch.fit import fit_gpytorch_mll, fit_gpytorch_mll_torch
-    from botorch.models.gpytorch import GPyTorchModel
-    from botorch.optim.optimize import optimize_acqf_discrete_local_search
-    from botorch.sampling.normal import SobolQMCNormalSampler
-    from gpytorch.distributions import MultivariateNormal
-    from gpytorch.lazy import PsdSumLazyTensor
-    from gpytorch.likelihoods import LikelihoodList
-    from gpytorch.models import GP
-    from torch import Tensor
-    from torch.nn import ModuleList
-    bayes_opt_present = True
-except ImportError:
-    bayes_opt_present = False
-
-
-from kernel_tuner.searchspace import Searchspace
-from kernel_tuner.strategies.bayes_opt_BOTorch import BayesianOptimization
-from kernel_tuner.util import StopCriterionReached
-
-# settings
-NUM_BASE_TASKS = 5
-N_BATCH = 10
-NUM_POSTERIOR_SAMPLES = 256
-RANDOM_INITIALIZATION_SIZE = 3
-N_TRIALS = 10
-MC_SAMPLES = 512
-N_RESTART_CANDIDATES = 512
-N_RESTARTS = 10
-Q_BATCH_SIZE = 1
-
-
-def tune(searchspace: Searchspace, runner, tuning_options):
-    """The entry function for tuning a searchspace using this algorithm."""
-    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
-    bo = BayesianOptimizationTransfer(searchspace, runner, tuning_options)
-    return bo.run(max_fevals)
-
-class BayesianOptimizationTransfer(BayesianOptimization):
-    """Bayesian Optimization class with transfer learning."""
-
-    def __init__(self, searchspace: Searchspace, runner, tuning_options):
-        super().__init__(searchspace, runner, tuning_options)
-
-        # set up the data and model for each transfer learning base task
-        self.searchspaces_transfer_learning: list[Searchspace] = []
-        self.inputs_transfer_learning: list[Tensor] = []
-        self.outcomes_transfer_learning: list[Tensor] = []
-        self.models_transfer_learning: list = []
-        for tl_cache in tuning_options.transfer_learning_caches:
-            print(f"Importing transfer learning for {tl_cache['kernel_name']}-{tl_cache['device_name']}")
-            # construct the searchspace for this task
-            tensor_kwargs = searchspace.tensor_kwargs
-            tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)
-            tl_searchspace.initialize_tensorspace(**tensor_kwargs)
-            self.searchspaces_transfer_learning.append(tl_searchspace)
-
-            # get the inputs and outcomes for this task
-            inputs = []
-            outcomes = []
-            for c in tl_cache["cache"].values():
-                result = c[tuning_options.objective]
-                if self.is_valid_result(result):
-                    config = tuple(c[p] for p in tl_searchspace.tune_params.keys())
-                    inputs.append(tl_searchspace.param_config_to_tensor(config))
-                    if not self.maximize:
-                        result = -result
-                    outcomes.append(result)
-            tl_inputs = torch.stack(inputs).to(tl_searchspace.tensor_device)
-            tl_outcomes = torch.tensor(outcomes, **tensor_kwargs).unsqueeze(-1)
-            assert tl_inputs.shape[0] == tl_outcomes.shape[0]
-            self.inputs_transfer_learning.append(tl_inputs)
-            self.outcomes_transfer_learning.append(tl_outcomes)
-
-            # fit a model and likelihood for this task
-            model, mll = self.get_model_and_likelihood(tl_searchspace, tl_inputs, tl_outcomes)
-            mll = self.fit(mll)
-            self.models_transfer_learning.append(model)
-    
-    def roll_col(self, X, shift):
-        """Rotate columns to right by shift."""
-        return torch.cat((X[..., -shift:], X[..., :-shift]), dim=-1)
-    
-    def compute_ranking_loss(self, f_samps, target_y):
-        """Compute ranking loss for each sample from the posterior over target points.
-
-        Args:
-            f_samps: `n_samples x (n) x n`-dim tensor of samples
-            target_y: `n x 1`-dim tensor of targets
-        Returns:
-            Tensor: `n_samples`-dim tensor containing the ranking loss across each sample
-        """
-        n = target_y.shape[0]
-        if f_samps.ndim == 3:
-            # Compute ranking loss for target model
-            # take cartesian product of target_y
-            cartesian_y = torch.cartesian_prod(
-                target_y.squeeze(-1),
-                target_y.squeeze(-1),
-            ).view(n, n, 2)
-            # the diagonal of f_samps are the out-of-sample predictions
-            # for each LOO model, compare the out of sample predictions to each in-sample prediction
-            rank_loss = (
-                (
-                    (f_samps.diagonal(dim1=1, dim2=2).unsqueeze(-1) < f_samps)
-                    ^ (cartesian_y[..., 0] < cartesian_y[..., 1])
-                )
-                .sum(dim=-1)
-                .sum(dim=-1)
-            )
-        else:
-            rank_loss = torch.zeros(
-                f_samps.shape[0], dtype=torch.long, device=target_y.device
-            )
-            y_stack = target_y.squeeze(-1).expand(f_samps.shape)
-            for i in range(1, target_y.shape[0]):
-                rank_loss += (
-                    (self.roll_col(f_samps, i) < f_samps) ^ (self.roll_col(y_stack, i) < y_stack)
-                ).sum(dim=-1)
-        return rank_loss
-    
-    def get_target_model_loocv_sample_preds(self, train_x, train_y, train_yvar, target_model, num_samples, no_state=False):
-        """Create a batch-mode LOOCV GP and draw a joint sample across all points from the target task.
-
-        Args:
-            train_x: `n x d` tensor of training points
-            train_y: `n x 1` tensor of training targets
-            target_model: fitted target model
-            num_samples: number of mc samples to draw
-
-        Return: `num_samples x n x n`-dim tensor of samples, where dim=1 represents the `n` LOO models,
-            and dim=2 represents the `n` training points.
-        """
-        batch_size = len(train_x)
-        masks = torch.eye(len(train_x), dtype=torch.uint8, device=self.tensor_device).bool()
-        train_x_cv = torch.stack([train_x[~m] for m in masks])
-        train_y_cv = torch.stack([train_y[~m] for m in masks])
-        train_yvar_cv = torch.stack([train_yvar[~m] for m in masks]) if train_yvar is not None else None
-
-        # use a state dictionary for fast updates
-        if no_state:
-            state_dict_expanded = None
-        else:
-            state_dict = target_model.state_dict()
-
-            # expand to batch size of batch_mode LOOCV model
-            state_dict_expanded = {
-                name: t.expand(batch_size, *[-1 for _ in range(t.ndim)])
-                for name, t in state_dict.items()
-            }
-        
-        model, _ = self.get_model_and_likelihood(
-            self.searchspace, train_x_cv, train_y_cv, train_yvar_cv, state_dict=state_dict_expanded
-        )
-        with torch.no_grad():
-            posterior = model.posterior(train_x)
-            # Since we have a batch mode gp and model.posterior always returns an output dimension,
-            # the output from `posterior.sample()` here `num_samples x n x n x 1`, so let's squeeze
-            # the last dimension.
-            sampler = SobolQMCNormalSampler(sample_shape=torch.Size([num_samples]))
-            return sampler(posterior).squeeze(-1)
-    
-    def compute_rank_weights(self, train_x, train_y, train_yvar, base_models, target_model, num_samples, no_state=False):
-        """Compute ranking weights for each base model and the target model (using LOOCV for the target model).
-        
-        Note: This implementation does not currently address weight dilution, since we only have a small number of base models.
-
-        Args:
-            train_x: `n x d` tensor of training points (for target task)
-            train_y: `n` tensor of training targets (for target task)
-            base_models: list of base models
-            target_model: target model
-            num_samples: number of mc samples
-
-        Returns:
-            Tensor: `n_t`-dim tensor with the ranking weight for each model
-        """
-        ranking_losses = []
-
-        # compute ranking loss for each base model
-        for model in base_models:
-            # compute posterior over training points for target task
-            posterior = model.posterior(train_x)
-            sampler = SobolQMCNormalSampler(sample_shape=torch.Size([num_samples]))
-            base_f_samps = sampler(posterior).squeeze(-1).squeeze(-1)
-            # compute and save ranking loss
-            ranking_losses.append(self.compute_ranking_loss(base_f_samps, train_y))
-
-        # compute ranking loss for target model using LOOCV
-        # f_samps
-        target_f_samps = self.get_target_model_loocv_sample_preds(
-            train_x,
-            train_y,
-            train_yvar,
-            target_model,
-            num_samples,
-            no_state=no_state,
-        )
-        ranking_losses.append(self.compute_ranking_loss(target_f_samps, train_y))
-        ranking_loss_tensor = torch.stack(ranking_losses)
-        # compute best model (minimum ranking loss) for each sample
-        best_models = torch.argmin(ranking_loss_tensor, dim=0)
-        # compute proportion of samples for which each model is best
-        rank_weights = (
-            best_models.bincount(minlength=len(ranking_losses)).type_as(train_x)
-            / num_samples
-        )
-        return rank_weights
-    
-    def run(self, max_fevals: int, max_batch_size=2048):
-        """Run the Bayesian Optimization loop for at most `max_fevals`."""
-        try:
-            if not self.initial_sample_taken:
-                self.initial_sample()
-            model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
-            fevals_left = max_fevals - self.initial_sample_size
-            first_loop = self.initial_sample_size > 0
-
-            # Bayesian optimization loop
-            for _ in range(fevals_left):
-
-                # fit a Gaussian Process model
-                fit_gpytorch_mll(mll, optimizer=fit_gpytorch_mll_torch)
-
-                # calculate the rank weights
-                model_list = self.models_transfer_learning + [model]
-                rank_weights = self.compute_rank_weights(
-                    self.train_X,
-                    self.train_Y,
-                    self.train_Yvar,
-                    self.models_transfer_learning,
-                    model,
-                    NUM_POSTERIOR_SAMPLES,
-                    no_state=first_loop,
-                )
-
-                # create rank model and acquisition function
-                rgpe_model = RGPE(model_list, rank_weights)
-                # acqf = LogExpectedImprovement(model=rgpe_model, best_f=self.train_Y.max(), maximize=True)
-                sampler_qnei = SobolQMCNormalSampler(sample_shape=torch.Size([MC_SAMPLES]))
-                qNEI = qLogNoisyExpectedImprovement(
-                    model=rgpe_model,
-                    X_baseline=self.train_X,
-                    sampler=sampler_qnei,
-                    prune_baseline=False,
-                )
-
-                # optimize
-                candidate, _ = optimize_acqf_discrete_local_search(
-                    acq_function=qNEI,
-                    discrete_choices=self.searchspace_tensors,
-                    q=Q_BATCH_SIZE,
-                    num_restarts=N_RESTARTS,
-                    raw_samples=N_RESTART_CANDIDATES,
-                    max_batch_size=max_batch_size
-                )
-                    
-                # evaluate the new candidate
-                self.evaluate_configs(candidate)
-                fevals_left -= 1
-
-                # reinitialize the models so they are ready for fitting on next iteration
-                if fevals_left > 0:
-                    model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
-                    first_loop = False
-        except StopCriterionReached as e:
-            if self.tuning_options.verbose:
-                print(e)
-
-        return self.cost_func.results
-
-
-class RGPE(GP, GPyTorchModel):
-    """Rank-weighted GP ensemble.
-    
-    Note: this class inherits from GPyTorchModel which provides an interface for GPyTorch models in botorch.
-    """
-
-    _num_outputs = 1  # metadata for botorch
-
-    def __init__(self, models, weights):
-        super().__init__()
-        self.models = ModuleList(models)
-        for m in models:
-            if not hasattr(m, "likelihood"):
-                raise ValueError(
-                    "RGPE currently only supports models that have a likelihood (e.g. ExactGPs)"
-                )
-        self.likelihood = LikelihoodList(*[m.likelihood for m in models])
-        self.weights = weights
-        self.to(weights)
-
-    def forward(self, x):
-        weighted_means = []
-        weighted_covars = []
-        # filter model with zero weights
-        # weights on covariance matrices are weight**2
-        non_zero_weight_indices = (self.weights**2 > 0).nonzero()
-        non_zero_weights = self.weights[non_zero_weight_indices]
-        # re-normalize
-        non_zero_weights /= non_zero_weights.sum()
-
-        for non_zero_weight_idx in range(non_zero_weight_indices.shape[0]):
-            raw_idx = non_zero_weight_indices[non_zero_weight_idx].item()
-            model = self.models[raw_idx]
-            posterior = model.posterior(x)
-            # unstandardize predictions
-            posterior_mean = posterior.mean.squeeze(-1)
-            posterior_cov = posterior.mvn.lazy_covariance_matrix
-            # apply weight
-            weight = non_zero_weights[non_zero_weight_idx]
-            weighted_means.append(weight * posterior_mean)
-            weighted_covars.append(posterior_cov * weight**2)
-        # set mean and covariance to be the rank-weighted sum the means and covariances of the
-        # base models and target model
-        mean_x = torch.stack(weighted_means).sum(dim=0)
-        covar_x = PsdSumLazyTensor(*weighted_covars)
-        return MultivariateNormal(mean_x, covar_x)
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py
deleted file mode 100644
index df1b3fbb0..000000000
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_direct.py
+++ /dev/null
@@ -1,141 +0,0 @@
-"""Bayesian Optimization implementation using BO Torch and transfer learning with RGPE."""
-
-try:
-    import torch
-    from botorch.acquisition import LogExpectedImprovement
-    from botorch.optim.optimize import optimize_acqf_discrete
-    from torch import Tensor
-    bayes_opt_present = True
-except ImportError:
-    bayes_opt_present = False
-
-from math import ceil, sqrt
-
-import numpy as np
-
-from kernel_tuner.searchspace import Searchspace
-from kernel_tuner.strategies.bayes_opt_BOTorch import BayesianOptimization
-from kernel_tuner.util import StopCriterionReached
-
-
-def tune(searchspace: Searchspace, runner, tuning_options):
-    """The entry function for tuning a searchspace using this algorithm."""
-    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
-    bo = BayesianOptimizationTransfer(searchspace, runner, tuning_options)
-    return bo.run(max_fevals)
-
-class BayesianOptimizationTransfer(BayesianOptimization):
-    """Bayesian Optimization class with transfer learning."""
-
-    def __init__(self, searchspace: Searchspace, runner, tuning_options):
-        super().__init__(searchspace, runner, tuning_options)
-
-        # set up the data and model for each transfer learning base task
-        self.searchspaces_transfer_learning: list[Searchspace] = []
-        self.inputs_transfer_learning: list[Tensor] = []
-        self.outcomes_transfer_learning: list[Tensor] = []
-        for tl_cache in tuning_options.transfer_learning_caches:
-            print(f"Importing transfer learning for {tl_cache['kernel_name']}-{tl_cache['device_name']}")
-            # construct the searchspace for this task
-            tensor_kwargs = searchspace.tensor_kwargs
-            tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)
-            tl_searchspace.initialize_tensorspace(**tensor_kwargs)
-            self.searchspaces_transfer_learning.append(tl_searchspace)
-
-            # get the inputs and outcomes for this task
-            inputs = []
-            outcomes = []
-            for c in tl_cache["cache"].values():
-                result = c[tuning_options.objective]
-                if self.is_valid_result(result):
-                    config = tuple(c[p] for p in tl_searchspace.tune_params.keys())
-                    inputs.append(tl_searchspace.param_config_to_tensor(config))
-                    if not self.maximize:
-                        result = -result
-                    outcomes.append(result)
-            tl_inputs = torch.stack(inputs).to(tl_searchspace.tensor_device)
-            tl_outcomes = torch.tensor(outcomes, **tensor_kwargs).unsqueeze(-1)
-            assert tl_inputs.shape[0] == tl_outcomes.shape[0]
-            self.inputs_transfer_learning.append(tl_inputs)
-            self.outcomes_transfer_learning.append(tl_outcomes)
-    
-    def run(self, max_fevals: int, max_batch_size=2048):
-        """Run the Bayesian Optimization loop for at most `max_fevals`."""
-        try:
-            if not self.initial_sample_taken:
-                self.initial_sample()
-            model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
-            fevals_left = max_fevals - self.initial_sample_size
-
-            # create array to gradually reduce number of optimization spaces as fewer fevals are left
-            tensorspace_size = self.searchspace_tensors.size(0)
-            reserve_final_loops = min(3, fevals_left)   # reserve some loops at the end that are never split
-            fevals_left -= reserve_final_loops
-            num_loops = min(max(round(sqrt(fevals_left*2)), 3), fevals_left)  # set the number of loops for the array
-            avg_optimization_spaces = max(round(sqrt(tensorspace_size / max_batch_size)), 1)  # set the average number of optimization spaces
-            numspace = np.geomspace(start=avg_optimization_spaces, stop=0.1, num=num_loops)
-            nums_optimization_spaces = np.clip(np.round(numspace * (fevals_left / numspace.sum())), a_min=1, a_max=None)
-            # if there's a discrepency, add or subtract the difference from the first number
-            if np.sum(nums_optimization_spaces) != fevals_left:
-                nums_optimization_spaces[0] += fevals_left - np.sum(nums_optimization_spaces)
-            nums_optimization_spaces = np.concatenate([nums_optimization_spaces, np.full(reserve_final_loops, 1)])
-            fevals_left += reserve_final_loops
-
-            # get the number of top configurations to select from transfer learning caches
-            num_tl_caches = len(self.outcomes_transfer_learning)
-            use_top_configs_until_loop = np.argmax(nums_optimization_spaces < num_tl_caches+1)  # stop after we have fewer num_optimization_spaces than caches - because with more caches the ACQF will not be used as much
-            num_top_configs = sum([ceil(n/(num_tl_caches+1)) for n in nums_optimization_spaces[:use_top_configs_until_loop]])
-
-            # select the top configurations for each of the transfer learning caches
-            top_configs = [list() for _ in self.outcomes_transfer_learning]
-            for tl_index, tl_outcomes in enumerate(self.outcomes_transfer_learning):
-                top_configs[tl_index] = self.inputs_transfer_learning[tl_outcomes.argmax()[:num_top_configs]] # TODO check if correct
-
-            # # if there are duplicate configurations, move them up and make sure there are only unique configs
-            # duplicate configurations are inserted at the num_configs-(sum of indices)th index
-            # # TODO
-
-            # Bayesian optimization loop
-            for loop_i, num_optimization_spaces in enumerate(nums_optimization_spaces):
-                num_optimization_spaces = round(min(num_optimization_spaces, fevals_left))
-
-                # calculate how many of the optimization spaces to optimize using GP
-                optimize_with_GP = max(round(num_optimization_spaces/(num_tl_caches+1)), 1)
-
-                # divide the optimization space into random chuncks
-                tensorspace_size = self.searchspace_tensors.size(0)
-                if num_optimization_spaces <= 1:
-                    optimization_spaces = [self.searchspace_tensors]
-                else:
-                    # shuffle the searchspace
-                    shuffled_indices = torch.randperm(tensorspace_size)
-                    tensorspace = self.searchspace_tensors[shuffled_indices]
-                    optimization_spaces = tensorspace.split(ceil(tensorspace_size / num_optimization_spaces))
-
-                # fit on a Gaussian Process model
-                mll = self.fit(mll)
-
-                # define the acquisition functions
-                acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
-                
-                # optimize acquisition function to find the next evaluation point
-                for i, optimization_space in enumerate(optimization_spaces):
-                    candidate, _ = optimize_acqf_discrete(
-                        acqf, 
-                        q=1, 
-                        choices=optimization_space,
-                        max_batch_size=max_batch_size
-                    )
-                    
-                    # evaluate the new candidate
-                    self.evaluate_configs(candidate)
-                    fevals_left -= 1
-
-                # reinitialize the models so they are ready for fitting on next iteration
-                if loop_i < len(nums_optimization_spaces) - 1:
-                    model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar, state_dict=model.state_dict())
-        except StopCriterionReached as e:
-            if self.tuning_options.verbose:
-                print(e)
-
-        return self.cost_func.results
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py b/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
deleted file mode 100644
index 814d5fcd0..000000000
--- a/kernel_tuner/strategies/bayes_opt_BOTorch_transfer_weighted.py
+++ /dev/null
@@ -1,156 +0,0 @@
-"""Bayesian Optimization implementation using BO Torch and transfer learning with RGPE."""
-
-try:
-    import torch
-    from botorch.acquisition import LogExpectedImprovement
-    from botorch.optim.optimize import optimize_acqf_discrete
-    from torch import Tensor
-    bayes_opt_present = True
-except ImportError:
-    bayes_opt_present = False
-
-from math import ceil, sqrt
-
-import numpy as np
-
-from kernel_tuner.searchspace import Searchspace
-from kernel_tuner.strategies.bayes_opt_BOTorch import BayesianOptimization
-from kernel_tuner.util import StopCriterionReached
-
-
-def tune(searchspace: Searchspace, runner, tuning_options):
-    """The entry function for tuning a searchspace using this algorithm."""
-    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
-    bo = BayesianOptimizationTransfer(searchspace, runner, tuning_options)
-    return bo.run(max_fevals)
-
-class BayesianOptimizationTransfer(BayesianOptimization):
-    """Bayesian Optimization class with transfer learning."""
-
-    def __init__(self, searchspace: Searchspace, runner, tuning_options):
-        super().__init__(searchspace, runner, tuning_options)
-
-        # set up the data and model for each transfer learning base task
-        self.searchspaces_transfer_learning: list[Searchspace] = []
-        self.inputs_transfer_learning: list[Tensor] = []
-        self.outcomes_transfer_learning: list[Tensor] = []
-        self.models_transfer_learning: list = []
-        for tl_cache in tuning_options.transfer_learning_caches:
-            print(f"Importing transfer learning for {tl_cache['kernel_name']}-{tl_cache['device_name']}")
-            # construct the searchspace for this task
-            tensor_kwargs = searchspace.tensor_kwargs
-            tl_searchspace = Searchspace(None, None, None, from_cache=tl_cache)
-            tl_searchspace.initialize_tensorspace(**tensor_kwargs)
-            self.searchspaces_transfer_learning.append(tl_searchspace)
-
-            # get the inputs and outcomes for this task
-            inputs = []
-            outcomes = []
-            for c in tl_cache["cache"].values():
-                result = c[tuning_options.objective]
-                if self.is_valid_result(result):
-                    config = tuple(c[p] for p in tl_searchspace.tune_params.keys())
-                    inputs.append(tl_searchspace.param_config_to_tensor(config))
-                    if not self.maximize:
-                        result = -result
-                    outcomes.append(result)
-            tl_inputs = torch.stack(inputs).to(tl_searchspace.tensor_device)
-            tl_outcomes = torch.tensor(outcomes, **tensor_kwargs).unsqueeze(-1)
-            assert tl_inputs.shape[0] == tl_outcomes.shape[0]
-            self.inputs_transfer_learning.append(tl_inputs)
-            self.outcomes_transfer_learning.append(tl_outcomes)
-
-            # fit a model and likelihood for this task
-            model, mll = self.get_model_and_likelihood(tl_searchspace, tl_inputs, tl_outcomes)
-            mll = self.fit(mll)
-            self.models_transfer_learning.append(model)
-    
-    def run(self, max_fevals: int, max_batch_size=2048):
-        """Run the Bayesian Optimization loop for at most `max_fevals`."""
-        try:
-            if not self.initial_sample_taken:
-                self.initial_sample()
-            model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
-            fevals_left = max_fevals - self.initial_sample_size
-
-            # create array to gradually reduce number of optimization spaces as fewer fevals are left
-            tensorspace_size = self.searchspace_tensors.size(0)
-            reserve_final_loops = min(3, fevals_left)   # reserve some loops at the end that are never split
-            fevals_left -= reserve_final_loops
-            num_loops = min(max(round(sqrt(fevals_left*2)), 3), fevals_left)  # set the number of loops for the array
-            avg_optimization_spaces = max(round(sqrt(tensorspace_size / max_batch_size)), 1)  # set the average number of optimization spaces
-            numspace = np.geomspace(start=avg_optimization_spaces, stop=0.1, num=num_loops)
-            nums_optimization_spaces = np.clip(np.round(numspace * (fevals_left / numspace.sum())), a_min=1, a_max=None)
-            # if there's a discrepency, add or subtract the difference from the first number
-            if np.sum(nums_optimization_spaces) != fevals_left:
-                nums_optimization_spaces[0] += fevals_left - np.sum(nums_optimization_spaces)
-            nums_optimization_spaces = np.concatenate([nums_optimization_spaces, np.full(reserve_final_loops, 1)])
-            fevals_left += reserve_final_loops
-
-            # create the acquisition functions for the transferred GPs
-            acqfs = [LogExpectedImprovement(model=m, best_f=self.outcomes_transfer_learning[i].max(), maximize=True) for i, m in enumerate(self.models_transfer_learning)]
-            acqfs_results = [list() for _ in acqfs]
-
-            # Bayesian optimization loop
-            for loop_i, num_optimization_spaces in enumerate(nums_optimization_spaces):
-                num_optimization_spaces = round(min(num_optimization_spaces, fevals_left))
-
-                # fit on a Gaussian Process model
-                mll = self.fit(mll)
-
-                # divide the optimization space into random chuncks
-                tensorspace_size = self.searchspace_tensors.size(0)
-                if num_optimization_spaces <= 1:
-                    optimization_spaces = [self.searchspace_tensors]
-                else:
-                    # shuffle the searchspace
-                    shuffled_indices = torch.randperm(tensorspace_size)
-                    tensorspace = self.searchspace_tensors[shuffled_indices]
-                    optimization_spaces = tensorspace.split(ceil(tensorspace_size / num_optimization_spaces))
-
-                # set which acqfuisition function is used at each point of the optimization space loop
-                if num_optimization_spaces > len(self.models_transfer_learning):
-                    # all models get a proportional turn
-                    selected_acqfs = np.linspace(start=0, stop=len(acqfs), num=num_optimization_spaces)
-                    selected_acqfs = selected_acqfs.round(0).astype(int)
-                    selected_acqfs = selected_acqfs.clip(0, len(acqfs)-1)
-                elif num_optimization_spaces == len(self.models_transfer_learning):
-                    # all models get one turn
-                    selected_acqfs = list(range(num_optimization_spaces))
-                elif num_optimization_spaces == 1:
-                    # only the target model is used
-                    selected_acqfs = [0]
-                else:
-                    # only select the target + best performing models (can include target as well)
-                    acqfs_means = np.array([np.mean(r) for r in acqfs_results])
-                    selected_acqfs = [0] + np.argpartition(acqfs_means, -num_optimization_spaces-1)[-num_optimization_spaces-1:]
-                    selected_acqfs = selected_acqfs.round(0).astype(int).clip(0, num_optimization_spaces-1)
-
-                # define the acquisition functions
-                acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
-                current_acqfs = [acqf] + acqfs
-                
-                # optimize acquisition function to find the next evaluation point
-                for i, optimization_space in enumerate(optimization_spaces):
-                    acqfs_index = selected_acqfs[i]
-                    candidate, _ = optimize_acqf_discrete(
-                        current_acqfs[acqfs_index], 
-                        q=1, 
-                        choices=optimization_space,
-                        max_batch_size=max_batch_size
-                    )
-                    
-                    # evaluate the new candidate
-                    result = self.evaluate_configs(candidate)
-                    if len(result) == 1:
-                        acqfs_results[acqfs_index].append(result[0])
-                    fevals_left -= 1
-
-                # reinitialize the models so they are ready for fitting on next iteration
-                if loop_i < len(nums_optimization_spaces) - 1:
-                    model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar, state_dict=model.state_dict())
-        except StopCriterionReached as e:
-            if self.tuning_options.verbose:
-                print(e)
-
-        return self.cost_func.results

From 6633bed01374372a17d0553ddf736223474fe1c9 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 17:35:22 +0100
Subject: [PATCH 110/168] Updated pyproject

---
 pyproject.toml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 4b2d721ab..633a6f04b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -62,7 +62,6 @@ kernel_tuner = "kernel_tuner.interface:entry_point"
 python = ">=3.10,<3.15"    # NOTE when changing the supported Python versions, also change the test versions in the noxfile
 numpy = "^1.26.0"          # Python 3.12 requires numpy at least 1.26
 scipy = ">=1.14.1"
-botorch = ">=0.12.0"
 packaging = "*"                 # required by file_utils
 jsonschema = "*"
 python-constraint2 = "^2.1.0"

From c39ac5a9e3137348cda1d8fb8a848acf98797ca5 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 21:30:11 +0100
Subject: [PATCH 111/168] Adjusted hyper.py for paper

---
 kernel_tuner/hyper.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 9c052d033..040fd09bb 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -61,7 +61,7 @@ def tune_hyper_params(target_strategy: str, hyper_params: dict, *args, **kwargs)
     # pass a temporary cache file to avoid duplicate execution
     if 'cache' not in kwargs:
         cachefile = get_random_unique_filename('temp_', '.json')
-        cachefile = Path("hyperparamtuning_milo_bruteforce_dual_annealing.json")
+        cachefile = Path(f"hyperparamtuning_paper_bruteforce_{target_strategy}.json")
         kwargs['cache'] = str(cachefile)
 
     def put_if_not_present(target_dict, key, value):
@@ -88,14 +88,14 @@ def put_if_not_present(target_dict, key, value):
     return list(result_unique.values()), env
 
 if __name__ == "__main__":  # TODO remove in production
-    # hyperparams = {
-    #     'popsize': [10, 20, 30],
-    #     'maxiter': [50, 100, 150],
-    #     'w': [0.25, 0.5, 0.75],
-    #     'c1': [1.0, 2.0, 3.0],
-    #     'c2': [0.5, 1.0, 1.5]
-    # }
-    # result, env = tune_hyper_params('pso', hyperparams)
+    hyperparams = {
+        'popsize': [10, 20, 30],
+        'maxiter': [50, 100, 150],
+        'w': [0.25, 0.5, 0.75],
+        'c1': [1.0, 2.0, 3.0],
+        'c2': [0.5, 1.0, 1.5]
+    }
+    result, env = tune_hyper_params('pso', hyperparams)
 
     # hyperparams = {
     #     'neighbor': ['Hamming', 'adjacent'],
@@ -105,10 +105,10 @@ def put_if_not_present(target_dict, key, value):
     # }
     # result, env = tune_hyper_params('greedy_ils', hyperparams)
 
-    hyperparams = {
-        'method': ['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 'BFGS', 'trust-constr'],
-    }
-    result, env = tune_hyper_params('dual_annealing', hyperparams)
+    # hyperparams = {
+    #     'method': ['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 'BFGS', 'trust-constr'],
+    # }
+    # result, env = tune_hyper_params('dual_annealing', hyperparams)
 
     print(result)
     print(env['best_config'])

From cc19515035e5186b2b8ddb9a5ee93439b82663de Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 7 Mar 2025 21:31:09 +0100
Subject: [PATCH 112/168] Extended hypertuner with additional kernels, adjusted
 for benchmark_hub

---
 kernel_tuner/backends/hypertuner.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index 15867fbd9..f05f56455 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -64,17 +64,28 @@ def compile(self, kernel_instance):
         # gpus = ["RTX_3090", "RTX_2080_Ti"]
         # applications = None
 
-        gpus = ["A100", "A4000", "MI250X", "W6600"]
+        gpus = ["A100", "W6600"]
+        folder = "../../autotuning_methodology/benchmark_hub/kernels"
         applications = [
             {
                 "name": "dedispersion_milo",
-                "folder": "../autotuning_methodology/benchmark_hub/kernels",
+                "folder": folder,
                 "input_file": "dedispersion_milo.json"
             },
             {
                 "name": "convolution_milo",
-                "folder": "../autotuning_methodology/benchmark_hub/kernels",
+                "folder": folder,
                 "input_file": "convolution_milo.json"
+            },
+            {
+                "name": "hotspot_milo",
+                "folder": folder,
+                "input_file": "hotspot_milo.json"
+            },
+            {
+                "name": "gemm_milo",
+                "folder": folder,
+                "input_file": "gemm_milo.json"
             }
         ]
 
@@ -93,6 +104,7 @@ def compile(self, kernel_instance):
         # any additional settings
         override = { 
             "experimental_groups_defaults": { 
+                "repeats": 10,
                 "samples": self.iterations 
             }
         }

From 638d216b6e1d4e0db0f9a1d8dcd77eadbbddaa3b Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Sat, 8 Mar 2025 07:14:58 +0100
Subject: [PATCH 113/168] Implemented passing strategy to hyperparameter tuning
 by CLI argument

---
 kernel_tuner/hyper.py | 55 +++++++++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 23 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 040fd09bb..2017a3c9b 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -3,6 +3,7 @@
 
 from pathlib import Path
 from random import randint
+from argparse import ArgumentParser
 
 import kernel_tuner
 
@@ -87,28 +88,36 @@ def put_if_not_present(target_dict, key, value):
             result_unique[config_id] = r
     return list(result_unique.values()), env
 
-if __name__ == "__main__":  # TODO remove in production
-    hyperparams = {
-        'popsize': [10, 20, 30],
-        'maxiter': [50, 100, 150],
-        'w': [0.25, 0.5, 0.75],
-        'c1': [1.0, 2.0, 3.0],
-        'c2': [0.5, 1.0, 1.5]
-    }
-    result, env = tune_hyper_params('pso', hyperparams)
-
-    # hyperparams = {
-    #     'neighbor': ['Hamming', 'adjacent'],
-    #     'restart': [True, False],
-    #     'no_improvement': [1, 10, 25, 33, 50, 66, 75, 100, 200],
-    #     'random_walk': [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99]
-    # }
-    # result, env = tune_hyper_params('greedy_ils', hyperparams)
-
-    # hyperparams = {
-    #     'method': ['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 'BFGS', 'trust-constr'],
-    # }
-    # result, env = tune_hyper_params('dual_annealing', hyperparams)
-
+if __name__ == "__main__":
+    parser = ArgumentParser()
+    parser.add_argument("strategy_to_tune")
+    args = parser.parse_args()
+    strategy_to_tune = args.strategy_to_tune
+
+    # select the hyperparameter parameters for the selected optimization algorithm
+    if strategy_to_tune.lower() == "pso":
+        hyperparams = {
+            'popsize': [10, 20, 30],
+            'maxiter': [50, 100, 150],
+            'w': [0.25, 0.5, 0.75],
+            'c1': [1.0, 2.0, 3.0],
+            'c2': [0.5, 1.0, 1.5]
+        }
+    elif strategy_to_tune.lower() == "greedy_ils":
+        hyperparams = {
+            'neighbor': ['Hamming', 'adjacent'],
+            'restart': [True, False],
+            'no_improvement': [1, 10, 25, 33, 50, 66, 75, 100, 200],
+            'random_walk': [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99]
+        }
+    elif strategy_to_tune.lower() == "dual_annealing":
+        hyperparams = {
+            'method': ['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 'BFGS', 'trust-constr'],
+        }
+    else:
+        raise ValueError(f"Invalid argument {strategy_to_tune=}")
+
+    # run the hyperparameter tuning
+    result, env = tune_hyper_params(strategy_to_tune.lower(), hyperparams)
     print(result)
     print(env['best_config'])

From d36adb5eeb3eb0d280f6672249929d6bfd5a175a Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Sat, 8 Mar 2025 07:15:28 +0100
Subject: [PATCH 114/168] Extended hyperparameter tuning with 4 more strategies

---
 kernel_tuner/hyper.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 2017a3c9b..d6785c899 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -114,6 +114,30 @@ def put_if_not_present(target_dict, key, value):
         hyperparams = {
             'method': ['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 'BFGS', 'trust-constr'],
         }
+    elif strategy_to_tune.lower() == "diff_evo":
+        hyperparams = {
+            'method': ["best1bin", "best1exp", "rand1exp", "randtobest1exp", "best2exp", "rand2exp", "randtobest1bin", "best2bin", "rand2bin", "rand1bin"],
+            'popsize': [10, 20, 30],
+            'maxiter': [50, 100, 150],
+        }
+    elif strategy_to_tune.lower() == "basinhopping":
+        hyperparams = {
+            'method': ["Nelder-Mead", "Powell", "CG", "BFGS", "L-BFGS-B", "TNC", "COBYLA", "SLSQP"],
+            'T': [0.5, 1.0, 1.5],
+        }
+    elif strategy_to_tune.lower() == "genetic_algorithm":
+        hyperparams = {
+            'method': ["single_point", "two_point", "uniform", "disruptive_uniform"],
+            'popsize': [10, 20, 30],
+            'maxiter': [50, 100, 150],
+            'mutation_chance': [5, 10, 20]
+        }
+    elif strategy_to_tune.lower() == "mls":
+        hyperparams = {
+            'neighbor': ["Hamming", "adjacent"],
+            'restart': [True, False],
+            'randomize': [True, False]
+        }
     else:
         raise ValueError(f"Invalid argument {strategy_to_tune=}")
 

From 4e46459b2859a5cabd97292eb72ce29abb5b4b1a Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Sat, 8 Mar 2025 23:39:27 +0100
Subject: [PATCH 115/168] Generate a unique filename for generated experiment
 files to avoid collisions in parallel runs

---
 kernel_tuner/backends/hypertuner.py | 5 ++++-
 kernel_tuner/hyper.py               | 6 +++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index f05f56455..ec818b6b3 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -111,7 +111,7 @@ def compile(self, kernel_instance):
 
         name = kernel_instance.name if len(kernel_instance.name) > 0 else kernel_instance.kernel_source.kernel_name
         experiments_filepath = generate_experiment_file(name, path, searchspace_strategies, applications, gpus, 
-                                                        override=override, overwrite_existing_file=True)
+                                                        override=override, generate_unique_file=True, overwrite_existing_file=True)
         return str(experiments_filepath)
     
     def start_event(self):
@@ -134,6 +134,9 @@ def run_kernel(self, func, gpu_args=None, threads=None, grid=None, stream=None):
         # run the methodology to get a fitness score for this configuration
         scores = get_strategy_scores(str(experiments_filepath))
         self.last_score = scores[list(scores.keys())[0]]['score']
+
+        # remove the experiments file
+        experiments_filepath.unlink()
     
     def memset(self, allocation, value, size):
         return super().memset(allocation, value, size)
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index d6785c899..6df76370f 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -75,8 +75,12 @@ def put_if_not_present(target_dict, key, value):
     kwargs['verify'] = None
     arguments = [target_strategy]
 
+    # IMPORTANT when running this script in parallel, always make sure the below name is unique among your runs!
+    # e.g. when parallalizing over the hypertuning of multiple strategies, use the strategy name
+    name = f"hyperparamtuning_{target_strategy.lower()}"
+
     # execute the hyperparameter tuning
-    result, env = kernel_tuner.tune_kernel('hyperparamtuning', None, [], arguments, hyper_params, *args, lang='Hypertuner',
+    result, env = kernel_tuner.tune_kernel(name, None, [], arguments, hyper_params, *args, lang='Hypertuner',
                                     objective='score', objective_higher_is_better=True, iterations=iterations, **kwargs)
     
     # remove the temporary cachefile and return only unique results in order

From d28fdbe4318b05b7b9f5999e63a4e30e48cc294d Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Mon, 10 Mar 2025 17:32:23 +0100
Subject: [PATCH 116/168] Adjusted the test / train sets and number of repeats

---
 kernel_tuner/backends/hypertuner.py | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index ec818b6b3..70921fe4c 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -61,10 +61,7 @@ def compile(self, kernel_instance):
         path.mkdir(exist_ok=True)
 
         # TODO get applications & GPUs args from benchmark
-        # gpus = ["RTX_3090", "RTX_2080_Ti"]
-        # applications = None
-
-        gpus = ["A100", "W6600"]
+        gpus = ["A100", "A4000", "MI250X", "W6600"]
         folder = "../../autotuning_methodology/benchmark_hub/kernels"
         applications = [
             {
@@ -72,21 +69,21 @@ def compile(self, kernel_instance):
                 "folder": folder,
                 "input_file": "dedispersion_milo.json"
             },
-            {
-                "name": "convolution_milo",
-                "folder": folder,
-                "input_file": "convolution_milo.json"
-            },
+            # {
+            #     "name": "convolution_milo",
+            #     "folder": folder,
+            #     "input_file": "convolution_milo.json"
+            # },
             {
                 "name": "hotspot_milo",
                 "folder": folder,
                 "input_file": "hotspot_milo.json"
             },
-            {
-                "name": "gemm_milo",
-                "folder": folder,
-                "input_file": "gemm_milo.json"
-            }
+            # {
+            #     "name": "gemm_milo",
+            #     "folder": folder,
+            #     "input_file": "gemm_milo.json"
+            # }
         ]
 
         # strategy settings
@@ -104,7 +101,7 @@ def compile(self, kernel_instance):
         # any additional settings
         override = { 
             "experimental_groups_defaults": { 
-                "repeats": 10,
+                "repeats": 20,
                 "samples": self.iterations 
             }
         }

From 49fa92f4205bf2df4e94e08f9c4a12efc7978f00 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Mon, 10 Mar 2025 23:31:33 +0100
Subject: [PATCH 117/168] Added simulated_annealing to hyperparameter tuning,
 adjusted greedy_ils parameters

---
 kernel_tuner/hyper.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 6df76370f..88a47a2bb 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -111,8 +111,8 @@ def put_if_not_present(target_dict, key, value):
         hyperparams = {
             'neighbor': ['Hamming', 'adjacent'],
             'restart': [True, False],
-            'no_improvement': [1, 10, 25, 33, 50, 66, 75, 100, 200],
-            'random_walk': [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99]
+            'no_improvement': [10, 25, 50, 75],
+            'random_walk': [0.1, 0.2, 0.3, 0.4, 0.5]
         }
     elif strategy_to_tune.lower() == "dual_annealing":
         hyperparams = {
@@ -142,6 +142,13 @@ def put_if_not_present(target_dict, key, value):
             'restart': [True, False],
             'randomize': [True, False]
         }
+    elif strategy_to_tune.lower() == "simulated_annealing":
+        hyperparams = {
+            'T': [0.5, 1.0, 1.5],
+            'T_min': [0.0001, 0.001, 0.01],
+            'alpha': [0.9925, 0.995, 0.9975],
+            'maxiter': [1, 2, 3]
+        }
     else:
         raise ValueError(f"Invalid argument {strategy_to_tune=}")
 

From 1056269590a8d19c12003581bb2f0c683cb9d526 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 13 Mar 2025 10:21:01 +0100
Subject: [PATCH 118/168] Updated hyperparameters

---
 kernel_tuner/hyper.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 88a47a2bb..ba3e615be 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -103,7 +103,7 @@ def put_if_not_present(target_dict, key, value):
         hyperparams = {
             'popsize': [10, 20, 30],
             'maxiter': [50, 100, 150],
-            'w': [0.25, 0.5, 0.75],
+            # 'w': [0.25, 0.5, 0.75],   # disabled due to low influence according to KW-test (H=0.0215) and mutual information
             'c1': [1.0, 2.0, 3.0],
             'c2': [0.5, 1.0, 1.5]
         }
@@ -127,7 +127,7 @@ def put_if_not_present(target_dict, key, value):
     elif strategy_to_tune.lower() == "basinhopping":
         hyperparams = {
             'method': ["Nelder-Mead", "Powell", "CG", "BFGS", "L-BFGS-B", "TNC", "COBYLA", "SLSQP"],
-            'T': [0.5, 1.0, 1.5],
+            'T': [0.1, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5],
         }
     elif strategy_to_tune.lower() == "genetic_algorithm":
         hyperparams = {

From 7ce2234b0a2c373d9bd15b6349af233b3c5bf4f0 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 13 Mar 2025 10:21:37 +0100
Subject: [PATCH 119/168] Updated search spaces used in hyperparameter tuning
 and number of repeats

---
 kernel_tuner/backends/hypertuner.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index 70921fe4c..a7ee2665e 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -61,7 +61,7 @@ def compile(self, kernel_instance):
         path.mkdir(exist_ok=True)
 
         # TODO get applications & GPUs args from benchmark
-        gpus = ["A100", "A4000", "MI250X", "W6600"]
+        gpus = ["A100", "A4000", "MI250X"]
         folder = "../../autotuning_methodology/benchmark_hub/kernels"
         applications = [
             {
@@ -69,21 +69,21 @@ def compile(self, kernel_instance):
                 "folder": folder,
                 "input_file": "dedispersion_milo.json"
             },
-            # {
-            #     "name": "convolution_milo",
-            #     "folder": folder,
-            #     "input_file": "convolution_milo.json"
-            # },
             {
                 "name": "hotspot_milo",
                 "folder": folder,
                 "input_file": "hotspot_milo.json"
             },
-            # {
-            #     "name": "gemm_milo",
-            #     "folder": folder,
-            #     "input_file": "gemm_milo.json"
-            # }
+            {
+                "name": "convolution_milo",
+                "folder": folder,
+                "input_file": "convolution_milo.json"
+            },
+            {
+                "name": "gemm_milo",
+                "folder": folder,
+                "input_file": "gemm_milo.json"
+            }
         ]
 
         # strategy settings
@@ -101,7 +101,7 @@ def compile(self, kernel_instance):
         # any additional settings
         override = { 
             "experimental_groups_defaults": { 
-                "repeats": 20,
+                "repeats": 25,
                 "samples": self.iterations 
             }
         }

From 1ed18937c8a75245ca5faf004e8fbb0c7fdd7b0e Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Sat, 15 Mar 2025 18:01:02 +0100
Subject: [PATCH 120/168] Added bayes_opt to hyperparamtuning

---
 kernel_tuner/hyper.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index ba3e615be..a831370ef 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -149,6 +149,14 @@ def put_if_not_present(target_dict, key, value):
             'alpha': [0.9925, 0.995, 0.9975],
             'maxiter': [1, 2, 3]
         }
+    elif strategy_to_tune.lower() == "bayes_opt":
+        hyperparams = {
+            # 'covariancekernel': ["constantrbf", "rbf", "matern32", "matern52"],
+            # 'covariancelengthscale': [1.0, 1.5, 2.0],
+            'method': ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast", "multi-ultrafast"],
+            'samplingmethod': ["random", "LHS"],
+            'popsize': [10, 20, 30]
+        }
     else:
         raise ValueError(f"Invalid argument {strategy_to_tune=}")
 

From 1e2532f132ed289a7e14f8a0055f013b0363b595 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Mon, 17 Mar 2025 07:49:10 +0100
Subject: [PATCH 121/168] Fixed link with hyperparameter tuning attributes

---
 kernel_tuner/interface.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 45a4bfef9..54b9f9974 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -886,6 +886,16 @@ def tune_kernel_T1(
     problem_size = kernelspec["ProblemSize"]
     device = kernelspec["Device"]["Name"]
     strategy = inputs["Search"]["Name"]
+    if "Attributes" in inputs["Search"]:
+        strategy_options = {}
+        for attribute in inputs["Search"]["Attributes"]:
+            strategy_options[attribute["Name"]] = attribute["Value"]
+    if "Budget" in inputs:
+        budget = inputs["Budget"][0]
+        assert budget["Type"] == "ConfigurationCount"
+        if strategy_options is None:
+            strategy_options = {}
+        strategy_options["max_fevals"] = budget["BudgetValue"]
 
     # set the cache path
     if cache_filepath is None and "SimulationInput" in kernelspec:

From afbf83ea053cc9e2c318a686fb7dc3442f1cd1e6 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 18 Mar 2025 00:47:22 +0100
Subject: [PATCH 122/168] Added support for evaluating T1 strings as a type

---
 kernel_tuner/interface.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 54b9f9974..5f4c1b628 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -918,6 +918,8 @@ def tune_kernel_T1(
                 tune_param = eval(vals)
             else:
                 tune_param = literal_eval(vals)
+        if param["Type"] == "string":
+            tune_param = eval(param["Values"])
         if tune_param is not None:
             tune_params[param["Name"]] = tune_param
         else:

From 84a2b1f0278c9ddbdbca39e1f5a28636a58284e3 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 18 Mar 2025 00:47:57 +0100
Subject: [PATCH 123/168] Added automatic scaling of random sample size if
 necessary

---
 kernel_tuner/strategies/genetic_algorithm.py |  6 ++++-
 kernel_tuner/strategies/pso.py               | 23 ++++++++++++--------
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index c29c150b5..461f655e9 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -1,4 +1,5 @@
 """A simple genetic algorithm for parameter search."""
+
 import random
 
 import numpy as np
@@ -20,6 +21,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
     options = tuning_options.strategy_options
     pop_size, generations, method, mutation_chance = common.get_options(options, _options)
+    pop_size = min(round(searchspace.size / 2), pop_size)
     crossover = supported_methods[method]
 
     best_score = 1e20
@@ -46,7 +48,9 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
         # 'best_score' is used only for printing
         if tuning_options.verbose and cost_func.results:
-            best_score = util.get_best_config(cost_func.results, tuning_options.objective, tuning_options.objective_higher_is_better)[tuning_options.objective]
+            best_score = util.get_best_config(
+                cost_func.results, tuning_options.objective, tuning_options.objective_higher_is_better
+            )[tuning_options.objective]
 
         if tuning_options.verbose:
             print("Generation %d, best_score %f" % (generation, best_score))
diff --git a/kernel_tuner/strategies/pso.py b/kernel_tuner/strategies/pso.py
index 5b0df1429..19ada64ac 100644
--- a/kernel_tuner/strategies/pso.py
+++ b/kernel_tuner/strategies/pso.py
@@ -1,4 +1,5 @@
 """The strategy that uses particle swarm optimization."""
+
 import random
 import sys
 
@@ -9,22 +10,25 @@
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc, scale_from_params
 
-_options = dict(popsize=("Population size", 20),
-                       maxiter=("Maximum number of iterations", 100),
-                       w=("Inertia weight constant", 0.5),
-                       c1=("Cognitive constant", 2.0),
-                       c2=("Social constant", 1.0))
+_options = dict(
+    popsize=("Population size", 20),
+    maxiter=("Maximum number of iterations", 100),
+    w=("Inertia weight constant", 0.5),
+    c1=("Cognitive constant", 2.0),
+    c2=("Social constant", 1.0),
+)
+
 
 def tune(searchspace: Searchspace, runner, tuning_options):
 
-    #scale variables in x because PSO works with velocities to visit different configurations
+    # scale variables in x because PSO works with velocities to visit different configurations
     cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True)
 
-    #using this instead of get_bounds because scaling is used
+    # using this instead of get_bounds because scaling is used
     bounds, _, eps = cost_func.get_bounds_x0_eps()
 
-
     num_particles, maxiter, w, c1, c2 = common.get_options(tuning_options.strategy_options, _options)
+    num_particles = min(round(searchspace.size / 2), num_particles)
 
     best_score_global = sys.float_info.max
     best_position_global = []
@@ -64,7 +68,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
             swarm[j].update_position(bounds)
 
     if tuning_options.verbose:
-        print('Final result:')
+        print("Final result:")
         print(best_position_global)
         print(best_score_global)
 
@@ -73,6 +77,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
 tune.__doc__ = common.get_strategy_docstring("Particle Swarm Optimization (PSO)", _options)
 
+
 class Particle:
     def __init__(self, bounds):
         self.ndim = len(bounds)

From 9e8047944ff33c075aa994986ae988a4b57cfc44 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 18 Mar 2025 00:48:15 +0100
Subject: [PATCH 124/168] Formatting

---
 kernel_tuner/util.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index a4fb42bd3..01cca83a5 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -459,13 +459,13 @@ def get_interval(a: list):
     a_min = min(a)
     a_max = max(a)
     if len(a) <= 2:
-        return (a_min, a_max, a_max-a_min)
+        return (a_min, a_max, a_max - a_min)
     # determine the first step size
-    step = a[1]-a_min
+    step = a[1] - a_min
     # for each element, the step size should be equal to the first step
     for i, e in enumerate(a):
-        if e-a[i-1] != step:
-            return None 
+        if e - a[i - 1] != step:
+            return None
     result = (a_min, a_max, step)
     if not all(isinstance(e, (int, float)) for e in result):
         return None
@@ -1039,12 +1039,12 @@ def to_equality_constraint(
                 return AllDifferentConstraint()
             return ValueError(f"Not possible: comparator should be '==' or '!=', is {comparator}")
         return None
-    
+
     # remove functionally duplicate restrictions (preserves order and whitespace)
     if all(isinstance(r, str) for r in restrictions):
         # clean the restriction strings to functional equivalence
-        restrictions_cleaned = [r.replace(' ', '') for r in restrictions]
-        restrictions_cleaned_unique = list(dict.fromkeys(restrictions_cleaned)) # dict preserves order
+        restrictions_cleaned = [r.replace(" ", "") for r in restrictions]
+        restrictions_cleaned_unique = list(dict.fromkeys(restrictions_cleaned))  # dict preserves order
         # get the indices of the unique restrictions, use these to build a new list of restrictions
         restrictions_unique_indices = [restrictions_cleaned.index(r) for r in restrictions_cleaned_unique]
         restrictions = [restrictions[i] for i in restrictions_unique_indices]
@@ -1107,8 +1107,12 @@ def to_equality_constraint(
             # combine multiple restrictions into one
             for res_tuple in res_dict.values():
                 res, params_used = res_tuple
-                params_used = list(dict.fromkeys(params_used))   # param_used should only contain unique, dict preserves order
-                parsed_restrictions_pyatf.append((f"def r({', '.join(params_used)}): return ({') and ('.join(res)}) \n", params_used))
+                params_used = list(
+                    dict.fromkeys(params_used)
+                )  # param_used should only contain unique, dict preserves order
+                parsed_restrictions_pyatf.append(
+                    (f"def r({', '.join(params_used)}): return ({') and ('.join(res)}) \n", params_used)
+                )
             parsed_restrictions = parsed_restrictions_pyatf
     else:
         # create one monolithic function

From ce552d06de058d402215aec4f6ab313f9da15b37 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Tue, 18 Mar 2025 06:07:05 +0100
Subject: [PATCH 125/168] Minor update to hyperparameter tuning

---
 kernel_tuner/backends/hypertuner.py | 2 +-
 kernel_tuner/hyper.py               | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index a7ee2665e..b15da315a 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -101,7 +101,7 @@ def compile(self, kernel_instance):
         # any additional settings
         override = { 
             "experimental_groups_defaults": { 
-                "repeats": 25,
+                "repeats": 10,
                 "samples": self.iterations 
             }
         }
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index a831370ef..27672cf97 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -136,7 +136,7 @@ def put_if_not_present(target_dict, key, value):
             'maxiter': [50, 100, 150],
             'mutation_chance': [5, 10, 20]
         }
-    elif strategy_to_tune.lower() == "mls":
+    elif strategy_to_tune.lower() == "greedy_mls":
         hyperparams = {
             'neighbor': ["Hamming", "adjacent"],
             'restart': [True, False],

From 2714c2880dbb4e581a248a8438a4e7718214d687 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 18 Mar 2025 06:34:29 +0100
Subject: [PATCH 126/168] Set new default hyperparameters for PSO, dual
 annealing and simulated annealing

---
 kernel_tuner/strategies/dual_annealing.py      | 2 +-
 kernel_tuner/strategies/pso.py                 | 6 +++---
 kernel_tuner/strategies/simulated_annealing.py | 8 ++++----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel_tuner/strategies/dual_annealing.py b/kernel_tuner/strategies/dual_annealing.py
index 0f44bd849..ace532534 100644
--- a/kernel_tuner/strategies/dual_annealing.py
+++ b/kernel_tuner/strategies/dual_annealing.py
@@ -8,7 +8,7 @@
 
 supported_methods = ['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 'BFGS', 'trust-constr']
 
-_options = dict(method=(f"Local optimization method to use, choose any from {supported_methods}", "Powell"))
+_options = dict(method=(f"Local optimization method to use, choose any from {supported_methods}", "COBYLA"))
 
 def tune(searchspace: Searchspace, runner, tuning_options):
 
diff --git a/kernel_tuner/strategies/pso.py b/kernel_tuner/strategies/pso.py
index 19ada64ac..cc6b82d49 100644
--- a/kernel_tuner/strategies/pso.py
+++ b/kernel_tuner/strategies/pso.py
@@ -12,10 +12,10 @@
 
 _options = dict(
     popsize=("Population size", 20),
-    maxiter=("Maximum number of iterations", 100),
+    maxiter=("Maximum number of iterations", 150),
     w=("Inertia weight constant", 0.5),
-    c1=("Cognitive constant", 2.0),
-    c2=("Social constant", 1.0),
+    c1=("Cognitive constant", 3.0),
+    c2=("Social constant", 1.5),
 )
 
 
diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py
index dcb9e3f26..d73c0ad5e 100644
--- a/kernel_tuner/strategies/simulated_annealing.py
+++ b/kernel_tuner/strategies/simulated_annealing.py
@@ -9,10 +9,10 @@
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc
 
-_options = dict(T=("Starting temperature", 1.0),
-                       T_min=("End temperature", 0.001),
-                       alpha=("Alpha parameter", 0.995),
-                       maxiter=("Number of iterations within each annealing step", 1))
+_options = dict(T=("Starting temperature", 0.5),
+                       T_min=("End temperature", 0.0001),
+                       alpha=("Alpha parameter", 0.9975),
+                       maxiter=("Number of iterations within each annealing step", 2))
 
 def tune(searchspace: Searchspace, runner, tuning_options):
     # SA works with real parameter values and does not need scaling

From 25d5202c8689f557f2f19e19488aa20d26a08fe8 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Tue, 18 Mar 2025 16:41:23 +0100
Subject: [PATCH 127/168] Set new default hyperparameters for Genetic Algorithm
 and Differential Evolution

---
 kernel_tuner/strategies/diff_evo.py          | 2 +-
 kernel_tuner/strategies/genetic_algorithm.py | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index 62e966f33..cd089ae1e 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -9,7 +9,7 @@
 supported_methods = ["best1bin", "best1exp", "rand1exp", "randtobest1exp", "best2exp", "rand2exp",
                      "randtobest1bin", "best2bin", "rand2bin", "rand1bin"]
 
-_options = dict(method=(f"Creation method for new population, any of {supported_methods}", "best1bin"),
+_options = dict(method=(f"Creation method for new population, any of {supported_methods}", "randtobest1bin"),
                        popsize=("Population size", 20),
                        maxiter=("Number of generations", 100))
 
diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 461f655e9..0ca0f5f75 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -10,10 +10,10 @@
 from kernel_tuner.strategies.common import CostFunc
 
 _options = dict(
-    popsize=("population size", 20),
-    maxiter=("maximum number of generations", 100),
+    popsize=("population size", 30),
+    maxiter=("maximum number of generations", 30),
     method=("crossover method to use, choose any from single_point, two_point, uniform, disruptive_uniform", "uniform"),
-    mutation_chance=("chance to mutate is 1 in mutation_chance", 10),
+    mutation_chance=("chance to mutate is 1 in mutation_chance", 20),
 )
 
 

From 651c42c2fc3e6153e3c4a235125e7ad95e26371f Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 11:28:08 +0100
Subject: [PATCH 128/168] Avoid requesting more random samples than the
 searchspace size

---
 kernel_tuner/searchspace.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 8265c44ab..6331bed33 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -4,6 +4,7 @@
 from pathlib import Path
 from random import choice, shuffle
 from typing import List, Union
+from warnings import warn
 
 import numpy as np
 from constraint import (
@@ -69,9 +70,7 @@ def __init__(
             ), "When `from_cache` is used, the positional arguments must be set to None."
             tune_params = from_cache["tune_params"]
         if from_cache is None:
-            assert (
-                tune_params is not None and max_threads is not None
-            ), "Must specify positional arguments."
+            assert tune_params is not None and max_threads is not None, "Must specify positional arguments."
 
         # set the object attributes using the arguments
         framework_l = framework.lower()
@@ -864,6 +863,11 @@ def get_random_sample_indices(self, num_samples: int) -> np.ndarray:
 
     def get_random_sample(self, num_samples: int) -> List[tuple]:
         """Get the parameter configurations for a random, non-conflicting sample (caution: not unique in consecutive calls)."""
+        if self.size < num_samples:
+            warn(
+                f"Too many samples requested ({num_samples}), reducing the number of samples to the searchspace size ({self.size})"
+            )
+            num_samples = self.size
         return self.get_param_configs_at_indices(self.get_random_sample_indices(num_samples))
 
     def get_neighbors_indices_no_cache(self, param_config: tuple, neighbor_method=None) -> List[int]:

From b953a69af37bcb3beedb0003db7758af0e25cc8d Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 11:29:00 +0100
Subject: [PATCH 129/168] Clearer message when exceeding the stop criterion

---
 kernel_tuner/util.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py
index 01cca83a5..adaa2b6df 100644
--- a/kernel_tuner/util.py
+++ b/kernel_tuner/util.py
@@ -193,9 +193,9 @@ def check_argument_list(kernel_name, kernel_string, args):
 def check_stop_criterion(to):
     """Checks if max_fevals is reached or time limit is exceeded."""
     if "max_fevals" in to and len(to.unique_results) >= to.max_fevals:
-        raise StopCriterionReached("max_fevals reached")
+        raise StopCriterionReached(f"max_fevals reached ({len(to.unique_results)} >= {to.max_fevals})")
     if "time_limit" in to and (((time.perf_counter() - to.start_time) + (to.simulated_time * 1e-3)) > to.time_limit):
-        raise StopCriterionReached("time limit exceeded")
+        raise StopCriterionReached(f"time limit ({to.time_limit}) exceeded")
 
 
 def check_tune_params_list(tune_params, observers, simulation_mode=False):

From a401008651bd7765d49988515c5a0848518ec2f7 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 11:30:17 +0100
Subject: [PATCH 130/168] Add soft maximum function evaluations limit to dual
 annealing

---
 kernel_tuner/strategies/dual_annealing.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/strategies/dual_annealing.py b/kernel_tuner/strategies/dual_annealing.py
index ace532534..7d9868c5e 100644
--- a/kernel_tuner/strategies/dual_annealing.py
+++ b/kernel_tuner/strategies/dual_annealing.py
@@ -12,7 +12,8 @@
 
 def tune(searchspace: Searchspace, runner, tuning_options):
 
-    method = common.get_options(tuning_options.strategy_options, _options)[0]
+    _options["max_fevals"] = ("", searchspace.size)
+    method, max_fevals = common.get_options(tuning_options.strategy_options, _options)
 
     #scale variables in x to make 'eps' relevant for multiple variables
     cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True)
@@ -29,7 +30,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
     opt_result = None
     try:
-        opt_result = scipy.optimize.dual_annealing(cost_func, bounds, minimizer_kwargs=minimizer_kwargs, x0=x0)
+        opt_result = scipy.optimize.dual_annealing(cost_func, bounds, minimizer_kwargs=minimizer_kwargs, x0=x0, maxfun=max_fevals)
     except util.StopCriterionReached as e:
         if tuning_options.verbose:
             print(e)

From 425b4f4f1935158fe1a8a0cbf624f2dea1b1e025 Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 11:30:43 +0100
Subject: [PATCH 131/168] Improved rounding of encoded parameter values

---
 kernel_tuner/strategies/common.py | 40 ++++++++++++++++++++++++-------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index eb0b81e27..30c53bc10 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -61,8 +61,16 @@ class CostFunc:
     """Class encapsulating the CostFunc method."""
 
     def __init__(
-        self, searchspace: Searchspace, tuning_options, runner, *, 
-        scaling=False, snap=True, encode_non_numeric=False, return_invalid=False, return_raw=None
+        self,
+        searchspace: Searchspace,
+        tuning_options,
+        runner,
+        *,
+        scaling=False,
+        snap=True,
+        encode_non_numeric=False,
+        return_invalid=False,
+        return_raw=None,
     ):
         """An abstract method to handle evaluation of configurations.
 
@@ -75,7 +83,7 @@ def __init__(
             encode_non_numeric: whether to externally encode non-numeric parameter values. Defaults to False.
             return_invalid: whether to return the util.ErrorConfig of an invalid configuration. Defaults to False.
             return_raw: returns (result, results[raw]). Key inferred from objective if set to True. Defaults to None.
-        """        
+        """
         self.runner = runner
         self.snap = snap
         self.scaling = scaling
@@ -100,7 +108,9 @@ def __init__(
             for i, param_values in enumerate(self.searchspace.params_values):
                 encoded_values = param_values
                 if not all(isinstance(v, numbers.Real) for v in param_values):
-                    encoded_values = np.arange(len(param_values))
+                    encoded_values = np.arange(
+                        len(param_values)
+                    )  # NOTE when changing this, adjust the rounding in encoded_to_params
                     self._map_param_to_encoded[i] = dict(zip(param_values, encoded_values))
                     self._map_encoded_to_param[i] = dict(zip(encoded_values, param_values))
                 self.encoded_params_values.append(encoded_values)
@@ -215,17 +225,29 @@ def get_bounds(self):
         for values in self.encoded_params_values if self.encode_non_numeric else self.searchspace.params_values:
             bounds.append((min(values), max(values)))
         return bounds
-    
+
     def encoded_to_params(self, config):
         """Convert from an encoded configuration to the real parameters."""
         if not self.encode_non_numeric:
             raise ValueError("'encode_non_numeric' must be set to true to use this function.")
         params = []
         for i, v in enumerate(config):
-            params.append(self._map_encoded_to_param[i][v] if i in self._map_encoded_to_param else v)
-        assert len(params) == len(config)            
+            # params.append(self._map_encoded_to_param[i][v] if i in self._map_encoded_to_param else v)
+            if i in self._map_encoded_to_param:
+                encoding = self._map_encoded_to_param[i]
+                if v in encoding:
+                    param = encoding[v]
+                elif isinstance(v, float):
+                    # try to resolve a rounding error due to floating point arithmetic / continuous solver
+                    param = encoding[round(v)]
+                else:
+                    raise ValueError(f"Encoded value {v} not found in {self._map_encoded_to_param[i]}")
+            else:
+                param = v
+            params.append(param)
+        assert len(params) == len(config)
         return params
-    
+
     def params_to_encoded(self, config):
         """Convert from a parameter configuration to the encoded configuration."""
         if not self.encode_non_numeric:
@@ -233,7 +255,7 @@ def params_to_encoded(self, config):
         encoded = []
         for i, v in enumerate(config):
             encoded.append(self._map_param_to_encoded[i][v] if i in self._map_param_to_encoded else v)
-        assert len(encoded) == len(config)            
+        assert len(encoded) == len(config)
         return encoded
 
 

From 3bba92367743179e9662cbaf8c9c0a28824b01c6 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 11:59:39 +0100
Subject: [PATCH 132/168] Updated pyproject and requirements files

---
 doc/requirements.txt      | 176 ++++++++--------
 doc/requirements_test.txt | 411 +++++++++++++++++++++++++-------------
 pyproject.toml            |   2 +-
 3 files changed, 357 insertions(+), 232 deletions(-)

diff --git a/doc/requirements.txt b/doc/requirements.txt
index 355caa7a6..5f316bb33 100644
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@@ -1,89 +1,87 @@
-alabaster==0.7.16 ; python_version >= "3.9" and python_version < "3.15"
-asttokens==3.0.0 ; python_version >= "3.9" and python_version < "3.15"
-attrs==25.1.0 ; python_version >= "3.9" and python_version < "3.15"
-babel==2.17.0 ; python_version >= "3.9" and python_version < "3.15"
-beautifulsoup4==4.13.3 ; python_version >= "3.9" and python_version < "3.15"
-bleach[css]==6.2.0 ; python_version >= "3.9" and python_version < "3.15"
-certifi==2025.1.31 ; python_version >= "3.9" and python_version < "3.15"
-cffi==1.17.1 ; python_version >= "3.9" and python_version < "3.15" and implementation_name == "pypy"
-charset-normalizer==3.4.1 ; python_version >= "3.9" and python_version < "3.15"
-colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.15" and sys_platform == "win32"
-decorator==5.2.1 ; python_version >= "3.9" and python_version < "3.15"
-defusedxml==0.7.1 ; python_version >= "3.9" and python_version < "3.15"
-docutils==0.20.1 ; python_version >= "3.9" and python_version < "3.15"
-dom-toml==2.0.1 ; python_version >= "3.9" and python_version < "3.15"
-domdf-python-tools==3.10.0 ; python_version >= "3.9" and python_version < "3.15"
-exceptiongroup==1.2.2 ; python_version >= "3.9" and python_version < "3.11"
-executing==2.2.0 ; python_version >= "3.9" and python_version < "3.15"
-fastjsonschema==2.21.1 ; python_version >= "3.9" and python_version < "3.15"
-idna==3.10 ; python_version >= "3.9" and python_version < "3.15"
-imagesize==1.4.1 ; python_version >= "3.9" and python_version < "3.15"
-importlib-metadata==8.6.1 ; python_version >= "3.9" and python_version < "3.10"
-iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "3.15"
-ipython==8.18.1 ; python_version >= "3.9" and python_version < "3.15"
-jedi==0.19.2 ; python_version >= "3.9" and python_version < "3.15"
-jinja2==3.1.6 ; python_version >= "3.9" and python_version < "3.15"
-joblib==1.4.2 ; python_version >= "3.9" and python_version < "3.15"
-jsonschema-specifications==2024.10.1 ; python_version >= "3.9" and python_version < "3.15"
-jsonschema==4.23.0 ; python_version >= "3.9" and python_version < "3.15"
-jupyter-client==8.6.3 ; python_version >= "3.9" and python_version < "3.15"
-jupyter-core==5.7.2 ; python_version >= "3.9" and python_version < "3.15"
-jupyterlab-pygments==0.3.0 ; python_version >= "3.9" and python_version < "3.15"
-markupsafe==2.1.5 ; python_version >= "3.9" and python_version < "3.15"
-matplotlib-inline==0.1.7 ; python_version >= "3.9" and python_version < "3.15"
-mistune==3.1.2 ; python_version >= "3.9" and python_version < "3.15"
-natsort==8.4.0 ; python_version >= "3.9" and python_version < "3.15"
-nbclient==0.10.2 ; python_version >= "3.9" and python_version < "3.15"
-nbconvert==7.16.6 ; python_version >= "3.9" and python_version < "3.15"
-nbformat==5.10.4 ; python_version >= "3.9" and python_version < "3.15"
-nbsphinx==0.9.7 ; python_version >= "3.9" and python_version < "3.15"
-numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.15"
-packaging==24.2 ; python_version >= "3.9" and python_version < "3.15"
-pandas==2.2.3 ; python_version >= "3.9" and python_version < "3.15"
-pandocfilters==1.5.1 ; python_version >= "3.9" and python_version < "3.15"
-parso==0.8.4 ; python_version >= "3.9" and python_version < "3.15"
-pexpect==4.9.0 ; python_version >= "3.9" and python_version < "3.15" and sys_platform != "win32"
-platformdirs==4.3.6 ; python_version >= "3.9" and python_version < "3.15"
-pluggy==1.5.0 ; python_version >= "3.9" and python_version < "3.15"
-prompt-toolkit==3.0.50 ; python_version >= "3.9" and python_version < "3.15"
-ptyprocess==0.7.0 ; python_version >= "3.9" and python_version < "3.15" and sys_platform != "win32"
-pure-eval==0.2.3 ; python_version >= "3.9" and python_version < "3.15"
-pycparser==2.22 ; python_version >= "3.9" and python_version < "3.15" and implementation_name == "pypy"
-pygments==2.19.1 ; python_version >= "3.9" and python_version < "3.15"
-pytest==8.3.5 ; python_version >= "3.9" and python_version < "3.15"
-python-constraint2==2.1.0 ; python_version >= "3.9" and python_version < "3.15"
-python-dateutil==2.9.0.post0 ; python_version >= "3.9" and python_version < "3.15"
-pytz==2025.1 ; python_version >= "3.9" and python_version < "3.15"
-pywin32==308 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.9" and python_version < "3.15"
-pyzmq==26.2.1 ; python_version >= "3.9" and python_version < "3.15"
-referencing==0.36.2 ; python_version >= "3.9" and python_version < "3.15"
-requests==2.32.3 ; python_version >= "3.9" and python_version < "3.15"
-rpds-py==0.23.1 ; python_version >= "3.9" and python_version < "3.15"
-scikit-learn==1.6.1 ; python_version >= "3.9" and python_version < "3.15"
-scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.15"
-six==1.17.0 ; python_version >= "3.9" and python_version < "3.15"
-snowballstemmer==2.2.0 ; python_version >= "3.9" and python_version < "3.15"
-soupsieve==2.6 ; python_version >= "3.9" and python_version < "3.15"
-sphinx-pyproject==0.3.0 ; python_version >= "3.9" and python_version < "3.15"
-sphinx-rtd-theme==2.0.0 ; python_version >= "3.9" and python_version < "3.15"
-sphinx==7.4.7 ; python_version >= "3.9" and python_version < "3.15"
-sphinxcontrib-applehelp==2.0.0 ; python_version >= "3.9" and python_version < "3.15"
-sphinxcontrib-devhelp==2.0.0 ; python_version >= "3.9" and python_version < "3.15"
-sphinxcontrib-htmlhelp==2.1.0 ; python_version >= "3.9" and python_version < "3.15"
-sphinxcontrib-jquery==4.1 ; python_version >= "3.9" and python_version < "3.15"
-sphinxcontrib-jsmath==1.0.1 ; python_version >= "3.9" and python_version < "3.15"
-sphinxcontrib-qthelp==2.0.0 ; python_version >= "3.9" and python_version < "3.15"
-sphinxcontrib-serializinghtml==2.0.0 ; python_version >= "3.9" and python_version < "3.15"
-stack-data==0.6.3 ; python_version >= "3.9" and python_version < "3.15"
-threadpoolctl==3.5.0 ; python_version >= "3.9" and python_version < "3.15"
-tinycss2==1.4.0 ; python_version >= "3.9" and python_version < "3.15"
-tomli==2.2.1 ; python_version >= "3.9" and python_version < "3.15"
-tornado==6.4.2 ; python_version >= "3.9" and python_version < "3.15"
-traitlets==5.14.3 ; python_version >= "3.9" and python_version < "3.15"
-typing-extensions==4.12.2 ; python_version >= "3.9" and python_version < "3.15"
-tzdata==2025.1 ; python_version >= "3.9" and python_version < "3.15"
-urllib3==2.3.0 ; python_version >= "3.9" and python_version < "3.15"
-wcwidth==0.2.13 ; python_version >= "3.9" and python_version < "3.15"
-webencodings==0.5.1 ; python_version >= "3.9" and python_version < "3.15"
-xmltodict==0.14.2 ; python_version >= "3.9" and python_version < "3.15"
-zipp==3.21.0 ; python_version >= "3.9" and python_version < "3.10"
+alabaster==0.7.16 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+asttokens==3.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+attrs==25.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+babel==2.17.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+beautifulsoup4==4.13.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+bleach==6.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+certifi==2025.1.31 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+cffi==1.17.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+charset-normalizer==3.4.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+colorama==0.4.6 ; python_version >= "3.10" and python_version <= "3.11" and sys_platform == "win32" or python_version >= "3.12" and python_version < "3.15" and sys_platform == "win32"
+decorator==5.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+defusedxml==0.7.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+docutils==0.20.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+dom-toml==2.0.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+domdf-python-tools==3.10.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+exceptiongroup==1.2.2 ; python_version >= "3.10" and python_version < "3.11"
+executing==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+fastjsonschema==2.21.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+idna==3.10 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+imagesize==1.4.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+iniconfig==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+ipython==8.34.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+jedi==0.19.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+jinja2==3.1.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+joblib==1.4.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+jsonschema-specifications==2024.10.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+jsonschema==4.23.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+jupyter-client==8.6.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+jupyter-core==5.7.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+jupyterlab-pygments==0.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+markupsafe==2.1.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+matplotlib-inline==0.1.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+mistune==3.1.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+natsort==8.4.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+nbclient==0.10.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+nbconvert==7.16.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+nbformat==5.10.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+nbsphinx==0.9.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+pandocfilters==1.5.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+parso==0.8.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+pexpect==4.9.0 ; python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "3.15" and (sys_platform != "win32" and sys_platform != "emscripten")
+platformdirs==4.3.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+pluggy==1.5.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+prompt-toolkit==3.0.50 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+ptyprocess==0.7.0 ; python_version >= "3.10" and python_version <= "3.11" and os_name != "nt" or python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "3.15" and os_name != "nt" or python_version >= "3.12" and python_version < "3.15" and (sys_platform != "win32" and sys_platform != "emscripten")
+pure-eval==0.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+pycparser==2.22 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+pygments==2.19.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+pytest==8.3.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+python-constraint2==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+pytz==2025.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+pywin32==310 ; python_version >= "3.10" and python_version <= "3.11" and sys_platform == "win32" and platform_python_implementation != "PyPy" or python_version >= "3.12" and python_version < "3.15" and sys_platform == "win32" and platform_python_implementation != "PyPy"
+pyzmq==26.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+referencing==0.36.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+requests==2.32.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+rpds-py==0.23.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+scikit-learn==1.6.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+scipy==1.15.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+six==1.17.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+snowballstemmer==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+soupsieve==2.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+sphinx-pyproject==0.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+sphinx-rtd-theme==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+sphinx==7.4.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+sphinxcontrib-applehelp==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+sphinxcontrib-devhelp==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+sphinxcontrib-htmlhelp==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+sphinxcontrib-jquery==4.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+sphinxcontrib-jsmath==1.0.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+sphinxcontrib-qthelp==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+sphinxcontrib-serializinghtml==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+stack-data==0.6.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+threadpoolctl==3.6.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+tinycss2==1.4.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+tomli==2.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+tornado==6.4.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+traitlets==5.14.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+typing-extensions==4.12.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+tzdata==2025.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+urllib3==2.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+wcwidth==0.2.13 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+webencodings==0.5.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+xmltodict==0.14.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
diff --git a/doc/requirements_test.txt b/doc/requirements_test.txt
index f4f62912c..11ed8518b 100644
--- a/doc/requirements_test.txt
+++ b/doc/requirements_test.txt
@@ -1,116 +1,195 @@
-argcomplete==3.6.0 ; python_version >= "3.9" and python_version < "3.15" \
+argcomplete==3.6.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:2e4e42ec0ba2fff54b0d244d0b1623e86057673e57bafe72dda59c64bd5dee8b \
     --hash=sha256:4e3e4e10beb20e06444dbac0ac8dda650cb6349caeefe980208d3c548708bedd
-attrs==25.1.0 ; python_version >= "3.9" and python_version < "3.15" \
-    --hash=sha256:1c97078a80c814273a76b2a298a932eb681c87415c11dee0a6921de7f1b02c3e \
-    --hash=sha256:c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a
-build==1.2.2.post1 ; python_version >= "3.9" and python_version < "3.15" \
+asttokens==3.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7 \
+    --hash=sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2
+attrs==25.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3 \
+    --hash=sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b
+build==1.2.2.post1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:1d61c0887fa860c01971625baae8bdd338e517b836a2f70dd1f7aa3a6b2fc5b5 \
     --hash=sha256:b36993e92ca9375a219c99e606a122ff365a760a2d4bba0caa09bd5278b608b7
-colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.15" and (sys_platform == "win32" or os_name == "nt") \
+colorama==0.4.6 ; python_version >= "3.10" and python_version <= "3.11" and sys_platform == "win32" or python_version >= "3.10" and python_version <= "3.11" and os_name == "nt" or python_version >= "3.12" and python_version < "3.15" and sys_platform == "win32" or python_version >= "3.12" and python_version < "3.15" and os_name == "nt" \
     --hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \
     --hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6
-colorlog==6.9.0 ; python_version >= "3.9" and python_version < "3.15" \
+colorlog==6.9.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \
     --hash=sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2
-coverage[toml]==7.6.12 ; python_version >= "3.9" and python_version < "3.15" \
-    --hash=sha256:00b2086892cf06c7c2d74983c9595dc511acca00665480b3ddff749ec4fb2a95 \
-    --hash=sha256:0533adc29adf6a69c1baa88c3d7dbcaadcffa21afbed3ca7a225a440e4744bf9 \
-    --hash=sha256:06097c7abfa611c91edb9e6920264e5be1d6ceb374efb4986f38b09eed4cb2fe \
-    --hash=sha256:07e92ae5a289a4bc4c0aae710c0948d3c7892e20fd3588224ebe242039573bf0 \
-    --hash=sha256:0a9d8be07fb0832636a0f72b80d2a652fe665e80e720301fb22b191c3434d924 \
-    --hash=sha256:0e549f54ac5f301e8e04c569dfdb907f7be71b06b88b5063ce9d6953d2d58574 \
-    --hash=sha256:0ef01d70198431719af0b1f5dcbefc557d44a190e749004042927b2a3fed0702 \
-    --hash=sha256:0f16f44025c06792e0fb09571ae454bcc7a3ec75eeb3c36b025eccf501b1a4c3 \
-    --hash=sha256:14d47376a4f445e9743f6c83291e60adb1b127607a3618e3185bbc8091f0467b \
-    --hash=sha256:1a936309a65cc5ca80fa9f20a442ff9e2d06927ec9a4f54bcba9c14c066323f2 \
-    --hash=sha256:1ceeb90c3eda1f2d8c4c578c14167dbd8c674ecd7d38e45647543f19839dd6ea \
-    --hash=sha256:1f7ffa05da41754e20512202c866d0ebfc440bba3b0ed15133070e20bf5aeb5f \
-    --hash=sha256:200e10beb6ddd7c3ded322a4186313d5ca9e63e33d8fab4faa67ef46d3460af3 \
-    --hash=sha256:220fa6c0ad7d9caef57f2c8771918324563ef0d8272c94974717c3909664e674 \
-    --hash=sha256:2251fabcfee0a55a8578a9d29cecfee5f2de02f11530e7d5c5a05859aa85aee9 \
-    --hash=sha256:2458f275944db8129f95d91aee32c828a408481ecde3b30af31d552c2ce284a0 \
-    --hash=sha256:299cf973a7abff87a30609879c10df0b3bfc33d021e1adabc29138a48888841e \
-    --hash=sha256:2b996819ced9f7dbb812c701485d58f261bef08f9b85304d41219b1496b591ef \
-    --hash=sha256:3688b99604a24492bcfe1c106278c45586eb819bf66a654d8a9a1433022fb2eb \
-    --hash=sha256:3a1e465f398c713f1b212400b4e79a09829cd42aebd360362cd89c5bdc44eb87 \
-    --hash=sha256:488c27b3db0ebee97a830e6b5a3ea930c4a6e2c07f27a5e67e1b3532e76b9ef1 \
-    --hash=sha256:48cfc4641d95d34766ad41d9573cc0f22a48aa88d22657a1fe01dca0dbae4de2 \
-    --hash=sha256:4b467a8c56974bf06e543e69ad803c6865249d7a5ccf6980457ed2bc50312703 \
-    --hash=sha256:53c56358d470fa507a2b6e67a68fd002364d23c83741dbc4c2e0680d80ca227e \
-    --hash=sha256:5d1095bbee1851269f79fd8e0c9b5544e4c00c0c24965e66d8cba2eb5bb535fd \
-    --hash=sha256:641dfe0ab73deb7069fb972d4d9725bf11c239c309ce694dd50b1473c0f641c3 \
-    --hash=sha256:64cbb1a3027c79ca6310bf101014614f6e6e18c226474606cf725238cf5bc2d4 \
-    --hash=sha256:66fe626fd7aa5982cdebad23e49e78ef7dbb3e3c2a5960a2b53632f1f703ea45 \
-    --hash=sha256:676f92141e3c5492d2a1596d52287d0d963df21bf5e55c8b03075a60e1ddf8aa \
-    --hash=sha256:69e62c5034291c845fc4df7f8155e8544178b6c774f97a99e2734b05eb5bed31 \
-    --hash=sha256:704c8c8c6ce6569286ae9622e534b4f5b9759b6f2cd643f1c1a61f666d534fe8 \
-    --hash=sha256:78f5243bb6b1060aed6213d5107744c19f9571ec76d54c99cc15938eb69e0e86 \
-    --hash=sha256:79cac3390bfa9836bb795be377395f28410811c9066bc4eefd8015258a7578c6 \
-    --hash=sha256:7ae6eabf519bc7871ce117fb18bf14e0e343eeb96c377667e3e5dd12095e0288 \
-    --hash=sha256:7e39e845c4d764208e7b8f6a21c541ade741e2c41afabdfa1caa28687a3c98cf \
-    --hash=sha256:8161d9fbc7e9fe2326de89cd0abb9f3599bccc1287db0aba285cb68d204ce929 \
-    --hash=sha256:8bec2ac5da793c2685ce5319ca9bcf4eee683b8a1679051f8e6ec04c4f2fd7dc \
-    --hash=sha256:959244a17184515f8c52dcb65fb662808767c0bd233c1d8a166e7cf74c9ea985 \
-    --hash=sha256:9b148068e881faa26d878ff63e79650e208e95cf1c22bd3f77c3ca7b1d9821a3 \
-    --hash=sha256:aa6f302a3a0b5f240ee201297fff0bbfe2fa0d415a94aeb257d8b461032389bd \
-    --hash=sha256:ace9048de91293e467b44bce0f0381345078389814ff6e18dbac8fdbf896360e \
-    --hash=sha256:ad7525bf0241e5502168ae9c643a2f6c219fa0a283001cee4cf23a9b7da75879 \
-    --hash=sha256:b01a840ecc25dce235ae4c1b6a0daefb2a203dba0e6e980637ee9c2f6ee0df57 \
-    --hash=sha256:b076e625396e787448d27a411aefff867db2bffac8ed04e8f7056b07024eed5a \
-    --hash=sha256:b172f8e030e8ef247b3104902cc671e20df80163b60a203653150d2fc204d1ad \
-    --hash=sha256:b1f097878d74fe51e1ddd1be62d8e3682748875b461232cf4b52ddc6e6db0bba \
-    --hash=sha256:b95574d06aa9d2bd6e5cc35a5bbe35696342c96760b69dc4287dbd5abd4ad51d \
-    --hash=sha256:bda1c5f347550c359f841d6614fb8ca42ae5cb0b74d39f8a1e204815ebe25750 \
-    --hash=sha256:cec6b9ce3bd2b7853d4a4563801292bfee40b030c05a3d29555fd2a8ee9bd68c \
-    --hash=sha256:d1a987778b9c71da2fc8948e6f2656da6ef68f59298b7e9786849634c35d2c3c \
-    --hash=sha256:d74c08e9aaef995f8c4ef6d202dbd219c318450fe2a76da624f2ebb9c8ec5d9f \
-    --hash=sha256:e18aafdfb3e9ec0d261c942d35bd7c28d031c5855dadb491d2723ba54f4c3015 \
-    --hash=sha256:e216c5c45f89ef8971373fd1c5d8d1164b81f7f5f06bbf23c37e7908d19e8558 \
-    --hash=sha256:e695df2c58ce526eeab11a2e915448d3eb76f75dffe338ea613c1201b33bab2f \
-    --hash=sha256:e7575ab65ca8399c8c4f9a7d61bbd2d204c8b8e447aab9d355682205c9dd948d \
-    --hash=sha256:e995b3b76ccedc27fe4f477b349b7d64597e53a43fc2961db9d3fbace085d69d \
-    --hash=sha256:ea31689f05043d520113e0552f039603c4dd71fa4c287b64cb3606140c66f425 \
-    --hash=sha256:eb5507795caabd9b2ae3f1adc95f67b1104971c22c624bb354232d65c4fc90b3 \
-    --hash=sha256:eb8668cfbc279a536c633137deeb9435d2962caec279c3f8cf8b91fff6ff8953 \
-    --hash=sha256:ecea0c38c9079570163d663c0433a9af4094a60aafdca491c6a3d248c7432827 \
-    --hash=sha256:f25d8b92a4e31ff1bd873654ec367ae811b3a943583e05432ea29264782dc32c \
-    --hash=sha256:f60a297c3987c6c02ffb29effc70eadcbb412fe76947d394a1091a3615948e2f \
-    --hash=sha256:f973643ef532d4f9be71dd88cf7588936685fdb576d93a79fe9f65bc337d9d73
-distlib==0.3.9 ; python_version >= "3.9" and python_version < "3.15" \
+coverage==7.7.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:056d3017ed67e7ddf266e6f57378ece543755a4c9231e997789ab3bd11392c94 \
+    --hash=sha256:0ce8cf59e09d31a4915ff4c3b94c6514af4c84b22c4cc8ad7c3c546a86150a92 \
+    --hash=sha256:104bf640f408f4e115b85110047c7f27377e1a8b7ba86f7db4fa47aa49dc9a8e \
+    --hash=sha256:1393e5aa9441dafb0162c36c8506c648b89aea9565b31f6bfa351e66c11bcd82 \
+    --hash=sha256:1586ad158523f4133499a4f322b230e2cfef9cc724820dbd58595a5a236186f4 \
+    --hash=sha256:180e3fc68ee4dc5af8b33b6ca4e3bb8aa1abe25eedcb958ba5cff7123071af68 \
+    --hash=sha256:1b336d06af14f8da5b1f391e8dec03634daf54dfcb4d1c4fb6d04c09d83cef90 \
+    --hash=sha256:1c8fbce80b2b8bf135d105aa8f5b36eae0c57d702a1cc3ebdea2a6f03f6cdde5 \
+    --hash=sha256:2d673e3add00048215c2cc507f1228a7523fd8bf34f279ac98334c9b07bd2656 \
+    --hash=sha256:316f29cc3392fa3912493ee4c83afa4a0e2db04ff69600711f8c03997c39baaa \
+    --hash=sha256:33c1394d8407e2771547583b66a85d07ed441ff8fae5a4adb4237ad39ece60db \
+    --hash=sha256:37cbc7b0d93dfd133e33c7ec01123fbb90401dce174c3b6661d8d36fb1e30608 \
+    --hash=sha256:39abcacd1ed54e2c33c54bdc488b310e8ef6705833f7148b6eb9a547199d375d \
+    --hash=sha256:3ab7090f04b12dc6469882ce81244572779d3a4b67eea1c96fb9ecc8c607ef39 \
+    --hash=sha256:3b0e6e54591ae0d7427def8a4d40fca99df6b899d10354bab73cd5609807261c \
+    --hash=sha256:416e2a8845eaff288f97eaf76ab40367deafb9073ffc47bf2a583f26b05e5265 \
+    --hash=sha256:4545485fef7a8a2d8f30e6f79ce719eb154aab7e44217eb444c1d38239af2072 \
+    --hash=sha256:4c124025430249118d018dcedc8b7426f39373527c845093132196f2a483b6dd \
+    --hash=sha256:4fbb7a0c3c21908520149d7751cf5b74eb9b38b54d62997b1e9b3ac19a8ee2fe \
+    --hash=sha256:52fc89602cde411a4196c8c6894afb384f2125f34c031774f82a4f2608c59d7d \
+    --hash=sha256:55143aa13c49491f5606f05b49ed88663446dce3a4d3c5d77baa4e36a16d3573 \
+    --hash=sha256:57f3bd0d29bf2bd9325c0ff9cc532a175110c4bf8f412c05b2405fd35745266d \
+    --hash=sha256:5b2f144444879363ea8834cd7b6869d79ac796cb8f864b0cfdde50296cd95816 \
+    --hash=sha256:5efdeff5f353ed3352c04e6b318ab05c6ce9249c25ed3c2090c6e9cadda1e3b2 \
+    --hash=sha256:60e6347d1ed882b1159ffea172cb8466ee46c665af4ca397edbf10ff53e9ffaf \
+    --hash=sha256:693d921621a0c8043bfdc61f7d4df5ea6d22165fe8b807cac21eb80dd94e4bbd \
+    --hash=sha256:708f0a1105ef2b11c79ed54ed31f17e6325ac936501fc373f24be3e6a578146a \
+    --hash=sha256:70f0925c4e2bfc965369f417e7cc72538fd1ba91639cf1e4ef4b1a6b50439b3b \
+    --hash=sha256:7789e700f33f2b133adae582c9f437523cd5db8de845774988a58c360fc88253 \
+    --hash=sha256:7b6c96d69928a3a6767fab8dc1ce8a02cf0156836ccb1e820c7f45a423570d98 \
+    --hash=sha256:7d2a65876274acf544703e943c010b60bd79404e3623a1e5d52b64a6e2728de5 \
+    --hash=sha256:7f18d47641282664276977c604b5a261e51fefc2980f5271d547d706b06a837f \
+    --hash=sha256:89078312f06237417adda7c021c33f80f7a6d2db8572a5f6c330d89b080061ce \
+    --hash=sha256:8c938c6ae59be67ac19a7204e079efc94b38222cd7d0269f96e45e18cddeaa59 \
+    --hash=sha256:8e336b56301774ace6be0017ff85c3566c556d938359b61b840796a0202f805c \
+    --hash=sha256:a0a207c87a9f743c8072d059b4711f8d13c456eb42dac778a7d2e5d4f3c253a7 \
+    --hash=sha256:a2454b12a3f12cc4698f3508912e6225ec63682e2ca5a96f80a2b93cef9e63f3 \
+    --hash=sha256:a538a23119d1e2e2ce077e902d02ea3d8e0641786ef6e0faf11ce82324743944 \
+    --hash=sha256:aa4dff57fc21a575672176d5ab0ef15a927199e775c5e8a3d75162ab2b0c7705 \
+    --hash=sha256:ad0edaa97cb983d9f2ff48cadddc3e1fb09f24aa558abeb4dc9a0dbacd12cbb4 \
+    --hash=sha256:ae8006772c6b0fa53c33747913473e064985dac4d65f77fd2fdc6474e7cd54e4 \
+    --hash=sha256:b0fac2088ec4aaeb5468b814bd3ff5e5978364bfbce5e567c44c9e2854469f6c \
+    --hash=sha256:b3e212a894d8ae07fde2ca8b43d666a6d49bbbddb10da0f6a74ca7bd31f20054 \
+    --hash=sha256:b54a1ee4c6f1905a436cbaa04b26626d27925a41cbc3a337e2d3ff7038187f07 \
+    --hash=sha256:b667b91f4f714b17af2a18e220015c941d1cf8b07c17f2160033dbe1e64149f0 \
+    --hash=sha256:b8c36093aca722db73633cf2359026ed7782a239eb1c6db2abcff876012dc4cf \
+    --hash=sha256:bb356e7ae7c2da13f404bf8f75be90f743c6df8d4607022e759f5d7d89fe83f8 \
+    --hash=sha256:bce730d484038e97f27ea2dbe5d392ec5c2261f28c319a3bb266f6b213650135 \
+    --hash=sha256:c075d167a6ec99b798c1fdf6e391a1d5a2d054caffe9593ba0f97e3df2c04f0e \
+    --hash=sha256:c4e09534037933bf6eb31d804e72c52ec23219b32c1730f9152feabbd7499463 \
+    --hash=sha256:c5f8a5364fc37b2f172c26a038bc7ec4885f429de4a05fc10fdcb53fb5834c5c \
+    --hash=sha256:cb203c0afffaf1a8f5b9659a013f8f16a1b2cad3a80a8733ceedc968c0cf4c57 \
+    --hash=sha256:cc41374d2f27d81d6558f8a24e5c114580ffefc197fd43eabd7058182f743322 \
+    --hash=sha256:cd879d4646055a573775a1cec863d00c9ff8c55860f8b17f6d8eee9140c06166 \
+    --hash=sha256:d013c07061751ae81861cae6ec3a4fe04e84781b11fd4b6b4201590234b25c7b \
+    --hash=sha256:d8c7524779003d59948c51b4fcbf1ca4e27c26a7d75984f63488f3625c328b9b \
+    --hash=sha256:d9710521f07f526de30ccdead67e6b236fe996d214e1a7fba8b36e2ba2cd8261 \
+    --hash=sha256:e1ffde1d6bc2a92f9c9207d1ad808550873748ac2d4d923c815b866baa343b3f \
+    --hash=sha256:e7f559c36d5cdc448ee13e7e56ed7b6b5d44a40a511d584d388a0f5d940977ba \
+    --hash=sha256:f2a1e18a85bd066c7c556d85277a7adf4651f259b2579113844835ba1a74aafd \
+    --hash=sha256:f32b165bf6dfea0846a9c9c38b7e1d68f313956d60a15cde5d1709fddcaf3bee \
+    --hash=sha256:f5a2f71d6a91238e7628f23538c26aa464d390cbdedf12ee2a7a0fb92a24482a \
+    --hash=sha256:f81fe93dc1b8e5673f33443c0786c14b77e36f1025973b85e07c70353e46882b
+decorator==5.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360 \
+    --hash=sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a
+distlib==0.3.9 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \
     --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403
-exceptiongroup==1.2.2 ; python_version >= "3.9" and python_version < "3.11" \
+exceptiongroup==1.2.2 ; python_version >= "3.10" and python_version < "3.11" \
     --hash=sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b \
     --hash=sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc
-filelock==3.17.0 ; python_version >= "3.9" and python_version < "3.15" \
-    --hash=sha256:533dc2f7ba78dc2f0f531fc6c4940addf7b70a481e269a5a3b93be94ffbe8338 \
-    --hash=sha256:ee4e77401ef576ebb38cd7f13b9b28893194acc20a8e68e18730ba9c0e54660e
-importlib-metadata==8.6.1 ; python_version >= "3.9" and python_full_version < "3.10.2" \
+executing==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa \
+    --hash=sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755
+filelock==3.18.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2 \
+    --hash=sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de
+importlib-metadata==8.6.1 ; python_version >= "3.10" and python_full_version < "3.10.2" \
     --hash=sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e \
     --hash=sha256:310b41d755445d74569f993ccfc22838295d9fe005425094fad953d7f15c8580
-iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "3.15" \
-    --hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \
-    --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374
-joblib==1.4.2 ; python_version >= "3.9" and python_version < "3.15" \
+iniconfig==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7 \
+    --hash=sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760
+ipython==8.34.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:0419883fa46e0baa182c5d50ebb8d6b49df1889fdb70750ad6d8cfe678eda6e3 \
+    --hash=sha256:c31d658e754673ecc6514583e7dda8069e47136eb62458816b7d1e6625948b5a
+jedi==0.19.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0 \
+    --hash=sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9
+joblib==1.4.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6 \
     --hash=sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e
-jsonschema-specifications==2024.10.1 ; python_version >= "3.9" and python_version < "3.15" \
+jsonschema-specifications==2024.10.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:0f38b83639958ce1152d02a7f062902c41c8fd20d558b0c34344292d417ae272 \
     --hash=sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf
-jsonschema==4.23.0 ; python_version >= "3.9" and python_version < "3.15" \
+jsonschema==4.23.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4 \
     --hash=sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566
-mock==5.2.0 ; python_version >= "3.9" and python_version < "3.15" \
+markupsafe==2.1.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf \
+    --hash=sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff \
+    --hash=sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f \
+    --hash=sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3 \
+    --hash=sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532 \
+    --hash=sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f \
+    --hash=sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617 \
+    --hash=sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df \
+    --hash=sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4 \
+    --hash=sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906 \
+    --hash=sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f \
+    --hash=sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4 \
+    --hash=sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8 \
+    --hash=sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371 \
+    --hash=sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2 \
+    --hash=sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465 \
+    --hash=sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52 \
+    --hash=sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6 \
+    --hash=sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169 \
+    --hash=sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad \
+    --hash=sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2 \
+    --hash=sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0 \
+    --hash=sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029 \
+    --hash=sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f \
+    --hash=sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a \
+    --hash=sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced \
+    --hash=sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5 \
+    --hash=sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c \
+    --hash=sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf \
+    --hash=sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9 \
+    --hash=sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb \
+    --hash=sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad \
+    --hash=sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3 \
+    --hash=sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1 \
+    --hash=sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46 \
+    --hash=sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc \
+    --hash=sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a \
+    --hash=sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee \
+    --hash=sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900 \
+    --hash=sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5 \
+    --hash=sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea \
+    --hash=sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f \
+    --hash=sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5 \
+    --hash=sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e \
+    --hash=sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a \
+    --hash=sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f \
+    --hash=sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50 \
+    --hash=sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a \
+    --hash=sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b \
+    --hash=sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4 \
+    --hash=sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff \
+    --hash=sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2 \
+    --hash=sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46 \
+    --hash=sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b \
+    --hash=sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf \
+    --hash=sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5 \
+    --hash=sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5 \
+    --hash=sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab \
+    --hash=sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd \
+    --hash=sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68
+matplotlib-inline==0.1.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90 \
+    --hash=sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca
+mock==5.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:4e460e818629b4b173f32d08bf30d3af8123afbb8e04bb5707a1fd4799e503f0 \
     --hash=sha256:7ba87f72ca0e915175596069dbbcc7c75af7b5e9b9bc107ad6349ede0819982f
-nox-poetry==1.2.0 ; python_version >= "3.9" and python_version < "3.15" \
+nox-poetry==1.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:2531a404e3a21eb73fc1a587a548506a8e2c4c1e6e7ef0c1d0d8d6453b7e5d26 \
     --hash=sha256:266eea7a0ab3cad7f4121ecc05b76945036db3b67e6e347557f05010a18e2682
-nox==2024.10.9 ; python_version >= "3.9" and python_version < "3.15" \
+nox==2024.10.9 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \
     --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95
-numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.15" \
+numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b \
     --hash=sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818 \
     --hash=sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20 \
@@ -147,10 +226,10 @@ numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.15" \
     --hash=sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef \
     --hash=sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3 \
     --hash=sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f
-packaging==24.2 ; python_version >= "3.9" and python_version < "3.15" \
+packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \
     --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f
-pandas==2.2.3 ; python_version >= "3.9" and python_version < "3.15" \
+pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a \
     --hash=sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d \
     --hash=sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5 \
@@ -193,28 +272,46 @@ pandas==2.2.3 ; python_version >= "3.9" and python_version < "3.15" \
     --hash=sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015 \
     --hash=sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24 \
     --hash=sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319
-pep440==0.1.2 ; python_version >= "3.9" and python_version < "3.15" \
+parso==0.8.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18 \
+    --hash=sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d
+pep440==0.1.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:36d6ad73f2b5d07769294cafe183500ac89d848c922a3d3f521b968481880d51 \
     --hash=sha256:58b37246cc2b13fee1ca2a3c092cb3704d21ecf621a5bdbb168e44e697f6d04d
-platformdirs==4.3.6 ; python_version >= "3.9" and python_version < "3.15" \
-    --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \
-    --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb
-pluggy==1.5.0 ; python_version >= "3.9" and python_version < "3.15" \
+pexpect==4.9.0 ; python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "3.15" and (sys_platform != "win32" and sys_platform != "emscripten") \
+    --hash=sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523 \
+    --hash=sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f
+platformdirs==4.3.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:a03875334331946f13c549dbd8f4bac7a13a50a895a0eb1e8c6a8ace80d40a94 \
+    --hash=sha256:eb437d586b6a0986388f0d6f74aa0cde27b48d0e3d66843640bfb6bdcdb6e351
+pluggy==1.5.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \
     --hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669
-pyproject-hooks==1.2.0 ; python_version >= "3.9" and python_version < "3.15" \
+prompt-toolkit==3.0.50 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:544748f3860a2623ca5cd6d2795e7a14f3d0e1c3c9728359013f79877fc89bab \
+    --hash=sha256:9b6427eb19e479d98acff65196a307c555eb567989e6d88ebbb1b509d9779198
+ptyprocess==0.7.0 ; python_version >= "3.10" and python_version <= "3.11" and os_name != "nt" or python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "3.15" and os_name != "nt" or python_version >= "3.12" and python_version < "3.15" and (sys_platform != "win32" and sys_platform != "emscripten") \
+    --hash=sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35 \
+    --hash=sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220
+pure-eval==0.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0 \
+    --hash=sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42
+pygments==2.19.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f \
+    --hash=sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c
+pyproject-hooks==1.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:1e859bd5c40fae9448642dd871adf459e5e2084186e8d2c2a79a824c970da1f8 \
     --hash=sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913
-pytest-cov==5.0.0 ; python_version >= "3.9" and python_version < "3.15" \
+pytest-cov==5.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652 \
     --hash=sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857
-pytest-timeout==2.3.1 ; python_version >= "3.9" and python_version < "3.15" \
+pytest-timeout==2.3.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:12397729125c6ecbdaca01035b9e5239d4db97352320af155b3f5de1ba5165d9 \
     --hash=sha256:68188cb703edfc6a18fad98dc25a3c61e9f24d644b0b70f33af545219fc7813e
-pytest==8.3.5 ; python_version >= "3.9" and python_version < "3.15" \
+pytest==8.3.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820 \
     --hash=sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845
-python-constraint2==2.1.0 ; python_version >= "3.9" and python_version < "3.15" \
+python-constraint2==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:02f46e4a7e8a46048604870287f1c55312eea47c2c15dd58b51057cb7d057bdc \
     --hash=sha256:0e5ece0b4e85ed680af6b9db33ef3497a6f9499b8957cd830cd139f17ac29aef \
     --hash=sha256:0f3a09c1947e6a90b9558cd1651e86dbe10f698aad56247596f2b856307707f0 \
@@ -231,16 +328,16 @@ python-constraint2==2.1.0 ; python_version >= "3.9" and python_version < "3.15"
     --hash=sha256:f28d07eae04d83d454f0e6ba2da0678786a21f2d405998a3eec960b56d809692 \
     --hash=sha256:fbb6ab033a7a4250bce11ca12fdf8958c6c42853e933cf585dbd265e0967dd93 \
     --hash=sha256:fc3cffd0f16cb9b34d2e95bd6d27425dd24044073760477a1341e835fc9c45f4
-python-dateutil==2.9.0.post0 ; python_version >= "3.9" and python_version < "3.15" \
+python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \
     --hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427
-pytz==2025.1 ; python_version >= "3.9" and python_version < "3.15" \
+pytz==2025.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57 \
     --hash=sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e
-referencing==0.36.2 ; python_version >= "3.9" and python_version < "3.15" \
+referencing==0.36.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa \
     --hash=sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0
-rpds-py==0.23.1 ; python_version >= "3.9" and python_version < "3.15" \
+rpds-py==0.23.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:09cd7dbcb673eb60518231e02874df66ec1296c01a4fcd733875755c02014b19 \
     --hash=sha256:0f3288930b947cbebe767f84cf618d2cbe0b13be476e749da0e6a009f986248c \
     --hash=sha256:0fced9fd4a07a1ded1bac7e961ddd9753dd5d8b755ba8e05acba54a21f5f1522 \
@@ -344,7 +441,7 @@ rpds-py==0.23.1 ; python_version >= "3.9" and python_version < "3.15" \
     --hash=sha256:fad784a31869747df4ac968a351e070c06ca377549e4ace94775aaa3ab33ee06 \
     --hash=sha256:fc869af5cba24d45fb0399b0cfdbcefcf6910bf4dee5d74036a57cf5264b3ff4 \
     --hash=sha256:fee513135b5a58f3bb6d89e48326cd5aa308e4bcdf2f7d59f67c861ada482bf8
-ruff==0.4.10 ; python_version >= "3.9" and python_version < "3.15" \
+ruff==0.4.10 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:0f54c481b39a762d48f64d97351048e842861c6662d63ec599f67d515cb417f6 \
     --hash=sha256:18238c80ee3d9100d3535d8eb15a59c4a0753b45cc55f8bf38f38d6a597b9739 \
     --hash=sha256:330421543bd3222cdfec481e8ff3460e8702ed1e58b494cf9d9e4bf90db52b9d \
@@ -362,7 +459,7 @@ ruff==0.4.10 ; python_version >= "3.9" and python_version < "3.15" \
     --hash=sha256:d8f71885bce242da344989cae08e263de29752f094233f932d4f5cfb4ef36a81 \
     --hash=sha256:dd1fcee327c20addac7916ca4e2653fbbf2e8388d8a6477ce5b4e986b68ae6c0 \
     --hash=sha256:ffe3cd2f89cb54561c62e5fa20e8f182c0a444934bf430515a4b422f1ab7b7ca
-scikit-learn==1.6.1 ; python_version >= "3.9" and python_version < "3.15" \
+scikit-learn==1.6.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:0650e730afb87402baa88afbf31c07b84c98272622aaba002559b614600ca691 \
     --hash=sha256:0c8d036eb937dbb568c6242fa598d551d88fb4399c0344d95c001980ec1c7d36 \
     --hash=sha256:1061b7c028a8663fb9a1a1baf9317b64a257fcb036dae5c8752b2abef31d136f \
@@ -393,39 +490,63 @@ scikit-learn==1.6.1 ; python_version >= "3.9" and python_version < "3.15" \
     --hash=sha256:e7be3fa5d2eb9be7d77c3734ff1d599151bb523674be9b834e8da6abe132f44e \
     --hash=sha256:e8ca8cb270fee8f1f76fa9bfd5c3507d60c6438bbee5687f81042e2bb98e5a97 \
     --hash=sha256:fa909b1a36e000a03c382aade0bd2063fd5680ff8b8e501660c0f59f021a6415
-scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.15" \
-    --hash=sha256:017367484ce5498445aade74b1d5ab377acdc65e27095155e448c88497755a5d \
-    --hash=sha256:095a87a0312b08dfd6a6155cbbd310a8c51800fc931b8c0b84003014b874ed3c \
-    --hash=sha256:20335853b85e9a49ff7572ab453794298bcf0354d8068c5f6775a0eabf350aca \
-    --hash=sha256:27e52b09c0d3a1d5b63e1105f24177e544a222b43611aaf5bc44d4a0979e32f9 \
-    --hash=sha256:2831f0dc9c5ea9edd6e51e6e769b655f08ec6db6e2e10f86ef39bd32eb11da54 \
-    --hash=sha256:2ac65fb503dad64218c228e2dc2d0a0193f7904747db43014645ae139c8fad16 \
-    --hash=sha256:392e4ec766654852c25ebad4f64e4e584cf19820b980bc04960bca0b0cd6eaa2 \
-    --hash=sha256:436bbb42a94a8aeef855d755ce5a465479c721e9d684de76bf61a62e7c2b81d5 \
-    --hash=sha256:45484bee6d65633752c490404513b9ef02475b4284c4cfab0ef946def50b3f59 \
-    --hash=sha256:54f430b00f0133e2224c3ba42b805bfd0086fe488835effa33fa291561932326 \
-    --hash=sha256:5713f62f781eebd8d597eb3f88b8bf9274e79eeabf63afb4a737abc6c84ad37b \
-    --hash=sha256:5d72782f39716b2b3509cd7c33cdc08c96f2f4d2b06d51e52fb45a19ca0c86a1 \
-    --hash=sha256:637e98dcf185ba7f8e663e122ebf908c4702420477ae52a04f9908707456ba4d \
-    --hash=sha256:8335549ebbca860c52bf3d02f80784e91a004b71b059e3eea9678ba994796a24 \
-    --hash=sha256:949ae67db5fa78a86e8fa644b9a6b07252f449dcf74247108c50e1d20d2b4627 \
-    --hash=sha256:a014c2b3697bde71724244f63de2476925596c24285c7a637364761f8710891c \
-    --hash=sha256:a78b4b3345f1b6f68a763c6e25c0c9a23a9fd0f39f5f3d200efe8feda560a5fa \
-    --hash=sha256:cdd7dacfb95fea358916410ec61bbc20440f7860333aee6d882bb8046264e949 \
-    --hash=sha256:cfa31f1def5c819b19ecc3a8b52d28ffdcc7ed52bb20c9a7589669dd3c250989 \
-    --hash=sha256:d533654b7d221a6a97304ab63c41c96473ff04459e404b83275b60aa8f4b7004 \
-    --hash=sha256:d605e9c23906d1994f55ace80e0125c587f96c020037ea6aa98d01b4bd2e222f \
-    --hash=sha256:de3ade0e53bc1f21358aa74ff4830235d716211d7d077e340c7349bc3542e884 \
-    --hash=sha256:e89369d27f9e7b0884ae559a3a956e77c02114cc60a6058b4e5011572eea9299 \
-    --hash=sha256:eccfa1906eacc02de42d70ef4aecea45415f5be17e72b61bafcfd329bdc52e94 \
-    --hash=sha256:f26264b282b9da0952a024ae34710c2aff7d27480ee91a2e82b7b7073c24722f
-six==1.17.0 ; python_version >= "3.9" and python_version < "3.15" \
+scipy==1.15.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:01edfac9f0798ad6b46d9c4c9ca0e0ad23dbf0b1eb70e96adb9fa7f525eff0bf \
+    --hash=sha256:03205d57a28e18dfd39f0377d5002725bf1f19a46f444108c29bdb246b6c8a11 \
+    --hash=sha256:08b57a9336b8e79b305a143c3655cc5bdbe6d5ece3378578888d2afbb51c4e37 \
+    --hash=sha256:11e7ad32cf184b74380f43d3c0a706f49358b904fa7d5345f16ddf993609184d \
+    --hash=sha256:28a0d2c2075946346e4408b211240764759e0fabaeb08d871639b5f3b1aca8a0 \
+    --hash=sha256:2b871df1fe1a3ba85d90e22742b93584f8d2b8e6124f8372ab15c71b73e428b8 \
+    --hash=sha256:302093e7dfb120e55515936cb55618ee0b895f8bcaf18ff81eca086c17bd80af \
+    --hash=sha256:42dabaaa798e987c425ed76062794e93a243be8f0f20fff6e7a89f4d61cb3d40 \
+    --hash=sha256:447ce30cee6a9d5d1379087c9e474628dab3db4a67484be1b7dc3196bfb2fac9 \
+    --hash=sha256:4c6676490ad76d1c2894d77f976144b41bd1a4052107902238047fb6a473e971 \
+    --hash=sha256:54c462098484e7466362a9f1672d20888f724911a74c22ae35b61f9c5919183d \
+    --hash=sha256:597a0c7008b21c035831c39927406c6181bcf8f60a73f36219b69d010aa04737 \
+    --hash=sha256:5a6fd6eac1ce74a9f77a7fc724080d507c5812d61e72bd5e4c489b042455865e \
+    --hash=sha256:5ea7ed46d437fc52350b028b1d44e002646e28f3e8ddc714011aaf87330f2f32 \
+    --hash=sha256:601881dfb761311045b03114c5fe718a12634e5608c3b403737ae463c9885d53 \
+    --hash=sha256:62ca1ff3eb513e09ed17a5736929429189adf16d2d740f44e53270cc800ecff1 \
+    --hash=sha256:69ea6e56d00977f355c0f84eba69877b6df084516c602d93a33812aa04d90a3d \
+    --hash=sha256:6a8e34cf4c188b6dd004654f88586d78f95639e48a25dfae9c5e34a6dc34547e \
+    --hash=sha256:6d0194c37037707b2afa7a2f2a924cf7bac3dc292d51b6a925e5fcb89bc5c776 \
+    --hash=sha256:6f223753c6ea76983af380787611ae1291e3ceb23917393079dcc746ba60cfb5 \
+    --hash=sha256:6f5e296ec63c5da6ba6fa0343ea73fd51b8b3e1a300b0a8cae3ed4b1122c7462 \
+    --hash=sha256:7cd5b77413e1855351cdde594eca99c1f4a588c2d63711388b6a1f1c01f62274 \
+    --hash=sha256:869269b767d5ee7ea6991ed7e22b3ca1f22de73ab9a49c44bad338b725603301 \
+    --hash=sha256:87994da02e73549dfecaed9e09a4f9d58a045a053865679aeb8d6d43747d4df3 \
+    --hash=sha256:888307125ea0c4466287191e5606a2c910963405ce9671448ff9c81c53f85f58 \
+    --hash=sha256:92233b2df6938147be6fa8824b8136f29a18f016ecde986666be5f4d686a91a4 \
+    --hash=sha256:9412f5e408b397ff5641080ed1e798623dbe1ec0d78e72c9eca8992976fa65aa \
+    --hash=sha256:9b18aa747da280664642997e65aab1dd19d0c3d17068a04b3fe34e2559196cb9 \
+    --hash=sha256:9de9d1416b3d9e7df9923ab23cd2fe714244af10b763975bea9e4f2e81cebd27 \
+    --hash=sha256:a2ec871edaa863e8213ea5df811cd600734f6400b4af272e1c011e69401218e9 \
+    --hash=sha256:a5080a79dfb9b78b768cebf3c9dcbc7b665c5875793569f48bf0e2b1d7f68f6f \
+    --hash=sha256:a8bf5cb4a25046ac61d38f8d3c3426ec11ebc350246a4642f2f315fe95bda655 \
+    --hash=sha256:b09ae80010f52efddb15551025f9016c910296cf70adbf03ce2a8704f3a5ad20 \
+    --hash=sha256:b5e025e903b4f166ea03b109bb241355b9c42c279ea694d8864d033727205e65 \
+    --hash=sha256:bad78d580270a4d32470563ea86c6590b465cb98f83d760ff5b0990cb5518a93 \
+    --hash=sha256:bae43364d600fdc3ac327db99659dcb79e6e7ecd279a75fe1266669d9a652828 \
+    --hash=sha256:c4697a10da8f8765bb7c83e24a470da5797e37041edfd77fd95ba3811a47c4fd \
+    --hash=sha256:c90ebe8aaa4397eaefa8455a8182b164a6cc1d59ad53f79943f266d99f68687f \
+    --hash=sha256:cd58a314d92838f7e6f755c8a2167ead4f27e1fd5c1251fd54289569ef3495ec \
+    --hash=sha256:cf72ff559a53a6a6d77bd8eefd12a17995ffa44ad86c77a5df96f533d4e6c6bb \
+    --hash=sha256:def751dd08243934c884a3221156d63e15234a3155cf25978b0a668409d45eb6 \
+    --hash=sha256:e7c68b6a43259ba0aab737237876e5c2c549a031ddb7abc28c7b47f22e202ded \
+    --hash=sha256:ecf797d2d798cf7c838c6d98321061eb3e72a74710e6c40540f0e8087e3b499e \
+    --hash=sha256:f031846580d9acccd0044efd1a90e6f4df3a6e12b4b6bd694a7bc03a89892b28 \
+    --hash=sha256:fb530e4794fc8ea76a4a21ccb67dea33e5e0e60f07fc38a49e821e1eae3b71a0 \
+    --hash=sha256:fe8a9eb875d430d81755472c5ba75e84acc980e4a8f6204d402849234d3017db
+six==1.17.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \
     --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81
-threadpoolctl==3.5.0 ; python_version >= "3.9" and python_version < "3.15" \
-    --hash=sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107 \
-    --hash=sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467
-tomli==2.2.1 ; python_version >= "3.9" and python_version < "3.15" \
+stack-data==0.6.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9 \
+    --hash=sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695
+threadpoolctl==3.6.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb \
+    --hash=sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e
+tomli==2.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \
     --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \
     --hash=sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c \
@@ -458,21 +579,27 @@ tomli==2.2.1 ; python_version >= "3.9" and python_version < "3.15" \
     --hash=sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272 \
     --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \
     --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7
-tomlkit==0.13.2 ; python_version >= "3.9" and python_version < "3.15" \
+tomlkit==0.13.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde \
     --hash=sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79
-typing-extensions==4.12.2 ; python_version >= "3.9" and python_version < "3.13" \
+traitlets==5.14.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7 \
+    --hash=sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f
+typing-extensions==4.12.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \
     --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8
-tzdata==2025.1 ; python_version >= "3.9" and python_version < "3.15" \
+tzdata==2025.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694 \
     --hash=sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639
-virtualenv==20.29.3 ; python_version >= "3.9" and python_version < "3.15" \
+virtualenv==20.29.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:3e3d00f5807e83b234dfb6122bf37cfadf4be216c53a49ac059d02414f819170 \
     --hash=sha256:95e39403fcf3940ac45bc717597dba16110b74506131845d9b687d5e73d947ac
-xmltodict==0.14.2 ; python_version >= "3.9" and python_version < "3.15" \
+wcwidth==0.2.13 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859 \
+    --hash=sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5
+xmltodict==0.14.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:201e7c28bb210e374999d1dde6382923ab0ed1a8a5faeece48ab525b7810a553 \
     --hash=sha256:20cc7d723ed729276e808f26fb6b3599f786cbc37e06c65e192ba77c40f20aac
-zipp==3.21.0 ; python_version >= "3.9" and python_full_version < "3.10.2" \
+zipp==3.21.0 ; python_version >= "3.10" and python_full_version < "3.10.2" \
     --hash=sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4 \
     --hash=sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931
diff --git a/pyproject.toml b/pyproject.toml
index c62cb66e6..2529c7adc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -109,7 +109,7 @@ markupsafe = "^2.0.1"       # TODO why do we need markupsafe here?
 # sphinx-autodoc-typehints = "^1.24.0"
 
 # ATTENTION: if anything is changed here, run `poetry update`
-# Please also run `poetry export -f requirements.txt --output docs/requirements_test.txt --with test`
+# Please also run `poetry export -f requirements.txt --output doc/requirements_test.txt --with test`
 [tool.poetry.group.test]
 optional = true
 [tool.poetry.group.test.dependencies]

From 64dfd95f3a4da7b65031a72d0d7c58d18b92852c Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 12:37:21 +0100
Subject: [PATCH 133/168] Improved assertion error message

---
 test/test_runners.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_runners.py b/test/test_runners.py
index 527c1d252..13a398d3e 100644
--- a/test/test_runners.py
+++ b/test/test_runners.py
@@ -158,7 +158,7 @@ def test_time_keeping(env):
                               answer=answer)
     max_time = (time.perf_counter() - start) * 1e3  # ms
 
-    assert len(result) >= 10
+    assert len(result) >= 10, f"{len(result)=} < 10 for {kernel_name=} with {tune_params=}"
 
     timings = [
         'total_framework_time', 'total_strategy_time', 'total_compile_time',

From 5a83d3659aef4737238422e32acaee5b0e97bdde Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 12:38:46 +0100
Subject: [PATCH 134/168] Added logging in case default block size restriction
 is added

---
 kernel_tuner/searchspace.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 6331bed33..8b285f5ad 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -236,6 +236,7 @@ def __build_searchspace_bruteforce(self, block_size_names: list, max_threads: in
                     isinstance(self._modified_restrictions, list)
                     and block_size_restriction_spaced not in self._modified_restrictions
                 ):
+                    print(f"added default block size restriction '{block_size_restriction_spaced}'")
                     self._modified_restrictions.append(block_size_restriction_spaced)
                     if isinstance(self.restrictions, list):
                         self.restrictions.append(block_size_restriction_spaced)

From 5e3512b748b0027ea122904a8cd262dba1a06373 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 12:39:06 +0100
Subject: [PATCH 135/168] Adjusted path to benchmarking kernels

---
 kernel_tuner/backends/hypertuner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index b15da315a..6348cc56d 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -62,7 +62,7 @@ def compile(self, kernel_instance):
 
         # TODO get applications & GPUs args from benchmark
         gpus = ["A100", "A4000", "MI250X"]
-        folder = "../../autotuning_methodology/benchmark_hub/kernels"
+        folder = "../autotuning_methodology/benchmark_hub/kernels"
         applications = [
             {
                 "name": "dedispersion_milo",

From bff6d7b820300bf2805508b8a34add82917f056e Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 12:39:46 +0100
Subject: [PATCH 136/168] Automatically adjust genetic algorithm popsize for
 smaller search spaces

---
 kernel_tuner/strategies/genetic_algorithm.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 0ca0f5f75..6a8565118 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -21,9 +21,15 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
     options = tuning_options.strategy_options
     pop_size, generations, method, mutation_chance = common.get_options(options, _options)
-    pop_size = min(round(searchspace.size / 2), pop_size)
     crossover = supported_methods[method]
 
+    # if left to the default, adjust the popsize to a sensible value for small search spaces
+    if pop_size == _options["popsize"][1]:
+        pop_size = min(round(searchspace.size / 2), pop_size)
+    else:
+        # otherwise, just make sure it doesn't exceed the search space size
+        pop_size = min(searchspace.size, pop_size)
+
     best_score = 1e20
     cost_func = CostFunc(searchspace, tuning_options, runner)
 

From 8ddce18916187c894c0bc94cac9b5bb740aec289 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 13:46:50 +0100
Subject: [PATCH 137/168] Updated poetry configuration fields to project
 configuration fields, updated dependencies

---
 doc/requirements.txt      |  2 +-
 doc/requirements_test.txt | 93 +++++++++++++++++++++++----------------
 pyproject.toml            | 90 +++++++++++++++++--------------------
 3 files changed, 98 insertions(+), 87 deletions(-)

diff --git a/doc/requirements.txt b/doc/requirements.txt
index 5f316bb33..fd92b26ff 100644
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@@ -36,7 +36,7 @@ nbclient==0.10.2 ; python_version >= "3.10" and python_version <= "3.11" or pyth
 nbconvert==7.16.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 nbformat==5.10.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 nbsphinx==0.9.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+numpy==2.2.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 pandocfilters==1.5.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
diff --git a/doc/requirements_test.txt b/doc/requirements_test.txt
index 11ed8518b..b5a5c1443 100644
--- a/doc/requirements_test.txt
+++ b/doc/requirements_test.txt
@@ -189,43 +189,62 @@ nox-poetry==1.2.0 ; python_version >= "3.10" and python_version <= "3.11" or pyt
 nox==2024.10.9 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \
     --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95
-numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
-    --hash=sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b \
-    --hash=sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818 \
-    --hash=sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20 \
-    --hash=sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0 \
-    --hash=sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010 \
-    --hash=sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a \
-    --hash=sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea \
-    --hash=sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c \
-    --hash=sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71 \
-    --hash=sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110 \
-    --hash=sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be \
-    --hash=sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a \
-    --hash=sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a \
-    --hash=sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5 \
-    --hash=sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed \
-    --hash=sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd \
-    --hash=sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c \
-    --hash=sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e \
-    --hash=sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0 \
-    --hash=sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c \
-    --hash=sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a \
-    --hash=sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b \
-    --hash=sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0 \
-    --hash=sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6 \
-    --hash=sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2 \
-    --hash=sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a \
-    --hash=sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30 \
-    --hash=sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218 \
-    --hash=sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5 \
-    --hash=sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07 \
-    --hash=sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2 \
-    --hash=sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4 \
-    --hash=sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764 \
-    --hash=sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef \
-    --hash=sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3 \
-    --hash=sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f
+numpy==2.2.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:05c076d531e9998e7e694c36e8b349969c56eadd2cdcd07242958489d79a7286 \
+    --hash=sha256:0d54974f9cf14acf49c60f0f7f4084b6579d24d439453d5fc5805d46a165b542 \
+    --hash=sha256:11c43995255eb4127115956495f43e9343736edb7fcdb0d973defd9de14cd84f \
+    --hash=sha256:188dcbca89834cc2e14eb2f106c96d6d46f200fe0200310fc29089657379c58d \
+    --hash=sha256:1974afec0b479e50438fc3648974268f972e2d908ddb6d7fb634598cdb8260a0 \
+    --hash=sha256:1cf4e5c6a278d620dee9ddeb487dc6a860f9b199eadeecc567f777daace1e9e7 \
+    --hash=sha256:207a2b8441cc8b6a2a78c9ddc64d00d20c303d79fba08c577752f080c4007ee3 \
+    --hash=sha256:218f061d2faa73621fa23d6359442b0fc658d5b9a70801373625d958259eaca3 \
+    --hash=sha256:2aad3c17ed2ff455b8eaafe06bcdae0062a1db77cb99f4b9cbb5f4ecb13c5146 \
+    --hash=sha256:2fa8fa7697ad1646b5c93de1719965844e004fcad23c91228aca1cf0800044a1 \
+    --hash=sha256:31504f970f563d99f71a3512d0c01a645b692b12a63630d6aafa0939e52361e6 \
+    --hash=sha256:3387dd7232804b341165cedcb90694565a6015433ee076c6754775e85d86f1fc \
+    --hash=sha256:4ba5054787e89c59c593a4169830ab362ac2bee8a969249dc56e5d7d20ff8df9 \
+    --hash=sha256:4f92084defa704deadd4e0a5ab1dc52d8ac9e8a8ef617f3fbb853e79b0ea3592 \
+    --hash=sha256:65ef3468b53269eb5fdb3a5c09508c032b793da03251d5f8722b1194f1790c00 \
+    --hash=sha256:6f527d8fdb0286fd2fd97a2a96c6be17ba4232da346931d967a0630050dfd298 \
+    --hash=sha256:7051ee569db5fbac144335e0f3b9c2337e0c8d5c9fee015f259a5bd70772b7e8 \
+    --hash=sha256:7716e4a9b7af82c06a2543c53ca476fa0b57e4d760481273e09da04b74ee6ee2 \
+    --hash=sha256:79bd5f0a02aa16808fcbc79a9a376a147cc1045f7dfe44c6e7d53fa8b8a79392 \
+    --hash=sha256:7a4e84a6283b36632e2a5b56e121961f6542ab886bc9e12f8f9818b3c266bfbb \
+    --hash=sha256:8120575cb4882318c791f839a4fd66161a6fa46f3f0a5e613071aae35b5dd8f8 \
+    --hash=sha256:81413336ef121a6ba746892fad881a83351ee3e1e4011f52e97fba79233611fd \
+    --hash=sha256:8146f3550d627252269ac42ae660281d673eb6f8b32f113538e0cc2a9aed42b9 \
+    --hash=sha256:879cf3a9a2b53a4672a168c21375166171bc3932b7e21f622201811c43cdd3b0 \
+    --hash=sha256:892c10d6a73e0f14935c31229e03325a7b3093fafd6ce0af704be7f894d95687 \
+    --hash=sha256:92bda934a791c01d6d9d8e038363c50918ef7c40601552a58ac84c9613a665bc \
+    --hash=sha256:9ba03692a45d3eef66559efe1d1096c4b9b75c0986b5dff5530c378fb8331d4f \
+    --hash=sha256:9eeea959168ea555e556b8188da5fa7831e21d91ce031e95ce23747b7609f8a4 \
+    --hash=sha256:a0258ad1f44f138b791327961caedffbf9612bfa504ab9597157806faa95194a \
+    --hash=sha256:a761ba0fa886a7bb33c6c8f6f20213735cb19642c580a931c625ee377ee8bd39 \
+    --hash=sha256:a7b9084668aa0f64e64bd00d27ba5146ef1c3a8835f3bd912e7a9e01326804c4 \
+    --hash=sha256:a84eda42bd12edc36eb5b53bbcc9b406820d3353f1994b6cfe453a33ff101775 \
+    --hash=sha256:ab2939cd5bec30a7430cbdb2287b63151b77cf9624de0532d629c9a1c59b1d5c \
+    --hash=sha256:ac0280f1ba4a4bfff363a99a6aceed4f8e123f8a9b234c89140f5e894e452ecd \
+    --hash=sha256:adf8c1d66f432ce577d0197dceaac2ac00c0759f573f28516246351c58a85020 \
+    --hash=sha256:b4adfbbc64014976d2f91084915ca4e626fbf2057fb81af209c1a6d776d23e3d \
+    --hash=sha256:bb649f8b207ab07caebba230d851b579a3c8711a851d29efe15008e31bb4de24 \
+    --hash=sha256:bce43e386c16898b91e162e5baaad90c4b06f9dcbe36282490032cec98dc8ae7 \
+    --hash=sha256:bd3ad3b0a40e713fc68f99ecfd07124195333f1e689387c180813f0e94309d6f \
+    --hash=sha256:c3f7ac96b16955634e223b579a3e5798df59007ca43e8d451a0e6a50f6bfdfba \
+    --hash=sha256:cf28633d64294969c019c6df4ff37f5698e8326db68cc2b66576a51fad634880 \
+    --hash=sha256:d0f35b19894a9e08639fd60a1ec1978cb7f5f7f1eace62f38dd36be8aecdef4d \
+    --hash=sha256:db1f1c22173ac1c58db249ae48aa7ead29f534b9a948bc56828337aa84a32ed6 \
+    --hash=sha256:dbe512c511956b893d2dacd007d955a3f03d555ae05cfa3ff1c1ff6df8851854 \
+    --hash=sha256:df2f57871a96bbc1b69733cd4c51dc33bea66146b8c63cacbfed73eec0883017 \
+    --hash=sha256:e2f085ce2e813a50dfd0e01fbfc0c12bbe5d2063d99f8b29da30e544fb6483b8 \
+    --hash=sha256:e642d86b8f956098b564a45e6f6ce68a22c2c97a04f5acd3f221f57b8cb850ae \
+    --hash=sha256:e9e0a277bb2eb5d8a7407e14688b85fd8ad628ee4e0c7930415687b6564207a4 \
+    --hash=sha256:ea2bb7e2ae9e37d96835b3576a4fa4b3a97592fbea8ef7c3587078b0068b8f09 \
+    --hash=sha256:ee4d528022f4c5ff67332469e10efe06a267e32f4067dc76bb7e2cddf3cd25ff \
+    --hash=sha256:f05d4198c1bacc9124018109c5fba2f3201dbe7ab6e92ff100494f236209c960 \
+    --hash=sha256:f34dc300df798742b3d06515aa2a0aee20941c13579d7a2f2e10af01ae4901ee \
+    --hash=sha256:f4162988a360a29af158aeb4a2f4f09ffed6a969c9776f8f3bdee9b06a8ab7e5 \
+    --hash=sha256:f486038e44caa08dbd97275a9a35a283a8f1d2f0ee60ac260a1790e76660833c \
+    --hash=sha256:f7de08cbe5551911886d1ab60de58448c6df0f67d9feb7d1fb21e9875ef95e91
 packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \
     --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f
diff --git a/pyproject.toml b/pyproject.toml
index 2529c7adc..02e70089f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,24 +2,22 @@
 requires = ["poetry-core>=1.7.0", "setuptools>=67.7.2"]
 build-backend = "poetry.core.masonry.api"
 
-[tool.poetry]
+[project]
 name = "kernel_tuner"
-packages = [{ include = "kernel_tuner", from = "." }]
 description = "An easy to use CUDA/OpenCL kernel tuner in Python"
 version = "1.0" # adhere to PEP440 versioning: https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#id55
+readme = "README.md"
 license = "Apache-2.0"
 authors = [
-    "Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>",
-    "Alessio Sclocco <a.sclocco@esciencecenter.nl>",
-    "Stijn Heldens <s.heldens@esciencecenter.nl>",
-    "Floris-Jan Willemsen <f.j.Willemsen@esciencecenter.nl>",
-    "Willem-Jan Palenstijn <w.j.palenstijn@liacs.leidenuniv.nl>",
-    "Bram Veenboer <veenboer@astron.nl>",
-    "Richard Schoonhoven <Richard.Schoonhoven@cwi.nl>",
-    "Leon Oostrum <l.oostrum@esciencecenter.nl",
+    { name = "Ben van Werkhoven", email = "b.vanwerkhoven@esciencecenter.nl"},
+    { name = "Alessio Sclocco", email = "a.sclocco@esciencecenter.nl" },
+    { name = "Stijn Heldens", email = "s.heldens@esciencecenter.nl" },
+    { name = "Floris-Jan Willemsen", email = "f.j.Willemsen@esciencecenter.nl" },
+    { name = "Willem-Jan Palenstijn", email = "w.j.palenstijn@liacs.leidenuniv.nl" },
+    { name = "Bram Veenboer", email = "veenboer@astron.nl" },
+    { name = "Richard Schoonhoven", email = "Richard.Schoonhoven@cwi.nl"  },
+    { name = "Leon Oostrum", email = "l.oostrum@esciencecenter.nl" },
 ]
-
-readme = "README.md"
 keywords = [
     "auto-tuning",
     "gpu",
@@ -44,55 +42,49 @@ classifiers = [
     "Topic :: Software Development",
     "Topic :: System :: Distributed Computing",
 ]
-include = [
-    { path = "test" },
-] # this ensures that people won't have to clone the whole repo to include notebooks, they can just do `pip install kernel_tuner[tutorial,cuda]`
+
+# ATTENTION: if anything is changed here, run `poetry update`
+requires-python = ">=3.10,<3.15"  # NOTE when changing the Python versions, also change the test versions in the Noxfile and GitHub Actions
+dependencies = [
+    "numpy>=1.26.0",    # Python 3.12 requires numpy at least 1.26
+    "scipy>=1.14.1",
+    "packaging",        # required by file_utils
+    "jsonschema",
+    "python-constraint2>=2.1.0",
+    "xmltodict",
+    "pandas>=2.0.0",
+    "scikit-learn>=1.0.2",
+]
+# NOTE: Torch can be used with Kernel Tuner, but it is not a dependency; installing it is left up to the user.
+
+[project.urls]
 homepage = "https://KernelTuner.github.io/kernel_tuner/"
 documentation = "https://KernelTuner.github.io/kernel_tuner/"
 repository = "https://github.com/KernelTuner/kernel_tuner"
-[tool.poetry.urls]
-"Tracker" = "https://github.com/KernelTuner/kernel_tuner/issues"
-[tool.poetry.build]
-generate-setup-file = false
-[tool.poetry.scripts]
+changelog = "https://github.com/KernelTuner/kernel_tuner/blob/master/CHANGELOG.md"
+issues = "https://github.com/KernelTuner/kernel_tuner/issues"
+
+[project.scripts]
 kernel_tuner = "kernel_tuner.interface:entry_point"
 
-# ATTENTION: if anything is changed here, run `poetry update`
-[tool.poetry.dependencies]
-python = ">=3.10,<3.15"    # NOTE when changing the supported Python versions, also change the test versions in the noxfile
-numpy = "^1.26.0"          # Python 3.12 requires numpy at least 1.26
-scipy = ">=1.14.1"
-packaging = "*"                 # required by file_utils
-jsonschema = "*"
-python-constraint2 = "^2.1.0"
-xmltodict = "*"
-pandas = ">=2.0.0"
-scikit-learn = ">=1.0.2"
-# Torch can be used with Kernel Tuner, but is not a dependency, should be up to the user to use it
+[tool.poetry]
+packages = [{ include = "kernel_tuner", from = "." }]
+include = [
+    { path = "test" },
+] # this ensures that people won't have to clone the whole repo to include notebooks, they can just do `pip install kernel_tuner[tutorial,cuda]`
 
 # List of optional dependencies for user installation, e.g. `pip install kernel_tuner[cuda]`, used in the below `extras`.
 # Please note that this is different from the dependency groups below, e.g. `docs` and `test`, those are for development.
 # CUDA
-pycuda = { version = "^2024.1", optional = true }           # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile
-nvidia-ml-py = { version = "^12.535.108", optional = true }
-pynvml = { version = "^11.4.1", optional = true }
-# cupy-cuda11x = { version = "*", optional = true }    # Note: these are completely optional dependencies as described in CONTRIBUTING.rst
+# cupy-cuda11x = { version = "*", optional = true }    # NOTE: these are completely optional dependencies as described in CONTRIBUTING.rst
 # cupy-cuda12x = { version = "*", optional = true }
 # cuda-python = { version = "*", optional = true }
-# OpenCL
-pyopencl = { version = "*", optional = true } # Attention: if pyopencl is changed here, also change `session.install("pyopencl")` in the Noxfile
-# HIP
-hip-python-fork = { version = "*", optional = true }
-# Tutorial (for the notebooks used in the examples)
-jupyter = { version = "^1.0.0", optional = true }
-matplotlib = { version = "^3.5.0", optional = true }
-
-[tool.poetry.extras]
-cuda = ["pycuda", "nvidia-ml-py", "pynvml"]
-opencl = ["pyopencl"]
-cuda_opencl = ["pycuda", "pyopencl"]
+[project.optional-dependencies]
+cuda = ["pycuda>=2024.1", "nvidia-ml-py>=12.535.108", "pynvml>=11.4.1"] # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile
+opencl = ["pyopencl"]                                                   # Attention: if pyopencl is changed here, also change `session.install("pyopencl")` in the Noxfile
+cuda_opencl = ["pycuda>=2024.1", "pyopencl"]                            # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile
 hip = ["hip-python-fork"]
-tutorial = ["jupyter", "matplotlib", "nvidia-ml-py"]
+tutorial = ["jupyter>=1.0.0", "matplotlib>=3.5.0", "nvidia-ml-py>=12.535.108"]
 
 # ATTENTION: if anything is changed here, run `poetry update` and `poetry export --with docs --without-hashes --format=requirements.txt --output doc/requirements.txt`
 # Please note that there is overlap with the `dev` group

From 19470e440f40f5a90a725dc403425a86e52898a1 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 14:47:41 +0100
Subject: [PATCH 138/168] Removed not yet fully implemented Bayesian
 optimization references, made tests work

---
 kernel_tuner/interface.py                     |   12 +-
 kernel_tuner/strategies/bayes_opt.py          |    6 +-
 kernel_tuner/strategies/bayes_opt_BOTorch.py  |  245 ----
 kernel_tuner/strategies/bayes_opt_GPyTorch.py |  926 --------------
 .../strategies/bayes_opt_GPyTorch_lean.py     | 1084 -----------------
 .../strategies/bayes_opt_alt_BOTorch.py       |   75 --
 kernel_tuner/strategies/bayes_opt_ax.py       |   29 -
 kernel_tuner/strategies/bayes_opt_old.py      |    2 +-
 test/strategies/test_bayesian_optimization.py |    2 +-
 test/test_searchspace.py                      |   16 -
 10 files changed, 6 insertions(+), 2391 deletions(-)
 delete mode 100644 kernel_tuner/strategies/bayes_opt_BOTorch.py
 delete mode 100644 kernel_tuner/strategies/bayes_opt_GPyTorch.py
 delete mode 100644 kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
 delete mode 100644 kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
 delete mode 100644 kernel_tuner/strategies/bayes_opt_ax.py

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 5f4c1b628..9741bd1d8 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -50,11 +50,6 @@
 from kernel_tuner.strategies import (
     basinhopping,
     bayes_opt,
-    bayes_opt_alt_BOTorch,
-    bayes_opt_BOTorch,
-    bayes_opt_GPyTorch,
-    bayes_opt_GPyTorch_lean,
-    bayes_opt_old,
     brute_force,
     diff_evo,
     dual_annealing,
@@ -85,12 +80,7 @@
     "pso": pso,
     "simulated_annealing": simulated_annealing,
     "firefly_algorithm": firefly_algorithm,
-    "bayes_opt": bayes_opt,
-    "bayes_opt_old": bayes_opt_old,
-    "bayes_opt_GPyTorch": bayes_opt_GPyTorch,
-    "bayes_opt_GPyTorch_lean": bayes_opt_GPyTorch_lean,
-    "bayes_opt_BOTorch": bayes_opt_BOTorch,
-    "bayes_opt_BOTorch_alt": bayes_opt_alt_BOTorch,
+    "bayes_opt": bayes_opt
 }
 
 
diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
index e4c9c52a2..775e4193a 100644
--- a/kernel_tuner/strategies/bayes_opt.py
+++ b/kernel_tuner/strategies/bayes_opt.py
@@ -238,7 +238,7 @@ def get_hyperparam(name: str, default, supported_values=list()):
         self.invalid_value = 1e20
         self.opt_direction = opt_direction
         if opt_direction == "min":
-            self.worst_value = np.PINF
+            self.worst_value = np.inf
             self.argopt = np.argmin
         elif opt_direction == "max":
             self.worst_value = np.NINF
@@ -265,7 +265,7 @@ def get_hyperparam(name: str, default, supported_values=list()):
         self.__visited_num = 0
         self.__visited_valid_num = 0
         self.__visited_searchspace_indices = [False] * self.searchspace_size
-        self.__observations = [np.NaN] * self.searchspace_size
+        self.__observations = [np.nan] * self.searchspace_size
         self.__valid_observation_indices = [False] * self.searchspace_size
         self.__valid_params = list()
         self.__valid_observations = list()
@@ -314,7 +314,7 @@ def is_not_visited(self, index: int) -> bool:
 
     def is_valid(self, observation: float) -> bool:
         """Returns whether an observation is valid."""
-        return not (observation is None or observation == self.invalid_value or observation == np.NaN)
+        return not (observation is None or observation == self.invalid_value or observation != observation)  # NaN never equals itself, so `== np.nan` is always False; self-inequality is the NaN test
 
     def get_af_by_name(self, name: str):
         """Get the basic acquisition functions by their name."""
diff --git a/kernel_tuner/strategies/bayes_opt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_BOTorch.py
deleted file mode 100644
index 5ee2854dc..000000000
--- a/kernel_tuner/strategies/bayes_opt_BOTorch.py
+++ /dev/null
@@ -1,245 +0,0 @@
-"""Bayesian Optimization implementation using BO Torch."""
-
-from math import ceil, sqrt
-
-import numpy as np
-
-try:
-    import torch
-    from botorch import fit_gpytorch_mll
-    from botorch.acquisition import (
-        LogExpectedImprovement,
-        ProbabilityOfImprovement,
-        qExpectedUtilityOfBestOption,
-        qLogExpectedImprovement,
-        qLowerBoundMaxValueEntropy,
-    )
-    from botorch.models import MixedSingleTaskGP, SingleTaskGP, SingleTaskVariationalGP
-    from botorch.models.transforms import Normalize, Standardize
-    from botorch.optim import optimize_acqf_discrete
-    from botorch.optim.fit import fit_gpytorch_mll_torch
-    from gpytorch.mlls import ExactMarginalLogLikelihood, VariationalELBO
-    from torch import Tensor
-    bayes_opt_present = True
-except ImportError:
-    bayes_opt_present = False
-
-import gpytorch.settings as gp_settings
-import linear_operator.settings as linop_settings
-
-from kernel_tuner.searchspace import Searchspace
-from kernel_tuner.strategies.common import CostFunc
-from kernel_tuner.util import ErrorConfig, StopCriterionReached
-
-# set gpytorch to approximate mode for faster fitting
-linop_settings._fast_covar_root_decomposition._default = True
-linop_settings._fast_log_prob._default = True
-linop_settings._fast_solves._default = True
-linop_settings.cholesky_max_tries._global_value = 6
-linop_settings.max_cholesky_size._global_value = 800
-gp_settings.max_eager_kernel_size._global_value = 800
-
-
-def tune(searchspace: Searchspace, runner, tuning_options):
-    """The entry function for tuning a searchspace using this algorithm."""
-    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
-    bo = BayesianOptimization(searchspace, runner, tuning_options)
-    return bo.run(max_fevals)
-
-class BayesianOptimization():
-    """Bayesian Optimization class."""
-
-    def __init__(self, searchspace: Searchspace, runner, tuning_options):
-        """Initialization of the Bayesian Optimization class. Does not evaluate configurations."""
-        self.initial_sample_taken = False
-        self.initial_sample_size: int = tuning_options.strategy_options.get("popsize", 20)
-        self.tuning_options = tuning_options
-        self.cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, return_invalid=True, return_raw=True)
-        self.maximize = tuning_options['objective_higher_is_better']
-
-        # select the device to use (CUDA or Apple Silicon MPS if available)
-        # TODO keep an eye on Apple Silicon support. Currently `linalg_cholesky` is not yet implemented for MPS (issue reported: https://github.com/pytorch/pytorch/issues/77764).
-        self.tensor_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-        # set up conversion to tensors
-        self.searchspace = searchspace
-        self.searchspace.initialize_tensorspace(dtype=torch.float32, device=self.tensor_device)
-        self.searchspace_tensors = searchspace.get_tensorspace()
-        self.train_X = torch.empty(0, **self.searchspace.tensor_kwargs) # TODO implement continuing from cache
-        self.train_Y = torch.empty(0, **self.searchspace.tensor_kwargs)
-        self.train_Yvar = torch.empty(0, **self.searchspace.tensor_kwargs)
-
-    def is_valid_result(self, result, results=None):
-        """Returns whether the result is valid."""
-        if results is None:
-            results = []
-        return not isinstance(result, ErrorConfig) and not np.isnan(result) and not any(np.isnan(results))
-
-    def run_config(self, config: tuple):
-        """Run a single configuration. Returns the result and whether it is valid."""
-        result, results = self.cost_func(config)
-        results = np.array(results)
-        var = np.nan
-        valid = self.is_valid_result(result, results)
-        if not valid:
-            result = np.nan
-        elif not self.maximize:
-            result = -result
-            results = -results
-        if valid:
-            var = np.var(results)
-        return [result], [var], valid
-
-    def evaluate_configs(self, X: Tensor):
-        """Evaluate a tensor of one or multiple configurations. Modifies train_X and train_Y accordingly."""
-        if isinstance(X, Tensor):
-            valid_configs = []
-            valid_results = []
-            valid_vars = []
-            if X.dim() == 1:
-                X = [X]
-            for config in X:
-                assert isinstance(config, Tensor), f"Config must be a Tensor, but is of type {type(config)} ({config})"
-                param_config = self.searchspace.tensor_to_param_config(config)
-                res, var, valid = self.run_config(param_config)
-                if valid:
-                    valid_configs.append(config)
-                    valid_results.append(res)
-                    valid_vars.append(var)
-                
-                # remove evaluated configurations from the full searchspace
-                index = self.searchspace.get_param_config_index(param_config)
-                self.searchspace_tensors = torch.cat((self.searchspace_tensors[:index], 
-                                                      self.searchspace_tensors[index+1:]))
-
-            # add valid results to the training set
-            if len(valid_configs) > 0 and len(valid_results) > 0 and len(valid_vars) > 0:
-                self.train_X = torch.cat([self.train_X, torch.stack(valid_configs)])
-                self.train_Y = torch.cat([self.train_Y, torch.tensor(valid_results, **self.searchspace.tensor_kwargs)])
-                self.train_Yvar = torch.cat([self.train_Yvar, torch.tensor(valid_vars, **self.searchspace.tensor_kwargs)])
-            return valid_results
-        else:
-            raise NotImplementedError(f"Evaluation has not been implemented for type {type(X)}")
-        
-    def initial_sample(self):
-        """Take an initial sample."""
-        self.initial_sample_taken = True
-        if self.initial_sample_size > 0:
-            sample_indices = torch.from_numpy(self.searchspace.get_random_sample_indices(self.initial_sample_size)).to(self.tensor_device)
-            sample_configs = self.searchspace_tensors.index_select(0, sample_indices)
-            self.evaluate_configs(sample_configs)
-
-    def get_model_and_likelihood(self, searchspace: Searchspace, train_X: Tensor, train_Y: Tensor, train_Yvar: Tensor=None, state_dict=None, exact=True):
-        """Initialize a model and likelihood, possibly with a state dict for faster fitting."""
-        bounds, bounds_indices = searchspace.get_tensorspace_bounds()
-        transforms = dict(
-            input_transform=Normalize(d=train_X.shape[-1], indices=bounds_indices, bounds=bounds),
-            outcome_transform=Standardize(m=train_Y.shape[-1], batch_shape=train_X.shape[:-2])
-        )
-
-        # initialize the model
-        if exact:
-            catdims = searchspace.get_tensorspace_categorical_dimensions()
-            if len(catdims) == 0:
-                model = SingleTaskGP(train_X, train_Y, train_Yvar=train_Yvar, **transforms)
-            else:
-                model = MixedSingleTaskGP(train_X, train_Y, train_Yvar=train_Yvar, cat_dims=catdims, **transforms)
-        else:
-            model = SingleTaskVariationalGP(train_X, train_Y, **transforms)
-
-        # load the previous state
-        if exact and state_dict is not None:
-            model.load_state_dict(state_dict)
-
-        # initialize the likelihood
-        if exact:
-            mll = ExactMarginalLogLikelihood(model.likelihood, model)
-        else:
-            mll = VariationalELBO(model.likelihood, model.model, num_data=train_Y.size(0))
-        return model, mll
-    
-    def fit(self, mll):
-        """Fit a Marginal Log Likelihood."""
-        return fit_gpytorch_mll(mll, optimizer=fit_gpytorch_mll_torch)
-
-    def run(self, max_fevals: int, max_batch_size=2048):
-        """Run the Bayesian Optimization loop for at most `max_fevals`."""
-        try:
-            if not self.initial_sample_taken:
-                self.initial_sample()
-            model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar)
-            fevals_left = max_fevals - self.initial_sample_size
-
-            # create array to gradually reduce number of optimization spaces as fewer fevals are left
-            tensorspace_size = self.searchspace_tensors.size(0)
-            reserve_final_loops = min(3, fevals_left)   # reserve some loops at the end that are never split
-            fevals_left -= reserve_final_loops
-            num_loops = min(max(round(sqrt(fevals_left*2)), 3), fevals_left)  # set the number of loops for the array
-            avg_optimization_spaces = max(round(sqrt(tensorspace_size / max_batch_size)), 1)  # set the average number of optimization spaces
-            numspace = np.geomspace(start=avg_optimization_spaces, stop=0.1, num=num_loops)
-            nums_optimization_spaces = np.clip(np.round(numspace * (fevals_left / numspace.sum())), a_min=1, a_max=None)
-            # if there's a discrepency, add or subtract the difference from the first number
-            if np.sum(nums_optimization_spaces) != fevals_left:
-                nums_optimization_spaces[0] += fevals_left - np.sum(nums_optimization_spaces)
-            nums_optimization_spaces = np.concatenate([nums_optimization_spaces, np.full(reserve_final_loops, 1)])
-            fevals_left += reserve_final_loops
-
-            # Bayesian optimization loop
-            for loop_i, num_optimization_spaces in enumerate(nums_optimization_spaces):
-                num_optimization_spaces = min(num_optimization_spaces, fevals_left)
-
-                # fit on a Gaussian Process model
-                mll = self.fit(mll)
-                
-                # define the acquisition function
-                acqf = LogExpectedImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
-                # acqf = NoisyExpectedImprovement(model=model, , maximize=True)
-                # acqf = ProbabilityOfImprovement(model=model, best_f=self.train_Y.max(), maximize=True)
-                # acqf = qLowerBoundMaxValueEntropy(model=model, candidate_set=self.searchspace_tensors, maximize=True)
-                # acqf = qLogExpectedImprovement(model=model, best_f=self.train_Y.max())
-                # acqf = qExpectedUtilityOfBestOption(pref_model=model)
-
-                # divide the optimization space into random chuncks
-                tensorspace_size = self.searchspace_tensors.size(0)
-                if num_optimization_spaces <= 1:
-                    optimization_spaces = [self.searchspace_tensors]
-                else:
-                    # shuffle the searchspace
-                    shuffled_indices = torch.randperm(tensorspace_size)
-                    tensorspace = self.searchspace_tensors[shuffled_indices]
-                    optimization_spaces = tensorspace.split(ceil(tensorspace_size / num_optimization_spaces))
-                
-                # optimize acquisition function to find the next evaluation point
-                for optimization_space in optimization_spaces:
-
-                    # NOTE optimize_acqf_discrete_local_search does not work with variable optimization_space size
-                    # optimize over a lattice if the space is too large
-                    # if len(optimization_spaces) == 1 and max_batch_size < optimization_space.size(0):
-                    #     candidate, _ = optimize_acqf_discrete_local_search(
-                    #         acqf,
-                    #         q=1,
-                    #         discrete_choices=optimization_space,
-                    #         max_batch_size=max_batch_size,
-                    #         num_restarts=5,
-                    #         raw_samples=1024
-                    #     )
-                    # else:
-                    candidate, _ = optimize_acqf_discrete(
-                        acqf, 
-                        q=1, 
-                        choices=optimization_space,
-                        max_batch_size=max_batch_size
-                    )
-                    
-                    # evaluate the new candidate
-                    self.evaluate_configs(candidate)
-                    fevals_left -= 1
-
-                # reinitialize the models so they are ready for fitting on next iteration
-                if loop_i < len(nums_optimization_spaces) - 1:
-                    model, mll = self.get_model_and_likelihood(self.searchspace, self.train_X, self.train_Y, self.train_Yvar, state_dict=model.state_dict())
-        except StopCriterionReached as e:
-            if self.tuning_options.verbose:
-                print(e)
-
-        return self.cost_func.results
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch.py b/kernel_tuner/strategies/bayes_opt_GPyTorch.py
deleted file mode 100644
index 39da1c30d..000000000
--- a/kernel_tuner/strategies/bayes_opt_GPyTorch.py
+++ /dev/null
@@ -1,926 +0,0 @@
-"""Bayesian Optimization implementation from the thesis by Willemsen."""
-import itertools
-import time
-from copy import deepcopy
-from random import randint, shuffle
-from typing import Tuple
-
-import numpy as np
-from scipy.stats import norm
-
-# BO imports
-try:
-    import gpytorch
-    import torch
-    from sklearn.exceptions import ConvergenceWarning
-    from sklearn.gaussian_process.kernels import RBF, ConstantKernel, Matern
-    from skopt.sampler import Lhs
-    bayes_opt_present = True
-
-    class ExactGPModel(gpytorch.models.ExactGP):
-        """Very simple exact Gaussian Process model."""
-
-        def __init__(self, train_x, train_y, likelihood):
-            super(gpytorch.models.ExactGP, self).__init__(train_x, train_y, likelihood)
-            self.mean_module = gpytorch.means.ZeroMean()    # TODO maybe try ConstantMean or LinearMean
-            self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=1.5))    # TODO maybe try ScaleKernel(MaternKernel)
-
-        def forward(self, x):
-            mean_x = self.mean_module(x)
-            covar_x = self.covar_module(x)
-            return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
-except ImportError:
-    bayes_opt_present = False
-
-    class ExactGPModel():
-        def __init__(self, train_x, train_y, likelihood):
-            raise ImportError("GPyTorch not imported")
-        def forward(self, x):
-            raise ImportError("GPyTorch not imported")
-
-from kernel_tuner import util
-from kernel_tuner.strategies import minimize
-
-supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast"]
-
-
-def generate_normalized_param_dicts(tune_params: dict, eps: float) -> Tuple[dict, dict]:
-    """Generates normalization and denormalization dictionaries."""
-    original_to_normalized = dict()
-    normalized_to_original = dict()
-    for param_name in tune_params.keys():
-        original_to_normalized_dict = dict()
-        normalized_to_original_dict = dict()
-        for value_index, value in enumerate(tune_params[param_name]):
-            normalized_value = eps * value_index + 0.5 * eps
-            normalized_to_original_dict[normalized_value] = value
-            original_to_normalized_dict[value] = normalized_value
-        original_to_normalized[param_name] = original_to_normalized_dict
-        normalized_to_original[param_name] = normalized_to_original_dict
-    return original_to_normalized, normalized_to_original
-
-
-def normalize_parameter_space(param_space: list, tune_params: dict, normalized: dict) -> list:
-    """Normalize the parameter space given a normalization dictionary."""
-    keys = list(tune_params.keys())
-    param_space_normalized = list(tuple(normalized[keys[i]][v] for i, v in enumerate(params)) for params in param_space)
-    return param_space_normalized
-
-
-def prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict: dict, max_threads: int):
-    """Pruning of the parameter space to remove dimensions that have a constant parameter."""
-    pruned_tune_params_mask = list()
-    removed_tune_params = list()
-    param_names = list(tune_params.keys())
-    for index, key in enumerate(tune_params.keys()):
-        pruned_tune_params_mask.append(len(tune_params[key]) > 1)
-        if len(tune_params[key]) > 1:
-            removed_tune_params.append(None)
-        else:
-            value = tune_params[key][0]
-            normalized = normalize_dict[param_names[index]][value]
-            removed_tune_params.append(normalized)
-    if 'verbose' in tuning_options and tuning_options.verbose is True and len(tune_params.keys()) != sum(pruned_tune_params_mask):
-        print(f"Number of parameters (dimensions): {len(tune_params.keys())}, after pruning: {sum(pruned_tune_params_mask)}")
-    # TODO check whether the number of pruned parameters is correct
-    # print(
-    #     f"Number of parameters (dimensions): {len(tune_params.keys())}, after pruning: {sum(pruned_tune_params_mask)}, by util: {util.get_number_of_valid_configs(tuning_options, max_threads)}"
-    # )
-    parameter_space = list(tuple(itertools.compress(param_config, pruned_tune_params_mask)) for param_config in parameter_space)
-    return parameter_space, removed_tune_params
-
-
-def tune(runner, kernel_options, device_options, tuning_options):
-    """Find the best performing kernel configuration in the parameter space.
-
-    :params runner: A runner from kernel_tuner.runners
-    :type runner: kernel_tuner.runner
-
-    :param kernel_options: A dictionary with all options for the kernel.
-    :type kernel_options: kernel_tuner.interface.Options
-
-    :param device_options: A dictionary with all options for the device
-        on which the kernel should be tuned.
-    :type device_options: kernel_tuner.interface.Options
-
-    :param tuning_options: A dictionary with all options regarding the tuning
-        process. Allows setting hyperparameters via the strategy_options key.
-    :type tuning_options: kernel_tuner.interface.Options
-
-    :returns: A list of dictionaries for executed kernel configurations and their
-        execution times. And a dictionary that contains a information
-        about the hardware/software environment on which the tuning took place.
-    :rtype: list(dict()), dict()
-
-    """
-    if not bayes_opt_present:
-        raise ImportError(
-            "Error: optional dependencies for Bayesian Optimization not installed, please install torch and gpytorch"
-        )
-
-    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
-    prune_parameterspace = tuning_options.strategy_options.get("pruneparameterspace", True)
-    if not bayes_opt_present:
-        raise ImportError("Error: optional dependencies for Bayesian Optimization not installed, please install scikit-learn and scikit-optimize")
-
-    # epsilon for scaling should be the evenly spaced distance between the largest set of parameter options in an interval [0,1]
-    tune_params = tuning_options.tune_params
-    tuning_options["scaling"] = True
-    _, _, eps = minimize.get_bounds_x0_eps(tuning_options)
-
-    # compute cartesian product of all tunable parameters
-    parameter_space = itertools.product(*tune_params.values())
-
-    # check for search space restrictions
-    if tuning_options.restrictions is not None:
-        tuning_options.verbose = False
-    parameter_space = filter(lambda p: util.config_valid(p, tuning_options, runner.dev.max_threads), parameter_space)
-    parameter_space = list(parameter_space)
-    if len(parameter_space) < 1:
-        raise ValueError("Empty parameterspace after restrictionscheck. Restrictionscheck is possibly too strict.")
-    if len(parameter_space) == 1:
-        raise ValueError(f"Only one configuration after restrictionscheck. Restrictionscheck is possibly too strict. Configuration: {parameter_space[0]}")
-
-    # normalize search space to [0,1]
-    normalize_dict, denormalize_dict = generate_normalized_param_dicts(tune_params, eps)
-    parameter_space = normalize_parameter_space(parameter_space, tune_params, normalize_dict)
-
-    # prune the parameter space to remove dimensions that have a constant parameter
-    if prune_parameterspace:
-        parameter_space, removed_tune_params = prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict, runner.dev.max_threads)
-    else:
-        parameter_space = list(parameter_space)
-        removed_tune_params = [None] * len(tune_params.keys())
-
-    # initialize and optimize
-    bo = BayesianOptimization(parameter_space, removed_tune_params, kernel_options, tuning_options, normalize_dict, denormalize_dict, runner)
-    results = bo.optimize(max_fevals)
-
-    return results, runner.dev.get_environment()
-
-
-class BayesianOptimization():
-
-    def __init__(self, searchspace: list, removed_tune_params: list, kernel_options: dict, tuning_options: dict, normalize_dict: dict, denormalize_dict: dict,
-                 runner, opt_direction='min'):
-        time_start = time.perf_counter_ns()
-
-        # supported hyperparameter values
-        self.supported_cov_kernels = ["constantrbf", "rbf", "matern32", "matern52"]
-        self.supported_methods = supported_methods
-        self.supported_sampling_methods = ["random", "lhs"]
-        self.supported_sampling_criterion = ["correlation", "ratio", "maximin", None]
-
-        def get_hyperparam(name: str, default, supported_values=list()):
-            value = tuning_options.strategy_options.get(name, default)
-            if len(supported_values) > 0 and value not in supported_values:
-                raise ValueError(f"'{name}' is set to {value}, but must be one of {supported_values}")
-            return value
-
-        # get hyperparameters
-        get_hyperparam("covariancekernel", "matern32", self.supported_cov_kernels)
-        get_hyperparam("covariancelengthscale", 1.5)
-        acquisition_function = get_hyperparam("method", "multi-advanced", self.supported_methods)
-        acq = acquisition_function
-        acq_params = get_hyperparam("methodparams", {})
-        multi_af_names = get_hyperparam("multi_af_names", ['ei', 'poi', 'lcb'])
-        self.multi_afs_discount_factor = get_hyperparam("multi_af_discount_factor", 0.65 if acq == 'multi' else 0.95)
-        self.multi_afs_required_improvement_factor = get_hyperparam("multi_afs_required_improvement_factor", 0.15 if acq == 'multi-advanced-precise' else 0.1)
-        self.training_iter = get_hyperparam("training_iter", 10)
-        self.num_initial_samples = get_hyperparam("popsize", 20)
-        self.sampling_method = get_hyperparam("samplingmethod", "lhs", self.supported_sampling_methods)
-        self.sampling_crit = get_hyperparam("samplingcriterion", 'maximin', self.supported_sampling_criterion)
-        self.sampling_iter = get_hyperparam("samplingiterations", 1000)
-
-        # set acquisition function hyperparameter defaults where missing
-        if 'explorationfactor' not in acq_params:
-            acq_params['explorationfactor'] = 'CV'
-        if 'zeta' not in acq_params:
-            acq_params['zeta'] = 1
-        if 'skip_duplicate_after' not in acq_params:
-            acq_params['skip_duplicate_after'] = 5
-
-        # set arguments
-        self.kernel_options = kernel_options
-        self.tuning_options = tuning_options
-        self.tune_params = tuning_options.tune_params
-        self.param_names = list(self.tune_params.keys())
-        self.normalized_dict = normalize_dict
-        self.denormalized_dict = denormalize_dict
-        self.runner = runner
-        self.max_threads = runner.dev.max_threads
-        self.log_timings = False
-
-        # set optimization constants
-        self.invalid_value = 1e20
-        self.opt_direction = opt_direction
-        if opt_direction == 'min':
-            self.worst_value = np.PINF
-            self.argopt = np.argmin
-        elif opt_direction == 'max':
-            self.worst_value = np.NINF
-            self.argopt = np.argmax
-        else:
-            raise ValueError("Invalid optimization direction '{}'".format(opt_direction))
-
-        # set the acquisition function and surrogate model
-        self.optimize = self.__optimize
-        self.af_name = acquisition_function
-        self.af_params = acq_params
-        self.multi_afs = list(self.get_af_by_name(af_name) for af_name in multi_af_names)
-        self.set_acquisition_function(acquisition_function)
-        # self.set_surrogate_model(cov_kernel_name, cov_kernel_lengthscale)
-
-        # set remaining values
-        self.results = []
-        self.__searchspace = searchspace
-        self.removed_tune_params = removed_tune_params
-        self.searchspace_size = len(self.searchspace)
-        self.hyperparams = {
-            'loss': np.nan,
-            'lengthscale': np.nan,
-            'noise': np.nan,
-        }
-        self.num_dimensions = len(self.dimensions())
-        self.__current_optimum = self.worst_value
-        self.cv_norm_maximum = None
-        self.fevals = 0
-        self.__visited_num = 0
-        self.__visited_valid_num = 0
-        self.__visited_searchspace_indices = [False] * self.searchspace_size
-        self.__observations = [np.NaN] * self.searchspace_size
-        self.__valid_observation_indices = [False] * self.searchspace_size
-        self.__valid_params = list()
-        self.__valid_observations = list()
-        self.unvisited_cache = self.unvisited()
-        time_setup = time.perf_counter_ns()
-        self.error_message_searchspace_fully_observed = "The search space has been fully observed"
-
-        # take initial sample
-        self.initial_sample()
-        time_initial_sample = time.perf_counter_ns()
-
-        # print the timings
-        if self.log_timings:
-            time_taken_setup = round(time_setup - time_start, 3) / 1000
-            time_taken_initial_sample = round(time_initial_sample - time_setup, 3) / 1000
-            time_taken_total = round(time_initial_sample - time_start, 3) / 1000
-            print(f"Initialization | total time: {time_taken_total} | Setup: {time_taken_setup} | Initial sample: {time_taken_initial_sample}", flush=True)
-
-    @property
-    def searchspace(self):
-        return self.__searchspace
-
-    @property
-    def observations(self):
-        return self.__observations
-
-    @property
-    def current_optimum(self):
-        return self.__current_optimum
-
-    @current_optimum.setter
-    def current_optimum(self, value: float):
-        self.__current_optimum = value
-
-    def is_better_than(self, a: float, b: float) -> bool:
-        """Determines which one is better depending on optimization direction."""
-        return a < b if self.opt_direction == 'min' else a > b
-
-    def is_not_visited(self, index: int) -> bool:
-        """Returns whether a searchspace index has not been visited."""
-        return not self.__visited_searchspace_indices[index]
-
-    def is_valid(self, observation: float) -> bool:
-        """Returns whether an observation is valid."""
-        return not (observation is None or observation == self.invalid_value or observation == np.NaN)
-
-    def get_af_by_name(self, name: str):
-        """Get the basic acquisition functions by their name."""
-        basic_af_names = ['ei', 'poi', 'lcb']
-        if name == 'ei':
-            return self.af_expected_improvement
-        elif name == 'poi':
-            return self.af_probability_of_improvement
-        elif name == 'lcb':
-            return self.af_lower_confidence_bound
-        raise ValueError(f"{name} not in {basic_af_names}")
-
-    def set_acquisition_function(self, acquisition_function: str):
-        """Set the acquisition function."""
-        if acquisition_function == 'poi':
-            self.__af = self.af_probability_of_improvement
-        elif acquisition_function == 'ei':
-            self.__af = self.af_expected_improvement
-        elif acquisition_function == 'lcb':
-            self.__af = self.af_lower_confidence_bound
-        elif acquisition_function == 'lcb-srinivas':
-            self.__af = self.af_lower_confidence_bound_srinivas
-        elif acquisition_function == 'random':
-            self.__af = self.af_random
-        elif acquisition_function == 'multi':
-            self.optimize = self.__optimize_multi
-        elif acquisition_function == 'multi-advanced':
-            self.optimize = self.__optimize_multi_advanced
-        elif acquisition_function == 'multi-fast':
-            self.optimize = self.__optimize_multi_fast
-        else:
-            raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
-
-    def set_surrogate_model(self, cov_kernel_name: str, cov_kernel_lengthscale: float):
-        """Set the surrogate model with a covariance function and lengthscale."""
-        # TODO remove or adapt this
-        if cov_kernel_name == "constantrbf":
-            ConstantKernel(1.0, constant_value_bounds="fixed") * RBF(cov_kernel_lengthscale, length_scale_bounds="fixed")
-        elif cov_kernel_name == "rbf":
-            RBF(length_scale=cov_kernel_lengthscale, length_scale_bounds="fixed")
-        elif cov_kernel_name == "matern32":
-            Matern(length_scale=cov_kernel_lengthscale, nu=1.5, length_scale_bounds="fixed")
-        elif cov_kernel_name == "matern52":
-            Matern(length_scale=cov_kernel_lengthscale, nu=2.5, length_scale_bounds="fixed")
-        else:
-            raise ValueError(f"Acquisition function must be one of {self.supported_cov_kernels}, is {cov_kernel_name}")
-        likelihood = gpytorch.likelihoods.GaussianLikelihood()
-        self.__model = ExactGPModel(train_x, train_y, likelihood)
-        # self.__model = GaussianProcessRegressor(kernel=kernel, alpha=1e-10, normalize_y=True)    # maybe change alpha to a higher value such as 1e-5?
-
-    def valid_params_observations(self) -> Tuple[list, list]:
-        """Returns a list of valid observations and their parameter configurations."""
-        # if you do this every iteration, better keep it as cache and update in update_after_evaluation
-        params = list()
-        observations = list()
-        for index, valid in enumerate(self.__valid_observation_indices):
-            if valid is True:
-                params.append(self.searchspace[index])
-                observations.append(self.observations[index])
-        return params, observations
-
-    def unvisited(self) -> list:
-        """Returns a list of unvisited parameter configurations - attention: cached version exists!"""
-        params = list(self.searchspace[index] for index, visited in enumerate(self.__visited_searchspace_indices) if visited is False)
-        return params
-
-    def find_param_config_index(self, param_config: tuple) -> int:
-        """Find a parameter config index in the search space if it exists."""
-        return self.searchspace.index(param_config)
-
-    def find_param_config_unvisited_index(self, param_config: tuple) -> int:
-        """Find a parameter config index in the unvisited cache if it exists."""
-        return self.unvisited_cache.index(param_config)
-
-    def normalize_param_config(self, param_config: tuple) -> tuple:
-        """Normalizes a parameter configuration."""
-        normalized = tuple(self.normalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
-        return normalized
-
-    def denormalize_param_config(self, param_config: tuple) -> tuple:
-        """Denormalizes a parameter configuration."""
-        denormalized = tuple(self.denormalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
-        return denormalized
-
-    def unprune_param_config(self, param_config: tuple) -> tuple:
-        """In case of pruned dimensions, adds the removed dimensions back in the param config."""
-        unpruned = list()
-        pruned_count = 0
-        for removed in self.removed_tune_params:
-            if removed is not None:
-                unpruned.append(removed)
-            else:
-                unpruned.append(param_config[pruned_count])
-                pruned_count += 1
-        return tuple(unpruned)
-
-    def update_after_evaluation(self, observation: float, index: int, param_config: tuple):
-        """Adjust the visited and valid index records accordingly."""
-        validity = self.is_valid(observation)
-        self.__visited_num += 1
-        self.__observations[index] = observation
-        self.__visited_searchspace_indices[index] = True
-        del self.unvisited_cache[self.find_param_config_unvisited_index(param_config)]
-        self.__valid_observation_indices[index] = validity
-        if validity is True:
-            self.__visited_valid_num += 1
-            self.__valid_params.append(param_config)
-            self.__valid_observations.append(observation)
-            if self.is_better_than(observation, self.current_optimum):
-                self.current_optimum = observation
-
-    def predict(self, x) -> Tuple[float, float]:
-        """Returns a mean and standard deviation predicted by the surrogate model for the parameter configuration."""
-        return self.__model.predict([x], return_std=True)
-
-    def predict_list(self, lst: list) -> Tuple[np.ndarray, np.ndarray]:
-        """Returns a list of means and standard deviations predicted by the surrogate model for the parameter configurations, and separate lists of means and standard deviations."""
-        with torch.no_grad(), gpytorch.settings.fast_pred_var():
-            # TODO use torch.cuda for GPU
-            test_x = torch.Tensor(lst)
-            observed_pred = self.__likelihood(self.__model(test_x))
-            mu = observed_pred.mean
-            std = observed_pred.variance
-            return mu.numpy(), std.numpy()
-
-    def evaluate_objective_function(self, param_config: tuple) -> float:
-        """Evaluates the objective function."""
-        param_config = self.unprune_param_config(param_config)
-        denormalized_param_config = self.denormalize_param_config(param_config)
-        if not util.config_valid(denormalized_param_config, self.tuning_options, self.max_threads):
-            return self.invalid_value
-        val = minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.results)
-        self.fevals += 1
-        self.add_model_hyperparams_to_result(denormalized_param_config)
-        return val
-
-    def add_model_hyperparams_to_result(self, param_config: tuple):
-        """Add the model parameters (loss and noise) to the results dict at the last result."""
-        # assert that the results index corresponds to the last index
-        assert self.find_config_index_in_results(param_config) == len(self.results) - 1
-
-        for key, value in self.hyperparams.items():
-            # print(f"{key}: {value}")
-            self.results[-1][key] = value
-
-    def find_config_index_in_results(self, param_config: tuple):
-        """Find the index of a parameter configuration in the results. Beware that this can be very slow!"""
-        found_indices = list()
-        for results_index, result_dict in enumerate(self.results):
-            keys = list(result_dict.keys())
-            found = True
-            for index, value in enumerate(param_config):
-                if result_dict[keys[index]] != value:
-                    found = False
-            if found is True:
-                found_indices.append(results_index)
-        assert len(found_indices) == 1
-        return found_indices[0]
-
-    def dimensions(self) -> list:
-        """List of parameter values per parameter."""
-        return self.tune_params.values()
-
-    def draw_random_sample(self) -> Tuple[list, int]:
-        """Draw a random sample from the unvisited parameter configurations."""
-        if len(self.unvisited_cache) < 1:
-            raise ValueError("Searchspace exhausted during random sample draw as no valid configurations were found")
-        index = randint(0, len(self.unvisited_cache) - 1)    # NOSONAR
-        param_config = self.unvisited_cache[index]
-        actual_index = self.find_param_config_index(param_config)
-        return param_config, actual_index
-
-    def draw_latin_hypercube_samples(self, num_samples: int) -> list:
-        """Draws an LHS-distributed sample from the search space."""
-        if self.searchspace_size < num_samples:
-            raise ValueError("Can't sample more than the size of the search space")
-        if self.sampling_crit is None:
-            lhs = Lhs(lhs_type="centered", criterion=None)
-        else:
-            lhs = Lhs(lhs_type="classic", criterion=self.sampling_crit, iterations=self.sampling_iter)
-        param_configs = lhs.generate(self.dimensions(), num_samples)
-        indices = list()
-        normalized_param_configs = list()
-        for i in range(len(param_configs) - 1):
-            try:
-                param_config = self.normalize_param_config(param_configs[i])
-                index = self.find_param_config_index(param_config)
-                indices.append(index)
-                normalized_param_configs.append(param_config)
-            except ValueError:
-                """ Due to search space restrictions, the search space may not be an exact cartesian product of the tunable parameter values.
-                It is thus possible for LHS to generate a parameter combination that is not in the actual searchspace, which must be skipped. """
-                continue
-        return list(zip(normalized_param_configs, indices))
-
-    def train_model_hyperparams(self):
-        """Train the model and likelihood hyperparameters."""
-        # set to training modes
-        self.__model.train()
-        self.__likelihood.train()
-
-        # Use the adam optimizer
-        optimizer = torch.optim.Adam(self.__model.parameters(), lr=0.1)    # Includes GaussianLikelihood parameters
-
-        # "Loss" for GPs - the marginal log likelihood
-        mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.__likelihood, self.__model)
-
-        loss = 0
-        for i in range(self.training_iter):
-            # Zero gradients from previous iteration
-            optimizer.zero_grad()
-            # Output from model
-            output = self.__model(self.__tparams)
-            # Calc loss and backprop gradients
-            loss = -mll(output, self.__tobservations)
-            loss.backward()
-            # print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' %
-            #       (i + 1, self.training_iter, loss.item(), self.__model.covar_module.base_kernel.lengthscale.item(), self.__model.likelihood.noise.item()))
-            optimizer.step()
-
-        # set to prediction mode
-        self.__model.eval()
-        self.__likelihood.eval()
-
-        # set the hyperparameters globally for reference
-        self.hyperparams = {
-            'loss': loss.item(),
-            'lengthscale': self.__model.covar_module.base_kernel.lengthscale.item(),
-            'noise': self.__model.likelihood.noise.item(),
-        }
-        # print(f"Loss: {self.hyperparams['loss']}, lengthscale: {self.hyperparams['lengthscale']}, noise: {self.hyperparams['noise']}")
-
-    def initial_sample(self):
-        """Draws an initial sample using random sampling."""
-        if self.num_initial_samples <= 0:
-            raise ValueError("At least one initial sample is required")
-        if self.sampling_method == 'lhs':
-            samples = self.draw_latin_hypercube_samples(self.num_initial_samples)
-        elif self.sampling_method == 'random':
-            samples = list()
-        else:
-            raise ValueError("Sampling method must be one of {}, is {}".format(self.supported_sampling_methods, self.sampling_method))
-        # collect the samples
-        collected_samples = 0
-        for params, index in samples:
-            observation = self.evaluate_objective_function(params)
-            self.update_after_evaluation(observation, index, params)
-            if self.is_valid(observation):
-                collected_samples += 1
-        # collect the remainder of the samples
-        while collected_samples < self.num_initial_samples:
-            params, index = self.draw_random_sample()
-            observation = self.evaluate_objective_function(params)
-            self.update_after_evaluation(observation, index, params)
-            # check for validity to avoid having no actual initial samples
-            if self.is_valid(observation):
-                collected_samples += 1
-
-        # instantiate the model with the initial sample
-        self.__likelihood = gpytorch.likelihoods.GaussianLikelihood()
-        self.__tparams = torch.Tensor(self.__valid_params)
-        self.__tobservations = torch.Tensor(self.__valid_observations)
-        self.__model = ExactGPModel(self.__tparams, self.__tobservations, self.__likelihood)
-        self.train_model_hyperparams()
-
-        # extract the predictions
-        _, std = self.predict_list(self.unvisited_cache)
-        self.initial_sample_mean = np.mean(self.__valid_observations)
-        # Alternatively:
-        # self.initial_sample_std = np.std(self.__valid_observations)
-        # self.initial_sample_mean = np.mean(predictions)
-        self.initial_std = np.mean(std)
-        self.cv_norm_maximum = self.initial_std
-
-    def contextual_variance(self, std: list):
-        """Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018)."""
-        if not self.af_params['explorationfactor'] == 'CV':
-            return None
-        if self.opt_direction == 'min':
-            if self.current_optimum == self.worst_value:
-                return 0.01
-            if self.current_optimum <= 0:
-                # doesn't work well for minimization beyond 0, should that even be a thing?
-                return abs(np.mean(std) / self.current_optimum)
-            improvement_over_initial_sample = self.initial_sample_mean / self.current_optimum
-            cv = np.mean(std) / improvement_over_initial_sample
-            # normalize if available
-            if self.cv_norm_maximum:
-                cv = cv / self.cv_norm_maximum
-            return cv
-        return np.mean(std) / self.current_optimum
-
-    def __optimize(self, max_fevals):
-        """Find the next best candidate configuration(s), evaluate those and update the model accordingly."""
-        while self.fevals < max_fevals:
-            if self.__visited_num >= self.searchspace_size:
-                raise ValueError(self.error_message_searchspace_fully_observed)
-            predictions = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(predictions[1])
-            list_of_acquisition_values = self.__af(predictions, hyperparam)
-            # afterwards select the best AF value
-            best_af = self.argopt(list_of_acquisition_values)
-            candidate_params = self.unvisited_cache[best_af]
-            candidate_index = self.find_param_config_index(candidate_params)
-            observation = self.evaluate_objective_function(candidate_params)
-            self.update_after_evaluation(observation, candidate_index, candidate_params)
-            self.train_model_hyperparams()
-        return self.results
-
-    def __optimize_multi(self, max_fevals):
-        """Optimize with a portfolio of multiple acquisition functions. Predictions are always only taken once. Skips AFs if they suggest X/max_evals duplicates in a row, prefers AF with best discounted average."""
-        if self.opt_direction != 'min':
-            raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
-        # calculate how many times an AF can suggest a duplicate candidate before the AF is skipped
-        # skip_duplicates_fraction = self.af_params['skip_duplicates_fraction']
-        # skip_if_duplicate_n_times = int(min(max(round(skip_duplicates_fraction * max_fevals), 3), max_fevals))
-        skip_if_duplicate_n_times = self.af_params['skip_duplicate_after']
-        discount_factor = self.multi_afs_discount_factor
-        # setup the registration of duplicates and runtimes
-        duplicate_count_template = [0 for _ in range(skip_if_duplicate_n_times)]
-        duplicate_candidate_af_count = list(deepcopy(duplicate_count_template) for _ in range(3))
-        skip_af_index = list()
-        af_runtimes = [0, 0, 0]
-        af_observations = [list(), list(), list()]
-        initial_sample_mean = np.mean(self.__valid_observations)
-        while self.fevals < max_fevals:
-            time_start = time.perf_counter_ns()
-            # the first acquisition function is never skipped, so that should be the best for the endgame (EI)
-            aqfs = self.multi_afs
-            predictions = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(predictions[1])
-            if self.__visited_num >= self.searchspace_size:
-                raise ValueError(self.error_message_searchspace_fully_observed)
-            time_predictions = time.perf_counter_ns()
-            actual_candidate_params = list()
-            actual_candidate_indices = list()
-            actual_candidate_af_indices = list()
-            duplicate_candidate_af_indices = list()
-            duplicate_candidate_original_af_indices = list()
-            for af_index, af in enumerate(aqfs):
-                if af_index in skip_af_index:
-                    continue
-                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
-                    break
-                timer_start = time.perf_counter()
-                list_of_acquisition_values = af(predictions, hyperparam)
-                best_af = self.argopt(list_of_acquisition_values)
-                time_taken = time.perf_counter() - timer_start
-                af_runtimes[af_index] += time_taken
-                is_duplicate = best_af in actual_candidate_indices
-                if not is_duplicate:
-                    candidate_params = self.unvisited_cache[best_af]
-                    actual_candidate_params.append(candidate_params)
-                    actual_candidate_indices.append(best_af)
-                    actual_candidate_af_indices.append(af_index)
-                # register whether the AF suggested a duplicate candidate
-                duplicate_candidate_af_count[af_index].pop(0)
-                duplicate_candidate_af_count[af_index].append(1 if is_duplicate else 0)
-                if is_duplicate:
-                    # find the index of the AF that first registered the duplicate
-                    original_duplicate_af_index = actual_candidate_af_indices[actual_candidate_indices.index(best_af)]
-                    # register that AF as duplicate as well
-                    duplicate_candidate_af_count[original_duplicate_af_index][-1] = 1
-                    duplicate_candidate_af_indices.append(af_index)
-                    duplicate_candidate_original_af_indices.append(original_duplicate_af_index)
-            time_afs = time.perf_counter_ns()
-            # evaluate the non-duplicate candidates
-            for index, af_index in enumerate(actual_candidate_af_indices):
-                candidate_params = actual_candidate_params[index]
-                candidate_index = self.find_param_config_index(candidate_params)
-                observation = self.evaluate_objective_function(candidate_params)
-                self.update_after_evaluation(observation, candidate_index, candidate_params)
-                if observation != self.invalid_value:
-                    # we use the registered observations for maximization of the discounted reward
-                    reg_observation = observation if self.opt_direction == 'min' else -1 * observation
-                    af_observations[actual_candidate_af_indices[index]].append(reg_observation)
-                else:
-                    reg_invalid_observation = initial_sample_mean if self.opt_direction == 'min' else -1 * initial_sample_mean
-                    af_observations[actual_candidate_af_indices[index]].append(reg_invalid_observation)
-            for index, af_index in enumerate(duplicate_candidate_af_indices):
-                original_observation = af_observations[duplicate_candidate_original_af_indices[index]][-1]
-                af_observations[af_index].append(original_observation)
-            self.train_model_hyperparams()
-            time_eval = time.perf_counter_ns()
-            # assert that all observation lists of non-skipped acquisition functions are of the same length
-            non_skipped_af_indices = list(af_index for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index)
-            assert all(len(af_observations[non_skipped_af_indices[0]]) == len(af_observations[af_index]) for af_index in non_skipped_af_indices)
-            # find the AFs elligble for being skipped
-            candidates_for_skip = list()
-            for af_index, count in enumerate(duplicate_candidate_af_count):
-                if sum(count) >= skip_if_duplicate_n_times and af_index not in skip_af_index:
-                    candidates_for_skip.append(af_index)
-            # do not skip the AF with the lowest runtime
-            if len(candidates_for_skip) > 1:
-                candidates_for_skip_discounted = list(
-                    sum(list(obs * discount_factor**(len(observations) - 1 - i) for i, obs in enumerate(observations)))
-                    for af_index, observations in enumerate(af_observations) if af_index in candidates_for_skip)
-                af_not_to_skip = candidates_for_skip[np.argmin(candidates_for_skip_discounted)]
-                for af_index in candidates_for_skip:
-                    if af_index == af_not_to_skip:
-                        # do not skip the AF with the lowest runtime and give it a clean slate
-                        duplicate_candidate_af_count[af_index] = deepcopy(duplicate_count_template)
-                        continue
-                    skip_af_index.append(af_index)
-                    if len(skip_af_index) >= len(aqfs):
-                        raise ValueError("There are no acquisition functions left! This should not happen...")
-            time_af_selection = time.perf_counter_ns()
-
-            # printing timings
-            if self.log_timings:
-                time_taken_predictions = round(time_predictions - time_start, 3) / 1000
-                time_taken_afs = round(time_afs - time_predictions, 3) / 1000
-                time_taken_eval = round(time_eval - time_afs, 3) / 1000
-                time_taken_af_selection = round(time_af_selection - time_eval, 3) / 1000
-                time_taken_total = round(time_af_selection - time_start, 3) / 1000
-                print(
-                    f"({self.fevals}/{max_fevals}) Total time: {time_taken_total} | Predictions: {time_taken_predictions} | AFs: {time_taken_afs} | Eval: {time_taken_eval} | AF selection: {time_taken_af_selection}",
-                    flush=True)
-        return self.results
-
-    def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
-        """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once, unless increase_precision is true. Skips AFs if they are consistently worse than the mean of discounted observations, promotes AFs if they are consistently better than this mean."""
-        if self.opt_direction != 'min':
-            raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
-        aqfs = self.multi_afs
-        discount_factor = self.multi_afs_discount_factor
-        required_improvement_factor = self.multi_afs_required_improvement_factor
-        required_improvement_worse = 1 + required_improvement_factor
-        required_improvement_better = 1 - required_improvement_factor
-        min_required_count = self.af_params['skip_duplicate_after']
-        skip_af_index = list()
-        single_af = len(aqfs) <= len(skip_af_index) + 1
-        af_observations = [list(), list(), list()]
-        af_performs_worse_count = [0, 0, 0]
-        af_performs_better_count = [0, 0, 0]
-        while self.fevals < max_fevals:
-            if single_af:
-                return self.__optimize(max_fevals)
-            if self.__visited_num >= self.searchspace_size:
-                raise ValueError(self.error_message_searchspace_fully_observed)
-            observations_median = np.median(self.__valid_observations)
-            if increase_precision is False:
-                predictions = self.predict_list(self.unvisited_cache)
-                hyperparam = self.contextual_variance(predictions[1])
-            for af_index, af in enumerate(aqfs):
-                if af_index in skip_af_index:
-                    continue
-                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
-                    break
-                if increase_precision is True:
-                    predictions = self.predict_list(self.unvisited_cache)
-                    hyperparam = self.contextual_variance(predictions[1])
-                list_of_acquisition_values = af(predictions, hyperparam)
-                best_af = self.argopt(list_of_acquisition_values)
-                # to avoid going out of bounds on the next iteration, remove the best_af
-                predictions = (np.delete(predictions[0], best_af), np.delete(predictions[1], best_af))
-                candidate_params = self.unvisited_cache[best_af]
-                candidate_index = self.find_param_config_index(candidate_params)
-                observation = self.evaluate_objective_function(candidate_params)
-                self.update_after_evaluation(observation, candidate_index, candidate_params)
-                if increase_precision is True:
-                    self.train_model_hyperparams()
-                # we use the registered observations for maximization of the discounted reward
-                if observation != self.invalid_value:
-                    reg_observation = observation if self.opt_direction == 'min' else -1 * observation
-                    af_observations[af_index].append(reg_observation)
-                else:
-                    # if the observation is invalid, use the median of all valid observations to avoid skewing the discounted observations
-                    reg_invalid_observation = observations_median if self.opt_direction == 'min' else -1 * observations_median
-                    af_observations[af_index].append(reg_invalid_observation)
-            if increase_precision is False:
-                self.train_model_hyperparams()
-
-            # calculate the mean of discounted observations over the remaining acquisition functions
-            discounted_obs = list(
-                sum(list(obs * discount_factor**(len(observations) - 1 - i) for i, obs in enumerate(observations))) for observations in af_observations)
-            disc_obs_mean = np.mean(list(discounted_obs[af_index] for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index))
-
-            # register which AFs perform more than 10% better than average and which more than 10% worse than average
-            for af_index, discounted_observation in enumerate(discounted_obs):
-                if discounted_observation > disc_obs_mean * required_improvement_worse:
-                    af_performs_worse_count[af_index] += 1
-                elif discounted_observation < disc_obs_mean * required_improvement_better:
-                    af_performs_better_count[af_index] += 1
-
-            # find the worst AF, discounted observations is leading for a draw
-            worst_count = max(list(count for af_index, count in enumerate(af_performs_worse_count) if af_index not in skip_af_index))
-            af_index_worst = -1
-            if worst_count >= min_required_count:
-                for af_index, count in enumerate(af_performs_worse_count):
-                    if af_index not in skip_af_index and count == worst_count and (af_index_worst == -1
-                                                                                   or discounted_obs[af_index] > discounted_obs[af_index_worst]):
-                        af_index_worst = af_index
-
-            # skip the worst AF
-            if af_index_worst > -1:
-                skip_af_index.append(af_index_worst)
-                # reset the counts to even the playing field for the remaining AFs
-                af_performs_worse_count = [0, 0, 0]
-                af_performs_better_count = [0, 0, 0]
-                # if there is only one AF left, register as single AF
-                if len(aqfs) <= len(skip_af_index) + 1:
-                    single_af = True
-                    af_indices_left = list(af_index for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index)
-                    assert len(af_indices_left) == 1
-                    self.__af = aqfs[af_indices_left[0]]
-            else:
-                # find the best AF, discounted observations is leading for a draw
-                best_count = max(list(count for af_index, count in enumerate(af_performs_better_count) if af_index not in skip_af_index))
-                af_index_best = -1
-                if best_count >= min_required_count:
-                    for af_index, count in enumerate(af_performs_better_count):
-                        if af_index not in skip_af_index and count == best_count and (af_index_best == -1
-                                                                                      or discounted_obs[af_index] < discounted_obs[af_index_best]):
-                            af_index_best = af_index
-                # make the best AF single
-                if af_index_best > -1:
-                    single_af = True
-                    self.__af = aqfs[af_index_best]
-
-        return self.results
-
-    def __optimize_multi_fast(self, max_fevals):
-        """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once."""
-        while self.fevals < max_fevals:
-            aqfs = self.multi_afs
-            # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
-            predictions = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(predictions[1])
-            if self.__visited_num >= self.searchspace_size:
-                raise ValueError(self.error_message_searchspace_fully_observed)
-            for af in aqfs:
-                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
-                    break
-                list_of_acquisition_values = af(predictions, hyperparam)
-                best_af = self.argopt(list_of_acquisition_values)
-                del predictions[0][best_af]    # to avoid going out of bounds
-                del predictions[1][best_af]
-                candidate_params = self.unvisited_cache[best_af]
-                candidate_index = self.find_param_config_index(candidate_params)
-                observation = self.evaluate_objective_function(candidate_params)
-                self.update_after_evaluation(observation, candidate_index, candidate_params)
-            self.train_model_hyperparams()
-        return self.results
-
-    def af_random(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function returning a randomly shuffled list for comparison."""
-        list_random = range(len(self.unvisited_cache))
-        shuffle(list_random)
-        return list_random
-
-    def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function Probability of Improvement (PI)."""
-        # prefetch required data
-        x_mu, x_std = predictions
-        if hyperparam is None:
-            hyperparam = self.af_params['explorationfactor']
-        fplus = self.current_optimum - hyperparam
-
-        # precompute difference of improvement
-        list_diff_improvement = -((fplus - x_mu) / (x_std + 1E-9))
-
-        # compute probability of improvement with CDF in bulk
-        list_prob_improvement = norm.cdf(list_diff_improvement)
-        return list_prob_improvement
-
-    def af_expected_improvement(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function Expected Improvement (EI)."""
-        # prefetch required data
-        x_mu, x_std = predictions
-        if hyperparam is None:
-            hyperparam = self.af_params['explorationfactor']
-        fplus = self.current_optimum - hyperparam
-
-        # precompute difference of improvement, CDF and PDF in bulk
-        list_diff_improvement = (fplus - x_mu) / (x_std + 1E-9)
-        list_cdf = norm.cdf(list_diff_improvement)
-        list_pdf = norm.pdf(list_diff_improvement)
-
-        # compute expected improvement in bulk
-        list_exp_improvement = -((fplus - x_mu) * list_cdf + x_std * list_pdf)
-        return list_exp_improvement
-
-    def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function Lower Confidence Bound (LCB)."""
-        x_mu, x_std = predictions
-        if hyperparam is None:
-            hyperparam = self.af_params['explorationfactor']
-        beta = hyperparam
-
-        # compute LCB in bulk
-        list_lower_confidence_bound = (x_mu - beta * x_std)
-        return list_lower_confidence_bound
-
-    def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010."""
-        # prefetch required data
-        x_mu, x_std = predictions
-        if hyperparam is None:
-            hyperparam = self.af_params['explorationfactor']
-
-        # precompute beta parameter
-        zeta = self.af_params['zeta']
-        t = self.fevals
-        d = self.num_dimensions
-        delta = hyperparam
-        beta = np.sqrt(zeta * (2 * np.log((t**(d / 2. + 2)) * (np.pi**2) / (3. * delta))))
-
-        # compute UCB in bulk
-        list_lower_confidence_bound = (x_mu - beta * x_std)
-        return list_lower_confidence_bound
-
-    def visualize_after_opt(self):
-        """Visualize the model after the optimization."""
-        print(self.__model.kernel_.get_params())
-        print(self.__model.log_marginal_likelihood())
-        import matplotlib.pyplot as plt
-        mu, std = self.predict_list(self.searchspace)
-        brute_force_observations = list()
-        for param_config in self.searchspace:
-            obs = minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.results)
-            if obs == self.invalid_value:
-                obs = None
-            brute_force_observations.append(obs)
-        x_axis = range(len(mu))
-        plt.fill_between(x_axis, mu - std, mu + std, alpha=0.2, antialiased=True)
-        plt.plot(x_axis, mu, label="predictions", linestyle=' ', marker='.')
-        plt.plot(x_axis, brute_force_observations, label="actual", linestyle=' ', marker='.')
-        plt.legend()
-        plt.show()
diff --git a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py b/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
deleted file mode 100644
index d584c0e3b..000000000
--- a/kernel_tuner/strategies/bayes_opt_GPyTorch_lean.py
+++ /dev/null
@@ -1,1084 +0,0 @@
-"""Lean implementation of Bayesian Optimization with GPyTorch."""
-# python
-import ast  # for casting strings to dict
-import warnings
-from copy import deepcopy
-from math import ceil
-from random import choice, randint, shuffle
-from typing import Tuple
-
-# external
-import numpy as np
-from numpy.random import default_rng
-
-from kernel_tuner.runners.runner import Runner
-from kernel_tuner.searchspace import Searchspace
-
-# optional
-try:
-    import gpytorch
-    import torch
-    # import arviz as az
-    bayes_opt_present = True
-
-    from torch import Tensor
-
-    class ExactGPModel(gpytorch.models.ExactGP):
-        def __init__(self, train_x, train_y, likelihood, cov_kernel_name: str, cov_kernel_lengthscale: float):
-            super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
-            self.mean_module = gpytorch.means.ZeroMean()
-            if cov_kernel_name == 'matern':
-                self.covar_module = gpytorch.kernels.MaternKernel(nu=cov_kernel_lengthscale)
-            elif cov_kernel_name == 'matern_scalekernel':
-                self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=cov_kernel_lengthscale))
-
-        def forward(self, x):
-            mean_x = self.mean_module(x)
-            covar_x = self.covar_module(x)
-            return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
-except ImportError:
-    bayes_opt_present = False
-
-    class Tensor():
-        pass
-
-    class ExactGPModel():
-        def __init__(self, train_x, train_y, likelihood):
-            raise ImportError("GPyTorch not imported")
-        def forward(self, x):
-            raise ImportError("GPyTorch not imported")
-
-
-# set supported hyperparameter values
-supported_precisions = ['float', 'double']
-supported_initial_sample_methods = ['lhs', 'index', 'minmax','random']
-supported_methods = ['ei', 'poi', 'random']
-supported_cov_kernels = ['matern', 'matern_scalekernel']
-supported_likelihoods = ['Gaussian', 'GaussianPrior', 'FixedNoise']
-supported_optimizers = ['LBFGS', 'Adam', 'AdamW', 'Adagrad', 'ASGD']
-
-
-# set complex hyperparameter defaults
-def default_optimizer_learningrates(key):
-    defaults = {
-        'LBFGS': 1,
-        'Adam': 0.001,
-        'AdamW': 0.001,
-        'ASGD': 0.01,
-        'Adagrad': 0.01
-    }
-    return defaults[key]
-
-
-def tune(searchspace: Searchspace, runner: Runner, tuning_options):
-    """Find the best performing kernel configuration in the parameter space.
-
-    :params runner: A runner from kernel_tuner.runners
-    :type runner: kernel_tuner.runner
-
-    :param kernel_options: A dictionary with all options for the kernel.
-    :type kernel_options: kernel_tuner.interface.Options
-
-    :param device_options: A dictionary with all options for the device
-        on which the kernel should be tuned.
-    :type device_options: kernel_tuner.interface.Options
-
-    :param tuning_options: A dictionary with all options regarding the tuning
-        process.
-    :type tuning_options: kernel_tuner.interface.Options
-
-    :returns: A list of dictionaries for executed kernel configurations and their
-        execution times. And a dictionary that contains a information
-        about the hardware/software environment on which the tuning took place.
-    :rtype: list(dict()), dict()
-
-    """
-    if not bayes_opt_present:
-        raise ImportError(
-            "Error: optional dependencies for Bayesian Optimization not installed, please install torch and gpytorch"
-        )
-
-    # set CUDA availability
-    use_cuda = False
-    cuda_available = torch.cuda.is_available() and use_cuda
-    device = torch.device("cuda:0" if cuda_available else "cpu")
-    if cuda_available:
-        print(f"CUDA is available, device: {torch.cuda.get_device_name(device)}")
-
-    # retrieve options with defaults
-    options = tuning_options.strategy_options
-    optimization_direction = options.get("optimization_direction", 'min')
-    num_initial_samples = int(options.get("popsize", 20))
-    max_fevals = int(options.get("max_fevals", 220))
-
-    # enabling scaling will unscale and snap inputs on evaluation, more efficient to scale all at once and keep unscaled values
-    tuning_options["snap"] = False
-    tuning_options["scaling"] = False
-
-    # prune the search space using restrictions
-    parameter_space = searchspace.list.copy()
-
-    # limit max_fevals to max size of the parameter space
-    max_fevals = min(len(parameter_space), max_fevals)
-    if max_fevals < num_initial_samples:
-        raise ValueError(
-            f"Maximum number of function evaluations ({max_fevals}) can not be lower than or equal to the number of initial samples ({num_initial_samples}), you might as well brute-force."
-        )
-
-    # execute Bayesian Optimization
-    BO = BayesianOptimization(parameter_space, tuning_options, runner, num_initial_samples, optimization_direction, device)
-    all_results = BO.optimize(max_fevals)
-
-    return all_results, runner.dev.get_environment()
-
-
-class BayesianOptimization:
-
-    def __init__(self, parameter_space: list, tuning_options, runner: Runner, num_initial_samples: int, optimization_direction: str,
-                 device) -> None:
-        self.animate = False    # TODO remove
-
-        # set defaults
-        self.num_initial_samples = num_initial_samples
-        self.fevals = 0
-        self.all_results = []
-        self.unique_results = {}
-        self.current_optimal_config = None
-
-        # set Kernel Tuner data
-        self.tuning_options = tuning_options
-        self.runner = runner
-        self.max_threads = runner.dev.max_threads
-
-        # get precision options
-        self.dtype = torch.float if self.get_hyperparam("precision", "float", supported_precisions) == "float" else torch.double
-        self.min_std = self.get_hyperparam("minimum_std", 1e-6, type=float)
-
-        # get tuning options
-        self.initial_sample_method = self.get_hyperparam("initialsamplemethod", "lhs", supported_initial_sample_methods)
-        self.initial_sample_random_offset_factor = self.get_hyperparam("initialsamplerandomoffsetfactor", 0.1, type=float)    # 0.1
-        self.initial_training_iter = self.get_hyperparam("initialtrainingiter", 5, type=int)    # 5
-        self.training_after_iter = self.get_hyperparam("trainingafteriter", 1, type=int)    # 1
-        self.cov_kernel_name = self.get_hyperparam("covariancekernel", "matern_scalekernel", supported_cov_kernels)
-        self.cov_kernel_lengthscale = self.get_hyperparam("covariancelengthscale", 1.5, type=float)
-        self.likelihood_name = self.get_hyperparam("likelihood", "Gaussian", supported_likelihoods)
-        self.optimizer_name = self.get_hyperparam("optimizer", "LBFGS", supported_optimizers)
-        self.optimizer_learningrate = self.get_hyperparam("optimizer_learningrate", self.optimizer_name, type=float, cast=default_optimizer_learningrates)
-        acquisition_function_name = self.get_hyperparam("method", "ei", supported_methods)
-        af_params = self.get_hyperparam("methodparams", {}, type=dict, cast=ast.literal_eval)
-
-        # set acquisition function options
-        self.set_acquisition_function(acquisition_function_name)
-        if 'explorationfactor' not in af_params:
-            af_params['explorationfactor'] = 0.1    # 0.1
-        self.af_params = af_params
-
-        # set Tensors
-        self.device: torch.device = device
-        self.out_device = torch.device("cpu")
-        self.size = len(parameter_space)
-        self.index_counter = torch.arange(self.size)
-        # the unvisited_configs and valid_configs are to be used as boolean masks on the other tensors, more efficient than adding to / removing from tensors
-        self.unvisited_configs = torch.ones(self.size, dtype=torch.bool).to(device)
-        self.valid_configs = torch.zeros(self.size, dtype=torch.bool).to(device)
-        self.inital_sample_configs = torch.zeros(self.size, dtype=torch.bool).to(device)
-        self.results = torch.zeros(self.size, dtype=self.dtype).to(device) * np.nan    # x (param configs) and y (results) must be the same type
-        self.results_std = torch.ones(self.size, dtype=self.dtype).to(device)    # only a valid assumption if outputs are normalized
-
-        # transform non-numerical parameters to numerical, keep true_param_configs for evaluation function
-        self.param_configs, self.tune_params = self.transform_nonnumerical_params(parameter_space)
-        self.true_param_configs = parameter_space
-
-        # set scaling
-        self.scaled_input = True
-        self.scaled_output = True
-        if not self.scaled_input:
-            self.param_configs_scaled = self.param_configs
-        else:
-            self.apply_scaling_to_inputs()
-
-        # set optimization settings
-        self.invalid_value = 1e20
-        self.optimization_direction = optimization_direction
-        if self.optimization_direction == 'min':
-            self.is_better_than = lambda a, b: a < b
-            self.inf_value = np.PINF
-            self.opt = torch.min
-            self.argopt = torch.argmin
-        elif self.optimization_direction == 'max':
-            self.is_better_than = lambda a, b: a > b
-            self.inf_value = np.NINF
-            self.opt = torch.max
-            self.argopt = torch.argmax
-        else:
-            raise ValueError(f"Invalid optimization direction {self.optimization_direction}")
-
-        # set the model
-        self.current_optimum = self.inf_value
-        self.hyperparams = {
-            'loss': np.nan,
-            'lengthscale': np.nan,
-            'noise': np.nan,
-        }
-        self.hyperparams_means = {
-            'loss': np.array([]),
-            'lengthscale': np.array([]),
-            'noise': np.array([]),
-        }
-
-        # initialize the model
-        if not self.runner.simulation_mode:
-            self.import_cached_evaluations()
-        self.initialize_model()
-
-    @property
-    def train_x(self):
-        """Get the valid parameter configurations."""
-        return self.param_configs_scaled[self.valid_configs].to(self.device)
-
-    @property
-    def train_y(self):
-        """Get the valid results."""
-        outputs = self.results[self.valid_configs]
-        if self.scaled_output:
-            # z-score, remove mean and make unit variance to scale it to N(0,1)
-            # alternatively, first min-max the outputs between -1 and +1 and apply a Fisher transformation (np.arctanh)
-            outputs = (outputs - outputs.mean()) / outputs.std()
-        return outputs
-
-    @property
-    def train_y_err(self):
-        """Get the error on the valid results."""
-        std = self.results_std[self.valid_configs]
-        if self.scaled_output and std.std() > 0.0:
-            std = (std - std.mean()) / std.std()    # use z-score to get normalized variability
-        return std
-
-    @property
-    def test_x(self):
-        """Get the not yet visited parameter configurations."""
-        return self.param_configs_scaled[self.unvisited_configs].to(self.device)
-
-    @property
-    def test_x_unscaled(self):
-        """Get the unscaled, not yet visited parameter configurations."""
-        return self.param_configs[self.unvisited_configs]
-
-    @property
-    def test_y_err(self):
-        """Get the expected error on the test set."""
-        train_y_err = self.train_y_err
-        return torch.full((self.size - len(train_y_err), ), torch.mean(train_y_err))
-
-    @property
-    def invalid_x(self):
-        """Get the invalid parameter configurations by checking which visited configs are not valid (equivalent to checking which unvisited configs are valid)."""
-        invalid_mask = (self.unvisited_configs == self.valid_configs)
-        return self.param_configs[invalid_mask]
-
-    def true_param_config_index(self, target_index: int) -> int:
-        """The index required to get the true config param index when dealing with test_x."""
-        # get the index of the #index-th True (for example the 9th+1 True could be index 13 because there are 4 Falses in between)
-        masked_counter = self.index_counter[self.unvisited_configs]
-        return masked_counter[target_index]
-
-    def true_param_config_indices(self, target_indices: Tensor) -> Tensor:
-        """Same as true_param_config_index, but for an array of targets instead."""
-        masked_counter = self.index_counter[self.unvisited_configs]
-        return masked_counter.index_select(0, target_indices)
-
-    def initialize_model(self, take_initial_sample=True, train_hyperparams=True):
-        """Initialize the surrogate model."""
-        # self.initial_sample_std = self.min_std
-        if take_initial_sample:
-            self.initial_sample()
-
-        # create the model
-        if self.likelihood_name == 'Gaussian':
-            self.likelihood = gpytorch.likelihoods.GaussianLikelihood()
-        elif self.likelihood_name == 'GaussianPrior':
-            raise NotImplementedError("Gaussian Prior likelihood has not been implemented yet")
-        elif self.likelihood_name == 'FixedNoise':
-            self.likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(noise=self.train_y_err.clamp(min=self.min_std), learn_additional_noise=True)
-        self.likelihood = self.likelihood.to(self.device)
-        self.model = ExactGPModel(self.train_x, self.train_y, self.likelihood, self.cov_kernel_name, self.cov_kernel_lengthscale)
-
-        # Find optimal model hyperparameters
-        self.model.train()
-        self.likelihood.train()
-        model_parameters = filter(lambda p: p.requires_grad, self.model.parameters())
-
-        # set the optimizer
-        # LBFGS is probably better as Adam is first-order
-        if self.optimizer_name == 'LBFGS':
-            self.optimizer = torch.optim.LBFGS(model_parameters, lr=self.optimizer_learningrate)
-        elif self.optimizer_name == 'Adam':
-            self.optimizer = torch.optim.Adam(model_parameters, lr=self.optimizer_learningrate)
-        elif self.optimizer_name == 'AdamW':
-            self.optimizer = torch.optim.AdamW(model_parameters, lr=self.optimizer_learningrate)
-        elif self.optimizer_name == 'ASGD':
-            self.optimizer = torch.optim.ASGD(model_parameters, lr=self.optimizer_learningrate)
-        elif self.optimizer_name == 'Adagrad':
-            self.optimizer = torch.optim.Adagrad(model_parameters, lr=self.optimizer_learningrate)
-
-        self.mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model).to(self.device)
-        if train_hyperparams:
-            self.train_hyperparams(self.initial_training_iter)
-        else:
-            self.train_hyperparams(0)
-
-    def import_cached_evaluations(self):
-        """Import the previously evaluated configurations into this run."""
-        # make strings of all the parameter configurations in the search space
-        param_config_strings = list()
-        for param_config in self.true_param_configs:
-            param_config_strings.append(",".join([str(v) for v in param_config]))
-
-        # load the results from the cache into the run
-        cache = self.tuning_options.cache
-        if len(cache.keys()) > 0:
-            print("Previous cachefile found while not in simulation mode, importing previous evaluations.")
-        for param_config_string, result in cache.items():
-            # get the index of the string in the search space
-            param_config_index = param_config_strings.index(param_config_string)
-            time = self.evaluate_config(param_config_index)
-            assert time == result['time']
-        print(f"Imported {len(self.all_results)} previously evaluated configurations.")
-
-    def initial_sample(self):
-        """Take an initial sample of the parameter space."""
-        list_param_config_indices = list(self.index_counter[~self.unvisited_configs])
-
-        # generate a random offset from a normal distribution to add to the sample indices
-        rng = default_rng()
-        if self.initial_sample_random_offset_factor > 0.5:
-            raise ValueError("Random offset factor should not be greater than 0.5 to avoid overlapping index offsets")
-        random_offset_size = (self.size / self.num_initial_samples) * self.initial_sample_random_offset_factor
-        random_offsets = np.round(rng.standard_normal(self.num_initial_samples) * random_offset_size)
-
-        # first apply the initial sampling method
-        if self.initial_sample_method == 'lhs' and self.num_initial_samples - self.fevals > 1:
-            indices = self.get_lhs_samples(random_offsets)
-            for param_config_index in indices.tolist():
-                if param_config_index in list_param_config_indices:
-                    continue
-                list_param_config_indices.append(param_config_index)
-                self.evaluate_config(param_config_index)
-        elif self.initial_sample_method == 'random':
-            while self.fevals < self.num_initial_samples:
-                param_config_index = randint(0, self.size - 1)
-                if param_config_index in list_param_config_indices:
-                    continue
-                list_param_config_indices.append(param_config_index)
-                self.evaluate_config(param_config_index)
-        elif self.initial_sample_method == 'minmax':
-            list_param_config_indices += self.take_min_max_initial_samples(list_param_config_indices)
-
-        # then take index-spaced samples until all samples are valid
-        while self.fevals < self.num_initial_samples:
-            least_evaluated_region_index = self.get_middle_index_of_least_evaluated_region()
-            param_config_index = min(max(int(least_evaluated_region_index + random_offsets[self.fevals].item()), 0), self.size - 1)
-            if param_config_index in list_param_config_indices:
-                warnings.warn(
-                    f"An already evaluated configuration ({param_config_index}) was selected for index-spaced sampling. " +
-                    "If this happens regularly, reduce the initial sample random offset factor.", AlreadyEvaluatedConflict)
-                param_config_index = least_evaluated_region_index
-            list_param_config_indices.append(param_config_index)
-            self.evaluate_config(param_config_index)
-
-        # set the current optimum, initial sample mean and initial sample std
-        self.current_optimum = self.opt(self.train_y).item()
-        self.initial_sample_mean = self.train_y.mean().item()
-        self.initial_sample_std = self.train_y.std().item()
-        # self.initial_sample_std = self.min_std    # temporary until the predictive posterior has been taken
-
-        # save a boolean mask of the initial samples
-        self.inital_sample_configs = self.valid_configs.detach().clone()
-
-    def get_lhs_samples(self, random_offsets: np.ndarray) -> Tensor:
-        """Get a centered Latin Hypercube Sample with a random offset."""
-        n_samples = self.num_initial_samples - self.fevals
-
-        # first get the seperate parameter values to make possibly fictional distributed parameter configurations
-        temp_param_configs = [[] for _ in range(n_samples)]
-        for param_values in self.tune_params.values():
-            l = len(param_values)
-
-            # determine the interval and offset
-            interval = l / n_samples
-            offset = 0
-            if l > n_samples:
-                # take the difference between the last index and the end of the list, and the first index and the start of the list
-                offset = ((l - 1 - interval * n_samples) - interval) / 2
-
-            # assemble the parameter configurations
-            for i in range(n_samples):
-                index = ceil(offset + interval * (i + 1)) - 1
-                temp_param_configs[i].append(param_values[index])
-
-        # create a tensor of the possibly fictional parameter configurations
-        param_configs = torch.tensor(list(tuple(param_config) for param_config in temp_param_configs), dtype=self.dtype).to(self.device)
-        param_configs = param_configs.unique(dim=0)    # remove duplicates
-        n_samples_unique = len(param_configs)
-
-        # get the indices of the parameter configurations
-        num_params = len(self.param_configs[0])
-        minimum_required_num_matching_params = round(num_params *
-                                                     0.75)    # set the number of parameter matches allowed to be dropped before the search is stopped
-        param_configs_indices = torch.full((n_samples_unique, ), -1, dtype=torch.int)
-        for selected_index, selected_param_config in enumerate(param_configs):
-            # for each parameter configuration, count the number of matching parameters
-            required_num_matching_params = num_params
-            matching_params = torch.count_nonzero(self.param_configs == selected_param_config, -1)
-            match_mask = (matching_params == required_num_matching_params)
-            # if there is not at least one matching parameter configuration, lower the required number of matching parameters
-            found_num_matching_param_configs = match_mask.count_nonzero()
-            while found_num_matching_param_configs < 1 and required_num_matching_params > minimum_required_num_matching_params:
-                required_num_matching_params -= 1
-                match_mask = (matching_params == required_num_matching_params)
-                found_num_matching_param_configs = match_mask.count_nonzero()
-
-            # if more than one possible parameter configuration has been found, pick a random one
-            if found_num_matching_param_configs > 1:
-                index = choice(self.index_counter[match_mask])
-            elif found_num_matching_param_configs == 1:
-                index = self.index_counter[match_mask].item()
-            else:
-                # if no matching parameter configurations were found
-                continue
-
-            # set the selected index
-            param_configs_indices[selected_index] = min(max(int(index + random_offsets[selected_index].item()), 0), self.size - 1)
-
-        # filter -1 indices and duplicates that occurred because of the random offset
-        param_configs_indices = param_configs_indices[param_configs_indices >= 0]
-        param_configs_indices = param_configs_indices.unique().type(torch.int)
-        if len(param_configs_indices) < n_samples / 2:
-            warnings.warn(
-                str(f"{n_samples - len(param_configs_indices)} out of the {n_samples} LHS samples were duplicates or -1." +
-                    f"This might be because you have few initial samples ({n_samples}) relative to the number of parameters ({num_params})." +
-                    "Perhaps try something other than LHS."))
-        return param_configs_indices
-
-    def take_min_max_initial_samples(self, list_param_config_indices: list, samples_per_parameter=1) -> list:
-        """Take the minimum parameters and the maximum for each parameter to establish the effect of individual parameters."""
-        # number of samples required is at least (samples_per_parameter) * (number of parameters) + 1
-
-        # first get the individual parameter values and sort them
-        params_values = list(self.tune_params.values())
-        for param_values in params_values:
-            param_values.sort()
-
-        number_of_params = len(params_values)
-        if self.num_initial_samples - self.fevals < samples_per_parameter * number_of_params + 1:
-            raise ValueError(f"There are not enough initial samples available ({self.num_initial_samples - self.fevals}) to do minmax initial sampling. At least {samples_per_parameter * number_of_params + 1} samples are required.")
-
-        # then take the minimum parameter configuration using BFS, this is used as the base
-        # instead of BFS, you could also search for the minimal sum of indices
-        minimum_index = None
-        param_level = 0
-        param_moving_index = -1
-        while minimum_index is None and self.num_initial_samples - self.fevals:
-            # create the minimum base configuration and find it in the search space
-            selected_param_config = torch.tensor(tuple(param_values[param_level+1] if param_index == param_moving_index else param_values[min(param_level, len(param_values)-1)] for param_index, param_values in enumerate(params_values)), dtype=self.dtype).to(self.device)
-            matching_params = torch.count_nonzero(self.param_configs == selected_param_config, -1)
-            match_mask = (matching_params == number_of_params)
-            found_num_matching_param_configs = match_mask.count_nonzero()
-            temp_index = self.index_counter[match_mask]
-            # check if the configuration exists and is succesfully evaluated
-            if found_num_matching_param_configs == 1 and (temp_index.item() in list_param_config_indices or self.evaluate_config(temp_index.item()) < self.invalid_value):
-                minimum_index = temp_index.item()
-                minimum_config = self.param_configs[minimum_index]
-                if minimum_index not in list_param_config_indices:
-                    list_param_config_indices.append(minimum_index)
-            # if it doesn't exist and evaluate, do a breadth-first search for the minimum configuration
-            else:
-                proceed = False
-                while not proceed:
-                    # first look at the current level
-                    if param_moving_index < len(params_values) - 1:
-                        param_moving_index += 1
-                        # if the param_level + 1 exceeds the number of parameters, try the next parameter
-                        if len(params_values[param_moving_index]) <= param_level + 1:
-                            param_moving_index += 1
-                        else:
-                            proceed = True
-                    # if nothing is found, proceed to the next level
-                    else:
-                        param_level += 1
-                        param_moving_index = -1
-                        proceed = True
-        if minimum_index is None:
-            raise ValueError(f"Could not evaluate the minimum base configuration in {self.num_initial_samples} samples.")
-
-        # next take the maximum for each individual parameter using DFS
-        for param_index, param_values in enumerate(params_values):
-            if len(param_values) <= 1:
-                continue
-            maximum_index = None
-            param_moving_level = len(param_values) - 1
-            while maximum_index is None and self.num_initial_samples - self.fevals > 0:
-                # take the minimum configuration as base
-                selected_param_config = minimum_config.clone()
-                # change only the currently selected parameter and look up the configuration in the search space
-                selected_param_config[param_index] = param_values[param_moving_level]
-                matching_params = torch.count_nonzero(self.param_configs == selected_param_config, -1)
-                match_mask = (matching_params == number_of_params)
-                found_num_matching_param_configs = match_mask.count_nonzero()
-                temp_index = self.index_counter[match_mask]
-                if found_num_matching_param_configs == 1 and (temp_index.item() in list_param_config_indices or self.evaluate_config(temp_index.item()) < self.invalid_value):
-                    maximum_index = temp_index.item()
-                    if maximum_index not in list_param_config_indices:
-                        list_param_config_indices.append(maximum_index)
-                # if it doesn't exist and evaluate, move one parameter value down
-                else:
-                    param_moving_level -= 1
-                    if param_moving_level < 0:
-                        raise ValueError(f"No instance of parameter {param_index} is present in the search space and succesfully evaluated")
-            if maximum_index is None:
-                raise ValueError(f"Could not evaluate the maximum configuration for {param_index+1} out of {len(params_values)} within {self.num_initial_samples} samples.")
-
-        return list_param_config_indices
-
-    def get_middle_index_of_least_evaluated_region(self) -> int:
-        """Get the middle index of the region of parameter configurations that is the least visited."""
-        # This uses the largest distance between visited parameter configurations. That means it does not properly take the parameters into account, only the index of the parameter configurations, whereas LHS does.
-        distance_tensor = torch.arange(self.size)
-
-        # first get the indices that were visited (must be in ascending order)
-        indices_visited = self.index_counter[~self.unvisited_configs]
-
-        # then reset the range after the visited index
-        for index_visited in indices_visited:
-            distance_tensor[index_visited:] = torch.arange(self.size - index_visited)
-
-        biggest_distance_index = distance_tensor.argmax()
-        biggest_distance = distance_tensor[biggest_distance_index].item()
-        middle_index = biggest_distance_index - round(biggest_distance / 2)
-        # print(f"Max distance {biggest_distance}, index: {middle_index}, between: {biggest_distance_index-biggest_distance}-{biggest_distance_index}")
-        return middle_index
-
-    def train_hyperparams(self, training_iter: int):
-        """Optimize the surrogate model hyperparameters iteratively."""
-        self.model.train()
-        self.likelihood.train()
-
-        def closure():
-            self.optimizer.zero_grad()
-            output = self.model(self.train_x)    # get model output
-            try:
-                loss = -self.mll(output, self.train_y)    # calculate loss and backprop gradients
-                loss.backward()
-                # large sudden increase in loss signals numerical instability
-                with warnings.catch_warnings():
-                    warnings.simplefilter("ignore", category=RuntimeWarning)
-                    no_nan_losses = self.hyperparams_means['loss'][~np.isnan(self.hyperparams_means['loss'])]
-                    if len(no_nan_losses) > 1 and loss.item() > np.mean(no_nan_losses) * 2:
-                        warnings.warn("Avoiding loss surge, aborting training", AvoidedLossSurgeWarning)
-                        return np.nan
-                return loss
-            except gpytorch.utils.errors.NotPSDError:
-                warnings.warn("Matrix not positive definite during training", NotPSDTrainingWarning)
-                return np.nan
-            except RuntimeError as e:
-                warnings.warn(str(e), RuntimeWarning)
-
-        loss = None
-        for _ in range(training_iter):
-            try:
-                _loss = self.optimizer.step(closure)
-                if _loss is np.nan:
-                    break
-                loss = _loss
-            except gpytorch.utils.errors.NanError:
-                warnings.warn("PSD_safe_Cholesky failed due to too many NaN", NaNTrainingWarning)
-                break
-            except TypeError as e:
-                warnings.warn(str(e), RuntimeWarning)
-                break
-
-        # set the hyperparams to the new values
-        try:
-            lengthscale = float(self.model.covar_module.lengthscale.item())
-        except AttributeError:
-            lengthscale = float(self.model.covar_module.base_kernel.lengthscale.item())
-        loss = float(loss.item()) if loss is not None else np.nan
-        noise = float(self.model.likelihood.noise.mean().detach())
-        self.hyperparams = {
-            'loss': loss,
-            'lengthscale': lengthscale,
-            'noise': noise,
-        }
-        self.hyperparams_means['loss'] = np.append(self.hyperparams_means['loss'], loss)
-        self.hyperparams_means['lengthscale'] = np.append(self.hyperparams_means['lengthscale'], lengthscale)
-        self.hyperparams_means['noise'] = np.append(self.hyperparams_means['noise'], noise)
-
-        # get into evaluation (predictive posterior) mode
-        self.model.eval()
-        self.likelihood.eval()
-
-    def optimize(self, max_fevals: int) -> Tuple[tuple, float]:    #NOSONAR
-        """Optimize the objective."""
-        predictions_tuple = None
-        short_param_config_index = None
-        last_invalid = False
-        report_multiple_minima = ceil(round(self.size / 10))    # if more than 10% of the space is minima, print a warning
-        use_contextual_variance = self.af_params['explorationfactor'] == 'CV'
-        while self.fevals < max_fevals:
-            if last_invalid:
-                # TODO no need to get the predictions again as the predictions are unchanged, just set the invalid param config mean to the worst non-NAN value and the std to 0
-                # predictions_tuple[0][short_param_config_index] = torch.nanmean(predictions_tuple[0])
-                # predictions_tuple[1][short_param_config_index] = 0
-                predictions_tuple = self.remove_from_predict_list(predictions_tuple, short_param_config_index)
-            else:
-                predictions_tuple = self.predict_list()
-                # if self.initial_sample_std <= self.min_std:
-                # self.initial_sample_std = min(max(predictions_tuple[1].mean().item(), self.min_std), 10.0)
-            # if there are NaN or all of the predicted std are the same, take from the least evaluated region
-            mean_has_NaN = bool(torch.any(torch.isnan(predictions_tuple[0])).item())
-            std_has_NaN = bool(torch.any(torch.isnan(predictions_tuple[1])).item())
-            if mean_has_NaN or std_has_NaN or torch.all(predictions_tuple[1] == predictions_tuple[1][0]):
-                least_evaluated_region_index = self.get_middle_index_of_least_evaluated_region()
-                param_config_index = least_evaluated_region_index
-                short_param_config_index = -1
-                if mean_has_NaN:
-                    warning_reason = "there were NaN in the predicted mean"
-                elif std_has_NaN:
-                    warning_reason = "there were NaN in the predicted std"
-                else:
-                    warning_reason = "all STDs were the same"
-                warnings.warn(
-                    f"After {self.fevals}/{max_fevals} fevals, {warning_reason}, picking one from the least evaluated region and resetting the surrogate model",
-                    ResetModelWarning)
-                self.initialize_model(take_initial_sample=False, train_hyperparams=True)
-            else:
-                # otherwise, optimize the acquisition function to find the next candidate
-                hyperparam = self.contextual_variance(predictions_tuple[0], predictions_tuple[1]) if use_contextual_variance else None
-                acquisition_values = self.acquisition_function(predictions_tuple, hyperparam)
-                short_param_config_index = self.argopt(acquisition_values)
-                param_config_index = self.true_param_config_index(short_param_config_index)
-
-                # if there are multiple minima in the acquisition function values, we want to take one from the least evaluated region
-                min_acquisition_function_value = acquisition_values[short_param_config_index]
-                indices_where_min = (acquisition_values <= min_acquisition_function_value).nonzero(as_tuple=True)[0]
-                if len(indices_where_min) > 1:
-                    # first get the true index for the minima
-                    true_indices_where_min = self.true_param_config_indices(indices_where_min)
-                    # then get the index of the least evaluated region
-                    least_evaluated_region_index = self.get_middle_index_of_least_evaluated_region()
-                    # now find the minima closest to the least evaluated region
-                    param_config_index = self.find_nearest(least_evaluated_region_index, true_indices_where_min)
-                    short_param_config_index = -1    # invalidate the short_param_config_index because we bypassed it
-                    if len(indices_where_min) > report_multiple_minima:
-                        warnings.warn(
-                            f"After {self.fevals}/{max_fevals} fevals, there were multiple minima in the acquisition values ({len(indices_where_min)}), picking one based on the least evaluated region",
-                            MultipleMinimaWarning)
-
-            # evaluate and register the result
-            result = self.evaluate_config(param_config_index)
-            if result == self.invalid_value and short_param_config_index > -1:
-                # can't use last_invalid if short_param_config_index is not set
-                last_invalid = True
-            else:
-                last_invalid = False
-                self.model.set_train_data(self.train_x, self.train_y, strict=False)
-                # do not train if there are multiple minima, because it introduces numerical instability or insolvability
-                if self.training_after_iter > 0 and (self.fevals % self.training_after_iter == 0):
-                    self.train_hyperparams(training_iter=1)    # TODO experiment with other training iter
-                # set the current optimum
-                self.current_optimum = self.opt(self.train_y).item()
-            # print(f"Valid: {len(self.train_x)}, unvisited: {len(self.test_x)}, invalid: {len(self.invalid_x)}, last invalid: {last_invalid}")
-            if self.animate:
-                self.visualize()
-
-        return self.all_results
-
-    def objective_function(self, param_config: tuple) -> float:
-        return self.runner.run([param_config], self.tuning_options)
-
-    def evaluate_config(self, param_config_index: int) -> float:
-        """Evaluates a parameter configuration, returns the time."""
-        param_config = self.true_param_configs[param_config_index]
-        time = self.objective_function(param_config)
-        self.register_result(time, param_config_index)
-        self.update_unique_results()
-        self.fevals = len(self.unique_results)
-        return time
-
-    def register_result(self, result: float, param_config_index: int):
-        """Registers the result to the Tensors and adds the hyperparameters to the results dict."""
-        # set the unvisited Tensors
-        if self.unvisited_configs[param_config_index] is False:
-            raise ValueError(f"The param config index {param_config_index} was already set to False!")
-        self.unvisited_configs[param_config_index] = False
-
-        # set the results Tensors
-        last_result = self.all_results[-1]
-        if result != self.invalid_value:
-            self.valid_configs[param_config_index] = True
-            self.results[param_config_index] = result
-            # assert last_result['time'] == result TODO remove
-            self.results_std[param_config_index] = max(np.std(last_result['times']), self.min_std)
-
-        # add the current model parameters to the last entry of the results dict
-        if len(self.all_results) < 1:
-            return
-        for key, value in self.hyperparams.items():
-            last_result["hyperparam_" + key] = value
-        self.all_results[-1] = last_result
-        # TODO check if it is possible to write the results with hyperparameters to the cache if not in simulation mode, maybe with observer?
-
-    def update_unique_results(self):
-        """Updates the unique results dictionary."""
-        record = self.all_results[-1]
-        # make a unique string by taking every value in a result, if it already exists, it is overwritten
-        self.unique_results.update({",".join([str(v) for k, v in record.items() if k in self.tuning_options.tune_params]): record["time"]})
-
-    def predict_list(self) -> Tuple[Tensor, Tensor]:
-        """Returns the means and standard deviations predicted by the surrogate model for the unvisited parameter configurations."""
-        with torch.no_grad(), gpytorch.settings.fast_pred_samples(), gpytorch.settings.fast_pred_var():
-            try:
-                observed_pred = self.likelihood(self.model(self.test_x))
-                mu = observed_pred.mean
-                std = observed_pred.variance.clamp(min=self.min_std)    # TODO .sqrt() or not? looks like without is better
-                return mu, std
-            except gpytorch.utils.errors.NanError:
-                warnings.warn("NaN error during predictions", NaNPredictionWarning)
-                return torch.ones_like(self.test_x), torch.zeros_like(self.test_x)
-            except gpytorch.utils.errors.NotPSDError:
-                warnings.warn("NotPSD error during predictions", NotPSDPredictionWarning)
-                return torch.ones_like(self.test_x), torch.zeros_like(self.test_x)
-            except RuntimeError as e:
-                warnings.warn(str(e), RuntimeWarning)
-                return torch.ones_like(self.test_x), torch.zeros_like(self.test_x)
-
-    def get_diff_improvement(self, y_mu, y_std, fplus) -> Tensor:
-        """Compute probability of improvement by assuming normality on the difference in improvement."""
-        diff_improvement = (y_mu - fplus) / y_std    # y_std can be very small, causing diff_improvement to be very large
-        diff_improvement = (diff_improvement - diff_improvement.mean()) / max(diff_improvement.std(), self.min_std)    # force to N(0,1) with z-score
-        if self.optimization_direction == 'max':
-            diff_improvement = -diff_improvement
-        return diff_improvement
-
-    def contextual_variance(self, mean: Tensor, std: Tensor):
-        """Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018)."""
-        if not self.af_params['explorationfactor'] == 'CV':
-            raise ValueError(f"Contextual Variance was called, but is not set as the exploration factor ({self.af_params['explorationfactor']})")
-        if self.optimization_direction == 'max':
-            raise NotImplementedError("Contextual Variance has not yet been implemented for maximisation")
-        if self.current_optimum == self.inf_value:
-            return 0.01
-        if self.scaled_output:
-            improvement_over_initial_sample = (abs(self.current_optimum) - self.initial_sample_mean) / self.initial_sample_std
-            improvement_over_current_sample = (abs(self.current_optimum) - self.train_y.mean().item()) / std.mean().item()
-            improvement_diff = improvement_over_current_sample - improvement_over_initial_sample
-            # the closer the improvement over the current sample is to the improvement over the initial sample, the greater the exploration
-            # x = 1 - max(max(1 - improvement_diff, 0.2), 0.0)
-            x = 1 - max(min(improvement_diff, 1) * 0.2, 0.0)
-            # the smaller the difference between the initial sample error and current sample error, the greater the exploration
-            # x = 1 - min(max(self.initial_sample_std - std.mean().item(), 1.0), 0.8)
-            # print(self.initial_sample_std, std.mean().item())
-            cv = np.log10(x) + 0.1    # at x=0.0, y=0.1; at x=0.2, y=0.003; at x=0.2057, y=0.0.
-            return cv
-        else:
-            raise NotImplementedError("Contextual Variance has not yet been implemented for non-scaled outputs")
-
-    def af_random(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function returning a randomly shuffled list for comparison."""
-        list_random = list(range(len(self.unvisited_param_configs)))
-        shuffle(list_random)
-        return list_random
-
-    def af_probability_of_improvement_tensor(self, predictions: Tuple[Tensor, Tensor], hyperparam=None) -> Tensor:
-        """Acquisition function Probability of Improvement (PoI) tensor-based."""
-        # prefetch required data
-        y_mu, y_std = predictions
-        if hyperparam is None:
-            hyperparam = self.af_params['explorationfactor']
-        fplus = self.current_optimum - hyperparam
-
-        diff_improvement = self.get_diff_improvement(y_mu, y_std, fplus)
-        normal = torch.distributions.Normal(torch.zeros_like(diff_improvement), torch.ones_like(diff_improvement))
-        cdf = normal.cdf(diff_improvement)
-
-        # # sanity check
-        # if torch.all(cdf == cdf[0]):
-        #     raise FloatingPointError("You need to scale the diff_improvement-values!")
-        return cdf
-
-    def af_expected_improvement_tensor(self, predictions: Tuple[Tensor, Tensor], hyperparam=None) -> Tensor:
-        """Acquisition function Expected Improvement (EI) tensor-based."""
-        # prefetch required data
-        y_mu, y_std = predictions
-        if hyperparam is None:
-            hyperparam = self.af_params['explorationfactor']
-        fplus = self.current_optimum - hyperparam
-
-        diff_improvement = self.get_diff_improvement(y_mu, y_std, fplus)
-        normal = torch.distributions.Normal(torch.zeros_like(diff_improvement), torch.ones_like(diff_improvement))
-        cdf = normal.cdf(diff_improvement)
-        pdf = torch.exp(normal.log_prob(diff_improvement))
-
-        # # sanity check
-        # if torch.all(cdf == cdf[0]) and torch.all(pdf == pdf[0]):
-        #     raise FloatingPointError("You need to scale the diff_improvement-values!")
-
-        # compute expected improvement in bulk
-        exp_improvement = (pdf + diff_improvement + y_std * cdf)
-        # alternative exp_improvement = y_std * (pdf + diff_improvement * cdf)
-        # alternative exp_improvement = -((fplus - y_mu) * cdf + y_std * pdf)
-        return exp_improvement
-
-    """                  """
-    """ Helper functions """
-    """                  """
-
-    def apply_scaling_to_inputs(self):
-        """Scale the inputs using min-max normalization (0-1) and remove constant parameters."""
-        param_configs_scaled = torch.zeros_like(self.param_configs)
-
-        # first get the scaling factors of each parameter
-        v_min_list = list()
-        v_diff_list = list()
-        unchanging_params_list = list()
-        for param_values in self.tune_params.values():
-            v_min = min(param_values)
-            v_max = max(param_values)
-            v_min_list.append(v_min)
-            v_diff_list.append(v_max - v_min)
-            unchanging_params_list.append(v_min == v_max)
-
-        # then set each parameter value to the scaled value
-        for param_index in range(len(self.param_configs[0])):
-            v_min = v_min_list[param_index]
-            v_diff = v_diff_list[param_index]
-            param_configs_scaled[:, param_index] = torch.sub(self.param_configs[:, param_index], v_min).div(v_diff)
-
-        # finally remove parameters that are constant by applying a mask
-        unchanging_params_tensor = ~torch.tensor(unchanging_params_list, dtype=torch.bool)
-        # if torch.all(unchanging_params_tensor == False):
-        # raise ValueError(f"All of the parameter configurations ({self.size}) are the same: {self.param_configs[0]}, nothing to optimize")
-        nonstatic_param_count = torch.count_nonzero(unchanging_params_tensor)
-        self.param_configs_scaled = torch.zeros([len(param_configs_scaled), nonstatic_param_count], dtype=self.dtype)
-        for param_config_index, param_config in enumerate(param_configs_scaled):
-            self.param_configs_scaled[param_config_index] = param_config[unchanging_params_tensor]
-        self.nonstatic_params = unchanging_params_tensor
-
-    def find_nearest(self, value, array: Tensor):
-        """Find the value nearest to the given value in the array."""
-        index = (torch.abs(array - value)).argmin()
-        return array[index]
-
-    def get_hyperparam(self, name: str, default, supported_values=list(), type=None, cast=None):
-        """Retrieve the value of a hyperparameter based on the name - beware that cast can be a reference to any function."""
-        value = self.tuning_options.strategy_options.get(name, default)
-
-        # check with predifined value list
-        if len(supported_values) > 0 and value not in supported_values:
-            raise ValueError(f"'{name}' is set to {value}, but must be one of {supported_values}")
-        # cast to type if provided
-        if type and not isinstance(value, type):
-            if cast:
-                value = cast(value)
-            else:
-                value = type(value)
-
-        # exceptions with more complex types
-        if value == 'methodparams' and 'explorationfactor' in value and value['explorationfactor'] != 'CV':
-            value = float(value)
-        return value
-
-    def remove_from_predict_list(self, p: Tuple[Tensor, Tensor], i: int) -> Tuple[Tensor, Tensor]:
-        """Remove an index from a tuple of predictions."""
-        return torch.cat([p[0][:i], p[0][i + 1:]]), torch.cat([p[1][:i], p[1][i + 1:]])
-
-    def set_acquisition_function(self, acquisition_function: str):
-        """Set the acquisition function based on the name."""
-        if acquisition_function not in supported_methods:
-            raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
-
-        if acquisition_function == 'poi':
-            self.acquisition_function = self.af_probability_of_improvement_tensor
-        elif acquisition_function == 'ei':
-            self.acquisition_function = self.af_expected_improvement_tensor
-        elif acquisition_function == 'random':
-            self.acquisition_function = self.af_random
-
-    def transform_nonnumerical_params(self, parameter_space: list) -> Tuple[Tensor, dict]:
-        """Transform non-numerical or mixed-type parameters to numerical Tensor, also return new tune_params."""
-        parameter_space = deepcopy(parameter_space)
-        number_of_params = len(parameter_space[0])
-
-        # find out which parameters have nonnumerical or mixed types, and create a range of integers instead
-        nonnumericals_exist = False
-        nonnumerical_type = torch.zeros(number_of_params, dtype=torch.bool)
-        nonnumerical_values = [[] for _ in range(number_of_params)]
-        tune_params = deepcopy(self.tuning_options.tune_params)
-        for param_index, (param_key, param_values) in enumerate(self.tuning_options.tune_params.items()):
-            if not all(isinstance(v, (int, float, complex)) for v in param_values):
-                nonnumericals_exist = True
-                nonnumerical_type[param_index] = True
-                nonnumerical_values[param_index] = param_values
-                tune_params[param_key] = range(len(param_values))
-
-        # overwrite the nonnumerical parameters with numerical parameters
-        if nonnumericals_exist:
-            self.tuning_options["snap"] = False    # snapping is only possible with numerical values
-            for param_config_index, param_config in enumerate(parameter_space):
-                parameter_space[param_config_index] = list(param_config)
-                for param_index, param_value in enumerate(param_config):
-                    if nonnumerical_type[param_index]:
-                        # just use the index of the non-numerical value instead of the value
-                        new_value = nonnumerical_values[param_index].index(param_value)
-                        parameter_space[param_config_index][param_index] = new_value
-
-        return torch.tensor(parameter_space, dtype=self.dtype).to(self.device), tune_params
-
-    def visualize(self):
-        """Visualize the surrogate model and observations in a plot."""
-        if self.fevals < 220:
-            return None
-        from matplotlib import pyplot as plt
-        with torch.no_grad(), gpytorch.settings.fast_pred_var():
-            # Initialize plot
-            f = plt.figure(constrained_layout=True, figsize=(10, 8))
-            subfigures = f.subfigures(2, 1)
-            ax = subfigures[0].subplots(1, 1)
-            axes2 = subfigures[1].subplots(1, 3)
-            ax.set_ylabel('Value')
-            ax.set_xlabel('Parameter')
-
-            param_configs = self.true_param_configs
-
-            # get true function
-            objective_results = np.array([])
-            for param_config in param_configs:
-                result = self.objective_function(tuple(param_config))
-                if result == self.invalid_value:
-                    result = np.nan
-                objective_results = np.append(objective_results, result)
-            if self.scaled_output:
-                objective_results = (objective_results - objective_results.mean()) / objective_results.std()
-
-            if len(param_configs[0]) == 1:
-                ax.plot(np.linspace(param_configs[0], param_configs[-1], self.size), objective_results, 'r')
-            else:
-                ax.plot(range(self.size), objective_results, 'r')
-
-            # take the parameter values for 1D, otherwise the indices
-            if len(param_configs[0]) == 1:
-                x_axis_param_configs = param_configs
-                test_x_x_axis = self.test_x_unscaled.squeeze().to(self.out_device).numpy()
-            else:
-                x_axis_param_configs = torch.arange(self.size)
-                test_x_x_axis = x_axis_param_configs[self.unvisited_configs].to(self.out_device)
-
-            # Get upper and lower confidence bounds
-            observed_pred = self.likelihood(self.model(self.test_x))
-            lower, upper = observed_pred.confidence_region()
-            lower, upper = lower.to(self.out_device), upper.to(self.out_device)
-
-            # Plot initial sample as green stars
-            initial_sample_x_axis = x_axis_param_configs[self.inital_sample_configs].to(self.out_device)
-            initial_sample_y_axis = self.results[self.inital_sample_configs].to(self.out_device)
-            ax.plot(initial_sample_x_axis.numpy(), initial_sample_y_axis.numpy(), 'g*')
-
-            # Plot training data as black stars
-            mask_training_data_no_initial_sample = ~self.inital_sample_configs == self.valid_configs
-            training_x_axis = x_axis_param_configs[mask_training_data_no_initial_sample].to(self.out_device)
-            training_y_axis = self.results[mask_training_data_no_initial_sample].to(self.out_device)
-            ax.plot(training_x_axis.numpy(), training_y_axis.numpy(), 'k*')
-
-            # Plot predictive means as blue line
-            test_x_y_axis = observed_pred.mean.to(self.out_device)
-            ax.plot(test_x_x_axis, test_x_y_axis.numpy(), 'b')
-
-            # Shade between the lower and upper confidence bounds
-            ax.fill_between(test_x_x_axis, lower.numpy(), upper.numpy(), alpha=0.5)
-
-            # set the limits and legend
-            # ax.set_ylim(min(objective_results), max(filter(lambda x: x != self.invalid_value, objective_results)))
-            ax.legend(['Objective Function', 'Initial Sample', 'Observed Data', 'Mean', 'Confidence'])
-
-            # draw the hyperparameter plots
-            # loss
-            axes2[0].plot(self.hyperparams_means['loss'])
-            axes2[0].set_ylabel('Loss')
-            axes2[0].set_xlabel('Number of evaluations')
-            # lengthscale
-            axes2[1].plot(self.hyperparams_means['lengthscale'])
-            axes2[1].set_ylabel('Lengthscale')
-            axes2[1].set_xlabel('Number of evaluations')
-            # noise
-            axes2[2].plot(self.hyperparams_means['noise'])
-            axes2[2].set_ylabel('Noise')
-            axes2[2].set_xlabel('Number of evaluations')
-
-            if self.animate:
-                # f.canvas.draw()
-                plt.savefig('animation_last_graph')
-                # plt.pause(0.1)
-
-            # plt.show()
-
-
-class CustomWarning(Warning):
-
-    def __init__(self, message: str, category: str) -> None:
-        # super().__init__()
-        self.message = message
-        self.category = category
-
-    def __str__(self):
-        return repr(self.message)
-
-    def category(self):
-        return self.category.__name__
-
-
-class AvoidedLossSurgeWarning(CustomWarning):
-
-    def __init__(self, message: str) -> None:
-        super().__init__(message, "AvoidedLossSurgeWarning")
-
-
-class NotPSDTrainingWarning(CustomWarning):
-
-    def __init__(self, message: str) -> None:
-        super().__init__(message, "NotPSDTrainingWarning")
-
-
-class NaNTrainingWarning(CustomWarning):
-
-    def __init__(self, message: str) -> None:
-        super().__init__(message, "NaNTrainingWarning")
-
-
-class NaNPredictionWarning(CustomWarning):
-
-    def __init__(self, message: str) -> None:
-        super().__init__(message, "NaNPredictionWarning")
-
-
-class NotPSDPredictionWarning(CustomWarning):
-
-    def __init__(self, message: str) -> None:
-        super().__init__(message, "NotPSDPredictionWarning")
-
-
-class ResetModelWarning(CustomWarning):
-
-    def __init__(self, message: str) -> None:
-        super().__init__(message, "ResetModelWarning")
-
-
-class MultipleMinimaWarning(CustomWarning):
-
-    def __init__(self, message: str) -> None:
-        super().__init__(message, "MultipleMinimaWarning")
-
-
-class AlreadyEvaluatedConflict(CustomWarning):
-
-    def __init__(self, message: str) -> None:
-        super().__init__(message, "AlreadyEvaluatedConflict")
diff --git a/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py b/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
deleted file mode 100644
index cf733cdde..000000000
--- a/kernel_tuner/strategies/bayes_opt_alt_BOTorch.py
+++ /dev/null
@@ -1,75 +0,0 @@
-"""BOTorch package from https://github.com/pytorch/botorch."""
-from __future__ import print_function
-
-from collections import OrderedDict
-
-try:
-    pass
-except Exception:
-    BayesianOptimization = None
-    bayes_opt_present = False
-
-from kernel_tuner.strategies import minimize
-
-supported_methods = ["poi", "ei", "ucb"]
-
-
-def tune(runner, kernel_options, device_options, tuning_options):
-    """Find the best performing kernel configuration in the parameter space.
-
-    :params runner: A runner from kernel_tuner.runners
-    :type runner: kernel_tuner.runner
-
-    :param kernel_options: A dictionary with all options for the kernel.
-    :type kernel_options: kernel_tuner.interface.Options
-
-    :param device_options: A dictionary with all options for the device
-        on which the kernel should be tuned.
-    :type device_options: kernel_tuner.interface.Options
-
-    :param tuning_options: A dictionary with all options regarding the tuning
-        process.
-    :type tuning_options: kernel_tuner.interface.Options
-
-    :returns: A list of dictionaries for executed kernel configurations and their
-        execution times. And a dictionary that contains a information
-        about the hardware/software environment on which the tuning took place.
-    :rtype: list(dict()), dict()
-
-    """
-    if not bayes_opt_present:
-        raise ImportError("Error: optional dependency Bayesian Optimization not installed")
-    init_points = tuning_options.strategy_options.get("popsize", 20)
-    n_iter = tuning_options.strategy_options.get("max_fevals", 100)
-
-    # defaults as used by Bayesian Optimization Python package
-    acq = tuning_options.strategy_options.get("method", "ucb")
-    kappa = tuning_options.strategy_options.get("kappa", 2.576)
-    xi = tuning_options.strategy_options.get("xi", 0.0)
-
-    tuning_options["scaling"] = True
-
-    results = []
-
-    # function to pass to the optimizer
-    def func(**kwargs):
-        args = [kwargs[key] for key in tuning_options.tune_params.keys()]
-        return -1.0 * minimize._cost_func(args, kernel_options, tuning_options, runner, results)
-
-    bounds, _, _ = minimize.get_bounds_x0_eps(tuning_options)
-    pbounds = OrderedDict(zip(tuning_options.tune_params.keys(), bounds))
-
-    verbose = 0
-    if tuning_options.verbose:
-        verbose = 2
-
-    # print(np.isnan(init_points).any())
-
-    optimizer = BayesianOptimization(f=func, pbounds=pbounds, verbose=verbose)
-
-    optimizer.maximize(init_points=init_points, n_iter=n_iter, acq=acq, kappa=kappa, xi=xi)
-
-    if tuning_options.verbose:
-        print(optimizer.max)
-
-    return results, runner.dev.get_environment()
diff --git a/kernel_tuner/strategies/bayes_opt_ax.py b/kernel_tuner/strategies/bayes_opt_ax.py
deleted file mode 100644
index 2bb3ce8fc..000000000
--- a/kernel_tuner/strategies/bayes_opt_ax.py
+++ /dev/null
@@ -1,29 +0,0 @@
-"""Bayesian Optimization implementation using the Ax platform."""
-
-from ax import optimize
-
-from kernel_tuner import util
-from kernel_tuner.searchspace import Searchspace
-from kernel_tuner.strategies.common import (
-    CostFunc,
-)
-
-
-def tune(searchspace: Searchspace, runner, tuning_options):
-    cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True)
-
-    ax_searchspace = searchspace.to_ax_searchspace()
-
-    try:
-        best_parameters, best_values, experiment, model = optimize(
-            parameters=ax_searchspace.parameters,
-            parameter_constraints=ax_searchspace.parameter_constraints,
-            # Booth function
-            evaluation_function=cost_func,
-            minimize=True,
-        )
-    except util.StopCriterionReached as e:
-        if tuning_options.verbose:
-            print(e)
-
-    return cost_func.results
diff --git a/kernel_tuner/strategies/bayes_opt_old.py b/kernel_tuner/strategies/bayes_opt_old.py
index c3381731a..a55790e66 100644
--- a/kernel_tuner/strategies/bayes_opt_old.py
+++ b/kernel_tuner/strategies/bayes_opt_old.py
@@ -187,7 +187,7 @@ def get_hyperparam(name: str, default, supported_values=list()):
         self.invalid_value = 1e20
         self.opt_direction = opt_direction
         if opt_direction == 'min':
-            self.worst_value = np.PINF
+            self.worst_value = np.inf
             self.argopt = np.argmin
         elif opt_direction == 'max':
             self.worst_value = np.NINF
diff --git a/test/strategies/test_bayesian_optimization.py b/test/strategies/test_bayesian_optimization.py
index dd206a37b..8d929054a 100644
--- a/test/strategies/test_bayesian_optimization.py
+++ b/test/strategies/test_bayesian_optimization.py
@@ -74,7 +74,7 @@ def test_bo_initialization():
     assert BO.searchspace == pruned_parameter_space
     assert BO.unvisited_cache == pruned_parameter_space
     assert len(BO.observations) == len(pruned_parameter_space)
-    assert BO.current_optimum == np.PINF
+    assert BO.current_optimum == np.inf
 
 def test_bo_initial_sample_lhs():
     sample = BO.draw_latin_hypercube_samples(num_samples=1)
diff --git a/test/test_searchspace.py b/test/test_searchspace.py
index eaf546387..7b43fc722 100644
--- a/test/test_searchspace.py
+++ b/test/test_searchspace.py
@@ -181,22 +181,6 @@ def test_param_index_lookup():
     assert simple_searchspace.get_param_indices(last) == (3, 1, 1)
 
 
-def test_get_tensorspace():
-    """Test the generation of a tensor space."""
-    tensorspace = simple_searchspace.get_tensorspace()
-    assert tensorspace.shape == simple_searchspace.get_list_numpy().shape
-
-
-def test_conversion_tensor_param_config():
-    """Test the conversion from a parameter configuration to a tensor and tensor to parameter configuration."""
-    for config in simple_searchspace_single.list:
-        tensor = simple_searchspace_single.param_config_to_tensor(config)
-        config_2 = simple_searchspace_single.tensor_to_param_config(tensor)
-        assert config == config_2
-        assert tensor.equal(simple_searchspace_single.param_config_to_tensor(config_2))
-        assert len(tensor) == len(config) - 1
-
-
 def test_random_sample():
     """Test whether the random sample indices exists and are unique, and if it throws an error for too many samples."""
     random_sample_indices = searchspace.get_random_sample_indices(100)

From d2bb76a43d5d6cec310ace6c5ceadeb4e9a4b920 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 18:23:10 +0100
Subject: [PATCH 139/168] Avoid import of whole util module

---
 kernel_tuner/integration.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/integration.py b/kernel_tuner/integration.py
index d3219ba87..938c8c7c9 100644
--- a/kernel_tuner/integration.py
+++ b/kernel_tuner/integration.py
@@ -4,7 +4,7 @@
 
 from jsonschema import validate
 
-from kernel_tuner import util
+from kernel_tuner.util import get_instance_string, looks_like_a_filename, read_file
 
 #specifies for a number of pre-defined objectives whether
 #the objective should be minimized or maximized (boolean value denotes higher is better)
@@ -205,8 +205,8 @@ def top_result(item):
         meta["version_number"] = "1.0"
         meta["kernel_name"] = kernel_name
         if kernel_string and not callable(kernel_string) and not isinstance(kernel_string, list):
-            if util.looks_like_a_filename(kernel_string):
-                meta["kernel_string"] = util.read_file(kernel_string)
+            if looks_like_a_filename(kernel_string):
+                meta["kernel_string"] = read_file(kernel_string)
             else:
                 meta["kernel_string"] = kernel_string
         meta["objective"] = objective
@@ -337,7 +337,7 @@ def _select_best_common_config(results, objective, objective_higher_is_better):
     for config in results:
         params = config["tunable_parameters"]
 
-        config_str = util.get_instance_string(params)
+        config_str = get_instance_string(params)
         #count occurances
         results_table[config_str] = results_table.get(config_str,0) + 1
         #add to performance

From 58f147fe7bfb96a9fc201a7bce03e298b4f2930e Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 18:24:01 +0100
Subject: [PATCH 140/168] Pin numpy below 2.0 and update requirements

---
 .gitignore                |  2 +-
 doc/requirements.txt      |  4 +-
 doc/requirements_test.txt | 93 ++++++++++++++++-----------------------
 pyproject.toml            |  2 +-
 4 files changed, 41 insertions(+), 60 deletions(-)

diff --git a/.gitignore b/.gitignore
index ce4873209..1f576769a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,7 @@
 poetry.lock
 noxenv.txt
 noxsettings.toml
-hyperparamtuning/*
+hyperparamtuning*/*
 *.prof
 
 ### Python ###
diff --git a/doc/requirements.txt b/doc/requirements.txt
index fd92b26ff..378dccc76 100644
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@@ -36,7 +36,7 @@ nbclient==0.10.2 ; python_version >= "3.10" and python_version <= "3.11" or pyth
 nbconvert==7.16.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 nbformat==5.10.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 nbsphinx==0.9.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-numpy==2.2.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 pandocfilters==1.5.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
@@ -45,7 +45,7 @@ pexpect==4.9.0 ; python_version >= "3.10" and python_version <= "3.11" and (sys_
 platformdirs==4.3.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 pluggy==1.5.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 prompt-toolkit==3.0.50 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-ptyprocess==0.7.0 ; python_version >= "3.10" and python_version <= "3.11" and os_name != "nt" or python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "3.15" and os_name != "nt" or python_version >= "3.12" and python_version < "3.15" and (sys_platform != "win32" and sys_platform != "emscripten")
+ptyprocess==0.7.0 ; python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.10" and python_version <= "3.11" and os_name != "nt" or python_version >= "3.12" and python_version < "3.15" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "3.15" and os_name != "nt"
 pure-eval==0.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 pycparser==2.22 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
 pygments==2.19.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
diff --git a/doc/requirements_test.txt b/doc/requirements_test.txt
index b5a5c1443..11ed8518b 100644
--- a/doc/requirements_test.txt
+++ b/doc/requirements_test.txt
@@ -189,62 +189,43 @@ nox-poetry==1.2.0 ; python_version >= "3.10" and python_version <= "3.11" or pyt
 nox==2024.10.9 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \
     --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95
-numpy==2.2.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
-    --hash=sha256:05c076d531e9998e7e694c36e8b349969c56eadd2cdcd07242958489d79a7286 \
-    --hash=sha256:0d54974f9cf14acf49c60f0f7f4084b6579d24d439453d5fc5805d46a165b542 \
-    --hash=sha256:11c43995255eb4127115956495f43e9343736edb7fcdb0d973defd9de14cd84f \
-    --hash=sha256:188dcbca89834cc2e14eb2f106c96d6d46f200fe0200310fc29089657379c58d \
-    --hash=sha256:1974afec0b479e50438fc3648974268f972e2d908ddb6d7fb634598cdb8260a0 \
-    --hash=sha256:1cf4e5c6a278d620dee9ddeb487dc6a860f9b199eadeecc567f777daace1e9e7 \
-    --hash=sha256:207a2b8441cc8b6a2a78c9ddc64d00d20c303d79fba08c577752f080c4007ee3 \
-    --hash=sha256:218f061d2faa73621fa23d6359442b0fc658d5b9a70801373625d958259eaca3 \
-    --hash=sha256:2aad3c17ed2ff455b8eaafe06bcdae0062a1db77cb99f4b9cbb5f4ecb13c5146 \
-    --hash=sha256:2fa8fa7697ad1646b5c93de1719965844e004fcad23c91228aca1cf0800044a1 \
-    --hash=sha256:31504f970f563d99f71a3512d0c01a645b692b12a63630d6aafa0939e52361e6 \
-    --hash=sha256:3387dd7232804b341165cedcb90694565a6015433ee076c6754775e85d86f1fc \
-    --hash=sha256:4ba5054787e89c59c593a4169830ab362ac2bee8a969249dc56e5d7d20ff8df9 \
-    --hash=sha256:4f92084defa704deadd4e0a5ab1dc52d8ac9e8a8ef617f3fbb853e79b0ea3592 \
-    --hash=sha256:65ef3468b53269eb5fdb3a5c09508c032b793da03251d5f8722b1194f1790c00 \
-    --hash=sha256:6f527d8fdb0286fd2fd97a2a96c6be17ba4232da346931d967a0630050dfd298 \
-    --hash=sha256:7051ee569db5fbac144335e0f3b9c2337e0c8d5c9fee015f259a5bd70772b7e8 \
-    --hash=sha256:7716e4a9b7af82c06a2543c53ca476fa0b57e4d760481273e09da04b74ee6ee2 \
-    --hash=sha256:79bd5f0a02aa16808fcbc79a9a376a147cc1045f7dfe44c6e7d53fa8b8a79392 \
-    --hash=sha256:7a4e84a6283b36632e2a5b56e121961f6542ab886bc9e12f8f9818b3c266bfbb \
-    --hash=sha256:8120575cb4882318c791f839a4fd66161a6fa46f3f0a5e613071aae35b5dd8f8 \
-    --hash=sha256:81413336ef121a6ba746892fad881a83351ee3e1e4011f52e97fba79233611fd \
-    --hash=sha256:8146f3550d627252269ac42ae660281d673eb6f8b32f113538e0cc2a9aed42b9 \
-    --hash=sha256:879cf3a9a2b53a4672a168c21375166171bc3932b7e21f622201811c43cdd3b0 \
-    --hash=sha256:892c10d6a73e0f14935c31229e03325a7b3093fafd6ce0af704be7f894d95687 \
-    --hash=sha256:92bda934a791c01d6d9d8e038363c50918ef7c40601552a58ac84c9613a665bc \
-    --hash=sha256:9ba03692a45d3eef66559efe1d1096c4b9b75c0986b5dff5530c378fb8331d4f \
-    --hash=sha256:9eeea959168ea555e556b8188da5fa7831e21d91ce031e95ce23747b7609f8a4 \
-    --hash=sha256:a0258ad1f44f138b791327961caedffbf9612bfa504ab9597157806faa95194a \
-    --hash=sha256:a761ba0fa886a7bb33c6c8f6f20213735cb19642c580a931c625ee377ee8bd39 \
-    --hash=sha256:a7b9084668aa0f64e64bd00d27ba5146ef1c3a8835f3bd912e7a9e01326804c4 \
-    --hash=sha256:a84eda42bd12edc36eb5b53bbcc9b406820d3353f1994b6cfe453a33ff101775 \
-    --hash=sha256:ab2939cd5bec30a7430cbdb2287b63151b77cf9624de0532d629c9a1c59b1d5c \
-    --hash=sha256:ac0280f1ba4a4bfff363a99a6aceed4f8e123f8a9b234c89140f5e894e452ecd \
-    --hash=sha256:adf8c1d66f432ce577d0197dceaac2ac00c0759f573f28516246351c58a85020 \
-    --hash=sha256:b4adfbbc64014976d2f91084915ca4e626fbf2057fb81af209c1a6d776d23e3d \
-    --hash=sha256:bb649f8b207ab07caebba230d851b579a3c8711a851d29efe15008e31bb4de24 \
-    --hash=sha256:bce43e386c16898b91e162e5baaad90c4b06f9dcbe36282490032cec98dc8ae7 \
-    --hash=sha256:bd3ad3b0a40e713fc68f99ecfd07124195333f1e689387c180813f0e94309d6f \
-    --hash=sha256:c3f7ac96b16955634e223b579a3e5798df59007ca43e8d451a0e6a50f6bfdfba \
-    --hash=sha256:cf28633d64294969c019c6df4ff37f5698e8326db68cc2b66576a51fad634880 \
-    --hash=sha256:d0f35b19894a9e08639fd60a1ec1978cb7f5f7f1eace62f38dd36be8aecdef4d \
-    --hash=sha256:db1f1c22173ac1c58db249ae48aa7ead29f534b9a948bc56828337aa84a32ed6 \
-    --hash=sha256:dbe512c511956b893d2dacd007d955a3f03d555ae05cfa3ff1c1ff6df8851854 \
-    --hash=sha256:df2f57871a96bbc1b69733cd4c51dc33bea66146b8c63cacbfed73eec0883017 \
-    --hash=sha256:e2f085ce2e813a50dfd0e01fbfc0c12bbe5d2063d99f8b29da30e544fb6483b8 \
-    --hash=sha256:e642d86b8f956098b564a45e6f6ce68a22c2c97a04f5acd3f221f57b8cb850ae \
-    --hash=sha256:e9e0a277bb2eb5d8a7407e14688b85fd8ad628ee4e0c7930415687b6564207a4 \
-    --hash=sha256:ea2bb7e2ae9e37d96835b3576a4fa4b3a97592fbea8ef7c3587078b0068b8f09 \
-    --hash=sha256:ee4d528022f4c5ff67332469e10efe06a267e32f4067dc76bb7e2cddf3cd25ff \
-    --hash=sha256:f05d4198c1bacc9124018109c5fba2f3201dbe7ab6e92ff100494f236209c960 \
-    --hash=sha256:f34dc300df798742b3d06515aa2a0aee20941c13579d7a2f2e10af01ae4901ee \
-    --hash=sha256:f4162988a360a29af158aeb4a2f4f09ffed6a969c9776f8f3bdee9b06a8ab7e5 \
-    --hash=sha256:f486038e44caa08dbd97275a9a35a283a8f1d2f0ee60ac260a1790e76660833c \
-    --hash=sha256:f7de08cbe5551911886d1ab60de58448c6df0f67d9feb7d1fb21e9875ef95e91
+numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+    --hash=sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b \
+    --hash=sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818 \
+    --hash=sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20 \
+    --hash=sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0 \
+    --hash=sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010 \
+    --hash=sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a \
+    --hash=sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea \
+    --hash=sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c \
+    --hash=sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71 \
+    --hash=sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110 \
+    --hash=sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be \
+    --hash=sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a \
+    --hash=sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a \
+    --hash=sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5 \
+    --hash=sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed \
+    --hash=sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd \
+    --hash=sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c \
+    --hash=sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e \
+    --hash=sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0 \
+    --hash=sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c \
+    --hash=sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a \
+    --hash=sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b \
+    --hash=sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0 \
+    --hash=sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6 \
+    --hash=sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2 \
+    --hash=sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a \
+    --hash=sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30 \
+    --hash=sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218 \
+    --hash=sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5 \
+    --hash=sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07 \
+    --hash=sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2 \
+    --hash=sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4 \
+    --hash=sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764 \
+    --hash=sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef \
+    --hash=sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3 \
+    --hash=sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f
 packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
     --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \
     --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f
diff --git a/pyproject.toml b/pyproject.toml
index 02e70089f..d6453286d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,7 +46,7 @@ classifiers = [
 # ATTENTION: if anything is changed here, run `poetry update`
 requires-python = ">=3.10,<3.15"  # NOTE when changing the Python versions, also change the test versions in the Noxfile and GitHub Actions
 dependencies = [
-    "numpy>=1.26.0",    # Python 3.12 requires numpy at least 1.26
+    "numpy (>=1.26.0,<2.0.0)",    # Python 3.12 requires numpy at least 1.26, CuPy does not support 2.0
     "scipy>=1.14.1",
     "packaging",        # required by file_utils
     "jsonschema",

From a48394a33e6880bbf5f22144997ee2fe99f8967a Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Thu, 20 Mar 2025 18:54:05 +0100
Subject: [PATCH 141/168] Avoid import of whole util module

---
 kernel_tuner/strategies/basinhopping.py        |  4 ++--
 kernel_tuner/strategies/bayes_opt.py           |  2 +-
 kernel_tuner/strategies/diff_evo.py            |  4 ++--
 kernel_tuner/strategies/dual_annealing.py      |  4 ++--
 kernel_tuner/strategies/firefly_algorithm.py   |  6 +++---
 kernel_tuner/strategies/genetic_algorithm.py   |  6 +++---
 kernel_tuner/strategies/greedy_ils.py          |  4 ++--
 kernel_tuner/strategies/greedy_mls.py          |  4 ++--
 kernel_tuner/strategies/minimize.py            |  4 ++--
 kernel_tuner/strategies/pso.py                 |  4 ++--
 kernel_tuner/strategies/random_sample.py       |  4 ++--
 kernel_tuner/strategies/simulated_annealing.py |  4 ++--
 test/strategies/test_strategies.py             |  4 ++--
 test/test_compiler_functions.py                |  4 ++--
 test/test_integration.py                       | 12 ++++++------
 test/test_kernelbuilder.py                     |  4 ++--
 16 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/kernel_tuner/strategies/basinhopping.py b/kernel_tuner/strategies/basinhopping.py
index 20e800f6e..eed906676 100644
--- a/kernel_tuner/strategies/basinhopping.py
+++ b/kernel_tuner/strategies/basinhopping.py
@@ -1,7 +1,7 @@
 """The strategy that uses the basinhopping global optimization method."""
 import scipy.optimize
 
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc, setup_method_arguments, setup_method_options
@@ -31,7 +31,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     try:
         opt_result = scipy.optimize.basinhopping(cost_func, x0, T=T, stepsize=eps,
                                              minimizer_kwargs=minimizer_kwargs, disp=tuning_options.verbose)
-    except util.StopCriterionReached as e:
+    except StopCriterionReached as e:
         if tuning_options.verbose:
             print(e)
 
diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
index 775e4193a..451a0d5eb 100644
--- a/kernel_tuner/strategies/bayes_opt.py
+++ b/kernel_tuner/strategies/bayes_opt.py
@@ -149,7 +149,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         if max_fevals - bo.fevals <= 0:
             raise ValueError("No function evaluations left for optimization after sampling")
         bo.optimize(max_fevals)
-    except util.StopCriterionReached as e:
+    except StopCriterionReached as e:
         if tuning_options.verbose:
             print(e)
 
diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py
index cd089ae1e..d77772992 100644
--- a/kernel_tuner/strategies/diff_evo.py
+++ b/kernel_tuner/strategies/diff_evo.py
@@ -1,7 +1,7 @@
 """The differential evolution strategy that optimizes the search through the parameter space."""
 from scipy.optimize import differential_evolution
 
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc
@@ -32,7 +32,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     try:
         opt_result = differential_evolution(cost_func, bounds, maxiter=maxiter, popsize=popsize, init=population_enc,
                                         polish=False, strategy=method, disp=tuning_options.verbose)
-    except util.StopCriterionReached as e:
+    except StopCriterionReached as e:
         if tuning_options.verbose:
             print(e)
 
diff --git a/kernel_tuner/strategies/dual_annealing.py b/kernel_tuner/strategies/dual_annealing.py
index 7d9868c5e..598151ea5 100644
--- a/kernel_tuner/strategies/dual_annealing.py
+++ b/kernel_tuner/strategies/dual_annealing.py
@@ -1,7 +1,7 @@
 """The strategy that uses the dual annealing optimization method."""
 import scipy.optimize
 
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc, setup_method_arguments, setup_method_options
@@ -31,7 +31,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     opt_result = None
     try:
         opt_result = scipy.optimize.dual_annealing(cost_func, bounds, minimizer_kwargs=minimizer_kwargs, x0=x0, maxfun=max_fevals)
-    except util.StopCriterionReached as e:
+    except StopCriterionReached as e:
         if tuning_options.verbose:
             print(e)
 
diff --git a/kernel_tuner/strategies/firefly_algorithm.py b/kernel_tuner/strategies/firefly_algorithm.py
index dc43aae6f..f4309d638 100644
--- a/kernel_tuner/strategies/firefly_algorithm.py
+++ b/kernel_tuner/strategies/firefly_algorithm.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc, scale_from_params
@@ -42,7 +42,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     for j in range(num_particles):
         try:
             swarm[j].compute_intensity(cost_func)
-        except util.StopCriterionReached as e:
+        except StopCriterionReached as e:
             if tuning_options.verbose:
                 print(e)
             return cost_func.results
@@ -65,7 +65,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
                     swarm[i].move_towards(swarm[j], beta, alpha)
                     try:
                         swarm[i].compute_intensity(cost_func)
-                    except util.StopCriterionReached as e:
+                    except StopCriterionReached as e:
                         if tuning_options.verbose:
                             print(e)
                         return cost_func.results
diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 6a8565118..ec7c26f4c 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -4,7 +4,7 @@
 
 import numpy as np
 
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached, get_best_config
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc
@@ -42,7 +42,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         for dna in population:
             try:
                 time = cost_func(dna, check_restrictions=False)
-            except util.StopCriterionReached as e:
+            except StopCriterionReached as e:
                 if tuning_options.verbose:
                     print(e)
                 return cost_func.results
@@ -54,7 +54,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
         # 'best_score' is used only for printing
         if tuning_options.verbose and cost_func.results:
-            best_score = util.get_best_config(
+            best_score = get_best_config(
                 cost_func.results, tuning_options.objective, tuning_options.objective_higher_is_better
             )[tuning_options.objective]
 
diff --git a/kernel_tuner/strategies/greedy_ils.py b/kernel_tuner/strategies/greedy_ils.py
index a4c521746..0608c092c 100644
--- a/kernel_tuner/strategies/greedy_ils.py
+++ b/kernel_tuner/strategies/greedy_ils.py
@@ -1,5 +1,5 @@
 """A simple greedy iterative local search algorithm for parameter search."""
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc
@@ -40,7 +40,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         try:
             candidate = base_hillclimb(candidate, neighbor, max_fevals, searchspace, tuning_options, cost_func, restart=restart, randomize=True)
             new_score = cost_func(candidate, check_restrictions=False)
-        except util.StopCriterionReached as e:
+        except StopCriterionReached as e:
             if tuning_options.verbose:
                 print(e)
             return cost_func.results
diff --git a/kernel_tuner/strategies/greedy_mls.py b/kernel_tuner/strategies/greedy_mls.py
index 1b34da501..cdca53e12 100644
--- a/kernel_tuner/strategies/greedy_mls.py
+++ b/kernel_tuner/strategies/greedy_mls.py
@@ -1,5 +1,5 @@
 """A greedy multi-start local search algorithm for parameter search."""
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.hillclimbers import base_hillclimb
@@ -30,7 +30,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
         try:
             base_hillclimb(candidate, neighbor, max_fevals, searchspace, tuning_options, cost_func, restart=restart, randomize=randomize, order=order)
-        except util.StopCriterionReached as e:
+        except StopCriterionReached as e:
             if tuning_options.verbose:
                 print(e)
             return cost_func.results
diff --git a/kernel_tuner/strategies/minimize.py b/kernel_tuner/strategies/minimize.py
index 80c1c6f82..71929a040 100644
--- a/kernel_tuner/strategies/minimize.py
+++ b/kernel_tuner/strategies/minimize.py
@@ -2,7 +2,7 @@
 
 import scipy.optimize
 
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies.common import (
     CostFunc,
@@ -30,7 +30,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     opt_result = None
     try:
         opt_result = scipy.optimize.minimize(cost_func, x0, method=method, options=options, **kwargs)
-    except util.StopCriterionReached as e:
+    except StopCriterionReached as e:
         if tuning_options.verbose:
             print(e)
 
diff --git a/kernel_tuner/strategies/pso.py b/kernel_tuner/strategies/pso.py
index cc6b82d49..82c500197 100644
--- a/kernel_tuner/strategies/pso.py
+++ b/kernel_tuner/strategies/pso.py
@@ -5,7 +5,7 @@
 
 import numpy as np
 
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc, scale_from_params
@@ -52,7 +52,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         for j in range(num_particles):
             try:
                 swarm[j].evaluate(cost_func)
-            except util.StopCriterionReached as e:
+            except StopCriterionReached as e:
                 if tuning_options.verbose:
                     print(e)
                 return cost_func.results
diff --git a/kernel_tuner/strategies/random_sample.py b/kernel_tuner/strategies/random_sample.py
index 06ab4b9f6..57eaac6cc 100644
--- a/kernel_tuner/strategies/random_sample.py
+++ b/kernel_tuner/strategies/random_sample.py
@@ -1,7 +1,7 @@
 """Iterate over a random sample of the parameter space."""
 import numpy as np
 
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc
@@ -26,7 +26,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     for sample in samples:
         try:
             cost_func(sample, check_restrictions=False)
-        except util.StopCriterionReached as e:
+        except StopCriterionReached as e:
             if tuning_options.verbose:
                 print(e)
             return cost_func.results
diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py
index d73c0ad5e..b9738d741 100644
--- a/kernel_tuner/strategies/simulated_annealing.py
+++ b/kernel_tuner/strategies/simulated_annealing.py
@@ -4,7 +4,7 @@
 
 import numpy as np
 
-from kernel_tuner import util
+from kernel_tuner.util import StopCriterionReached
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc
@@ -52,7 +52,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
             new_pos = neighbor(pos, searchspace)
             try:
                 new_cost = cost_func(new_pos, check_restrictions=False)
-            except util.StopCriterionReached as e:
+            except StopCriterionReached as e:
                 if tuning_options.verbose:
                     print(e)
                 return cost_func.results
diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
index 9c0e9faca..8b2b92a45 100644
--- a/test/strategies/test_strategies.py
+++ b/test/strategies/test_strategies.py
@@ -4,7 +4,7 @@
 import pytest
 
 import kernel_tuner
-from kernel_tuner import util
+from kernel_tuner.util import InvalidConfig
 from kernel_tuner.interface import strategy_map
 
 from ..context import skip_if_no_bayesopt_botorch, skip_if_no_bayesopt_gpytorch
@@ -75,7 +75,7 @@ def test_strategies(vector_add, strategy):
         unique_results = {}
         for result in results:
             x_int = ",".join([str(v) for k, v in result.items() if k in tune_params])
-            if not isinstance(result["time"], util.InvalidConfig):
+            if not isinstance(result["time"], InvalidConfig):
                 unique_results[x_int] = result["time"]
         assert len(unique_results) <= filter_options["max_fevals"]
 
diff --git a/test/test_compiler_functions.py b/test/test_compiler_functions.py
index 913fee85d..cf35aac83 100644
--- a/test/test_compiler_functions.py
+++ b/test/test_compiler_functions.py
@@ -13,7 +13,7 @@
 import kernel_tuner
 from kernel_tuner.backends.compiler import CompilerFunctions, Argument, is_cupy_array, get_array_module
 from kernel_tuner.core import KernelSource, KernelInstance
-from kernel_tuner import util
+from kernel_tuner.util import delete_temp_file
 
 from .context import skip_if_no_gfortran, skip_if_no_gcc, skip_if_no_openmp, skip_if_no_cupy
 from .test_runners import env as cuda_env  # noqa: F401
@@ -391,7 +391,7 @@ def test_complies_fortran_function_with_module():
         assert np.isclose(result, 42.0)
 
     finally:
-        util.delete_temp_file("my_fancy_module.mod")
+        delete_temp_file("my_fancy_module.mod")
 
 
 @pytest.fixture
diff --git a/test/test_integration.py b/test/test_integration.py
index aafb437f1..637a07575 100644
--- a/test/test_integration.py
+++ b/test/test_integration.py
@@ -5,7 +5,7 @@
 import pytest
 
 from kernel_tuner import integration
-from kernel_tuner import util
+from kernel_tuner.util import delete_temp_file
 from datetime import datetime, timezone
 
 
@@ -71,7 +71,7 @@ def test_store_results(fake_results):
         assert my_gpu_100_data[0]["time"] < 100
 
     finally:
-        util.delete_temp_file(filename)
+        delete_temp_file(filename)
 
 
 def test_setup_device_targets(fake_results):
@@ -136,8 +136,8 @@ def test_setup_device_targets(fake_results):
         assert expected in output_str
 
     finally:
-        util.delete_temp_file(results_filename)
-        util.delete_temp_file(header_filename)
+        delete_temp_file(results_filename)
+        delete_temp_file(header_filename)
 
 
 def test_setup_device_targets_max(fake_results):
@@ -174,5 +174,5 @@ def test_setup_device_targets_max(fake_results):
         assert expected in output_str
 
     finally:
-        util.delete_temp_file(results_filename)
-        util.delete_temp_file(header_filename)
+        delete_temp_file(results_filename)
+        delete_temp_file(header_filename)
diff --git a/test/test_kernelbuilder.py b/test/test_kernelbuilder.py
index c706e3953..9cd2d0185 100644
--- a/test/test_kernelbuilder.py
+++ b/test/test_kernelbuilder.py
@@ -3,8 +3,8 @@
 
 import pytest
 from kernel_tuner import kernelbuilder
-from kernel_tuner import util
 from kernel_tuner import integration
+from kernel_tuner.util import delete_temp_file
 
 
 backends = ["cuda", "cupy"]
@@ -59,4 +59,4 @@ def test_PythonKernel_tuned(test_kernel, backend):
         assert np.allclose(reference[0], a+b)
 
     finally:
-        util.delete_temp_file(test_results_file)
+        delete_temp_file(test_results_file)

From 5dd3e4c76e3de3db57218ea280075e7de50d5047 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Tue, 25 Mar 2025 12:02:56 +0100
Subject: [PATCH 142/168] Updated dependencies, required python version and
 bumped version

---
 kernel_tuner/backends/hypertuner.py | 5 +++--
 kernel_tuner/hyper.py               | 2 +-
 pyproject.toml                      | 6 +++---
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index 6348cc56d..66634e5c0 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -101,8 +101,9 @@ def compile(self, kernel_instance):
         # any additional settings
         override = { 
             "experimental_groups_defaults": { 
-                "repeats": 10,
-                "samples": self.iterations 
+                "repeats": 25,
+                "samples": self.iterations,
+                "minimum_fraction_of_budget_valid": 0.01, 
             }
         }
 
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 27672cf97..ed61558e5 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -152,7 +152,7 @@ def put_if_not_present(target_dict, key, value):
     elif strategy_to_tune.lower() == "bayes_opt":
         hyperparams = {
             # 'covariancekernel': ["constantrbf", "rbf", "matern32", "matern52"],
-            # 'covariancelengthscale': [1.0, 1.5, 2.0],
+            'covariancelengthscale': [1.0, 1.5, 2.0],
             'method': ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast", "multi-ultrafast"],
             'samplingmethod': ["random", "LHS"],
             'popsize': [10, 20, 30]
diff --git a/pyproject.toml b/pyproject.toml
index d6453286d..d00045d67 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
 [project]
 name = "kernel_tuner"
 description = "An easy to use CUDA/OpenCL kernel tuner in Python"
-version = "1.0" # adhere to PEP440 versioning: https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#id55
+version = "1.1.0" # adhere to PEP440 versioning: https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#id55
 readme = "README.md"
 license = "Apache-2.0"
 authors = [
@@ -44,13 +44,13 @@ classifiers = [
 ]
 
 # ATTENTION: if anything is changed here, run `poetry update`
-requires-python = ">=3.10,<3.15"  # NOTE when changing the Python versions, also change the test versions in the Noxfile and GitHub Actions
+requires-python = ">=3.10,<4" # <4 is because of hip-python-fork  # NOTE when changing the Python versions, also change the test versions in the Noxfile and GitHub Actions
 dependencies = [
     "numpy (>=1.26.0,<2.0.0)",    # Python 3.12 requires numpy at least 1.26, CuPy does not support 2.0
     "scipy>=1.14.1",
     "packaging",        # required by file_utils
     "jsonschema",
-    "python-constraint2>=2.1.0",
+    "python-constraint2>=2.2.0",
     "xmltodict",
     "pandas>=2.0.0",
     "scikit-learn>=1.0.2",

From 02833f380e8d579f572c896cc30eeb0213b43096 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Tue, 25 Mar 2025 13:06:51 +0100
Subject: [PATCH 143/168] Updated dependencies, required python version and
 bumped version

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index d00045d67..1a7684138 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
 [project]
 name = "kernel_tuner"
 description = "An easy to use CUDA/OpenCL kernel tuner in Python"
-version = "1.1.0" # adhere to PEP440 versioning: https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#id55
+version = "1.2.0" # adhere to PEP440 versioning: https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#id55
 readme = "README.md"
 license = "Apache-2.0"
 authors = [
@@ -80,7 +80,7 @@ include = [
 # cupy-cuda12x = { version = "*", optional = true }
 # cuda-python = { version = "*", optional = true }
 [project.optional-dependencies]
-cuda = ["pycuda>=2024.1", "nvidia-ml-py>=12.535.108", "pynvml>=11.4.1"] # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile
+cuda = ["pycuda>=2025.1", "nvidia-ml-py>=12.535.108", "pynvml>=11.4.1"] # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile
 opencl = ["pyopencl"]                                                   # Attention: if pyopencl is changed here, also change `session.install("pyopencl")` in the Noxfile
 cuda_opencl = ["pycuda>=2024.1", "pyopencl"]                            # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile
 hip = ["hip-python-fork"]

From b820419d9a8dc1561ff122e41836bf448f2d5543 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Tue, 25 Mar 2025 13:11:09 +0100
Subject: [PATCH 144/168] Updated documentation dependencies

---
 doc/requirements.txt      | 172 +++++++++++------------
 doc/requirements_test.txt | 284 +++++++++++++++++++-------------------
 2 files changed, 228 insertions(+), 228 deletions(-)

diff --git a/doc/requirements.txt b/doc/requirements.txt
index 378dccc76..96ba317c1 100644
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@@ -1,87 +1,87 @@
-alabaster==0.7.16 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-asttokens==3.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-attrs==25.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-babel==2.17.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-beautifulsoup4==4.13.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-bleach==6.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-certifi==2025.1.31 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-cffi==1.17.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-charset-normalizer==3.4.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-colorama==0.4.6 ; python_version >= "3.10" and python_version <= "3.11" and sys_platform == "win32" or python_version >= "3.12" and python_version < "3.15" and sys_platform == "win32"
-decorator==5.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-defusedxml==0.7.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-docutils==0.20.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-dom-toml==2.0.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-domdf-python-tools==3.10.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+alabaster==0.7.16 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+asttokens==3.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+attrs==25.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+babel==2.17.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+beautifulsoup4==4.13.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+bleach==6.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+certifi==2025.1.31 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+cffi==1.17.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+charset-normalizer==3.4.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+colorama==0.4.6 ; python_version >= "3.10" and python_version <= "3.11" and sys_platform == "win32" or python_version >= "3.12" and python_version < "4" and sys_platform == "win32"
+decorator==5.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+defusedxml==0.7.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+docutils==0.20.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+dom-toml==2.0.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+domdf-python-tools==3.10.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
 exceptiongroup==1.2.2 ; python_version >= "3.10" and python_version < "3.11"
-executing==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-fastjsonschema==2.21.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-idna==3.10 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-imagesize==1.4.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-iniconfig==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-ipython==8.34.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-jedi==0.19.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-jinja2==3.1.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-joblib==1.4.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-jsonschema-specifications==2024.10.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-jsonschema==4.23.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-jupyter-client==8.6.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-jupyter-core==5.7.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-jupyterlab-pygments==0.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-markupsafe==2.1.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-matplotlib-inline==0.1.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-mistune==3.1.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-natsort==8.4.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-nbclient==0.10.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-nbconvert==7.16.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-nbformat==5.10.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-nbsphinx==0.9.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-pandocfilters==1.5.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-parso==0.8.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-pexpect==4.9.0 ; python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "3.15" and (sys_platform != "win32" and sys_platform != "emscripten")
-platformdirs==4.3.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-pluggy==1.5.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-prompt-toolkit==3.0.50 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-ptyprocess==0.7.0 ; python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.10" and python_version <= "3.11" and os_name != "nt" or python_version >= "3.12" and python_version < "3.15" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "3.15" and os_name != "nt"
-pure-eval==0.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-pycparser==2.22 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-pygments==2.19.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-pytest==8.3.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-python-constraint2==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-pytz==2025.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-pywin32==310 ; python_version >= "3.10" and python_version <= "3.11" and sys_platform == "win32" and platform_python_implementation != "PyPy" or python_version >= "3.12" and python_version < "3.15" and sys_platform == "win32" and platform_python_implementation != "PyPy"
-pyzmq==26.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-referencing==0.36.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-requests==2.32.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-rpds-py==0.23.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-scikit-learn==1.6.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-scipy==1.15.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-six==1.17.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-snowballstemmer==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-soupsieve==2.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-sphinx-pyproject==0.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-sphinx-rtd-theme==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-sphinx==7.4.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-sphinxcontrib-applehelp==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-sphinxcontrib-devhelp==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-sphinxcontrib-htmlhelp==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-sphinxcontrib-jquery==4.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-sphinxcontrib-jsmath==1.0.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-sphinxcontrib-qthelp==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-sphinxcontrib-serializinghtml==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-stack-data==0.6.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-threadpoolctl==3.6.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-tinycss2==1.4.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-tomli==2.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-tornado==6.4.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-traitlets==5.14.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-typing-extensions==4.12.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-tzdata==2025.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-urllib3==2.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-wcwidth==0.2.13 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-webencodings==0.5.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
-xmltodict==0.14.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15"
+executing==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+fastjsonschema==2.21.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+idna==3.10 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+imagesize==1.4.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+iniconfig==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+ipython==8.34.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+jedi==0.19.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+jinja2==3.1.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+joblib==1.4.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+jsonschema-specifications==2024.10.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+jsonschema==4.23.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+jupyter-client==8.6.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+jupyter-core==5.7.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+jupyterlab-pygments==0.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+markupsafe==2.1.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+matplotlib-inline==0.1.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+mistune==3.1.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+natsort==8.4.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+nbclient==0.10.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+nbconvert==7.16.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+nbformat==5.10.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+nbsphinx==0.9.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+pandocfilters==1.5.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+parso==0.8.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+pexpect==4.9.0 ; python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "4" and (sys_platform != "win32" and sys_platform != "emscripten")
+platformdirs==4.3.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+pluggy==1.5.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+prompt-toolkit==3.0.50 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+ptyprocess==0.7.0 ; python_version >= "3.10" and python_version <= "3.11" and os_name != "nt" or python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "4" and os_name != "nt" or python_version >= "3.12" and python_version < "4" and (sys_platform != "win32" and sys_platform != "emscripten")
+pure-eval==0.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+pycparser==2.22 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+pygments==2.19.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+pytest==8.3.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+python-constraint2==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+pytz==2025.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+pywin32==310 ; python_version >= "3.10" and python_version <= "3.11" and sys_platform == "win32" and platform_python_implementation != "PyPy" or python_version >= "3.12" and python_version < "4" and sys_platform == "win32" and platform_python_implementation != "PyPy"
+pyzmq==26.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+referencing==0.36.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+requests==2.32.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+rpds-py==0.23.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+scikit-learn==1.6.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+scipy==1.15.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+six==1.17.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+snowballstemmer==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+soupsieve==2.6 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+sphinx-pyproject==0.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+sphinx-rtd-theme==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+sphinx==7.4.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+sphinxcontrib-applehelp==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+sphinxcontrib-devhelp==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+sphinxcontrib-htmlhelp==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+sphinxcontrib-jquery==4.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+sphinxcontrib-jsmath==1.0.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+sphinxcontrib-qthelp==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+sphinxcontrib-serializinghtml==2.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+stack-data==0.6.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+threadpoolctl==3.6.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+tinycss2==1.4.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+tomli==2.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+tornado==6.4.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+traitlets==5.14.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+typing-extensions==4.12.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+tzdata==2025.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+urllib3==2.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+wcwidth==0.2.13 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+webencodings==0.5.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
+xmltodict==0.14.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4"
diff --git a/doc/requirements_test.txt b/doc/requirements_test.txt
index 11ed8518b..8a5ac0b63 100644
--- a/doc/requirements_test.txt
+++ b/doc/requirements_test.txt
@@ -1,122 +1,122 @@
-argcomplete==3.6.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
-    --hash=sha256:2e4e42ec0ba2fff54b0d244d0b1623e86057673e57bafe72dda59c64bd5dee8b \
-    --hash=sha256:4e3e4e10beb20e06444dbac0ac8dda650cb6349caeefe980208d3c548708bedd
-asttokens==3.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+argcomplete==3.6.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
+    --hash=sha256:927531c2fbaa004979f18c2316f6ffadcfc5cc2de15ae2624dfe65deaf60e14f \
+    --hash=sha256:cef54d7f752560570291214f0f1c48c3b8ef09aca63d65de7747612666725dbc
+asttokens==3.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7 \
     --hash=sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2
-attrs==25.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+attrs==25.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3 \
     --hash=sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b
-build==1.2.2.post1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+build==1.2.2.post1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:1d61c0887fa860c01971625baae8bdd338e517b836a2f70dd1f7aa3a6b2fc5b5 \
     --hash=sha256:b36993e92ca9375a219c99e606a122ff365a760a2d4bba0caa09bd5278b608b7
-colorama==0.4.6 ; python_version >= "3.10" and python_version <= "3.11" and sys_platform == "win32" or python_version >= "3.10" and python_version <= "3.11" and os_name == "nt" or python_version >= "3.12" and python_version < "3.15" and sys_platform == "win32" or python_version >= "3.12" and python_version < "3.15" and os_name == "nt" \
+colorama==0.4.6 ; python_version >= "3.10" and python_version <= "3.11" and sys_platform == "win32" or python_version >= "3.10" and python_version <= "3.11" and os_name == "nt" or python_version >= "3.12" and python_version < "4" and sys_platform == "win32" or python_version >= "3.12" and python_version < "4" and os_name == "nt" \
     --hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \
     --hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6
-colorlog==6.9.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+colorlog==6.9.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \
     --hash=sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2
-coverage==7.7.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
-    --hash=sha256:056d3017ed67e7ddf266e6f57378ece543755a4c9231e997789ab3bd11392c94 \
-    --hash=sha256:0ce8cf59e09d31a4915ff4c3b94c6514af4c84b22c4cc8ad7c3c546a86150a92 \
-    --hash=sha256:104bf640f408f4e115b85110047c7f27377e1a8b7ba86f7db4fa47aa49dc9a8e \
-    --hash=sha256:1393e5aa9441dafb0162c36c8506c648b89aea9565b31f6bfa351e66c11bcd82 \
-    --hash=sha256:1586ad158523f4133499a4f322b230e2cfef9cc724820dbd58595a5a236186f4 \
-    --hash=sha256:180e3fc68ee4dc5af8b33b6ca4e3bb8aa1abe25eedcb958ba5cff7123071af68 \
-    --hash=sha256:1b336d06af14f8da5b1f391e8dec03634daf54dfcb4d1c4fb6d04c09d83cef90 \
-    --hash=sha256:1c8fbce80b2b8bf135d105aa8f5b36eae0c57d702a1cc3ebdea2a6f03f6cdde5 \
-    --hash=sha256:2d673e3add00048215c2cc507f1228a7523fd8bf34f279ac98334c9b07bd2656 \
-    --hash=sha256:316f29cc3392fa3912493ee4c83afa4a0e2db04ff69600711f8c03997c39baaa \
-    --hash=sha256:33c1394d8407e2771547583b66a85d07ed441ff8fae5a4adb4237ad39ece60db \
-    --hash=sha256:37cbc7b0d93dfd133e33c7ec01123fbb90401dce174c3b6661d8d36fb1e30608 \
-    --hash=sha256:39abcacd1ed54e2c33c54bdc488b310e8ef6705833f7148b6eb9a547199d375d \
-    --hash=sha256:3ab7090f04b12dc6469882ce81244572779d3a4b67eea1c96fb9ecc8c607ef39 \
-    --hash=sha256:3b0e6e54591ae0d7427def8a4d40fca99df6b899d10354bab73cd5609807261c \
-    --hash=sha256:416e2a8845eaff288f97eaf76ab40367deafb9073ffc47bf2a583f26b05e5265 \
-    --hash=sha256:4545485fef7a8a2d8f30e6f79ce719eb154aab7e44217eb444c1d38239af2072 \
-    --hash=sha256:4c124025430249118d018dcedc8b7426f39373527c845093132196f2a483b6dd \
-    --hash=sha256:4fbb7a0c3c21908520149d7751cf5b74eb9b38b54d62997b1e9b3ac19a8ee2fe \
-    --hash=sha256:52fc89602cde411a4196c8c6894afb384f2125f34c031774f82a4f2608c59d7d \
-    --hash=sha256:55143aa13c49491f5606f05b49ed88663446dce3a4d3c5d77baa4e36a16d3573 \
-    --hash=sha256:57f3bd0d29bf2bd9325c0ff9cc532a175110c4bf8f412c05b2405fd35745266d \
-    --hash=sha256:5b2f144444879363ea8834cd7b6869d79ac796cb8f864b0cfdde50296cd95816 \
-    --hash=sha256:5efdeff5f353ed3352c04e6b318ab05c6ce9249c25ed3c2090c6e9cadda1e3b2 \
-    --hash=sha256:60e6347d1ed882b1159ffea172cb8466ee46c665af4ca397edbf10ff53e9ffaf \
-    --hash=sha256:693d921621a0c8043bfdc61f7d4df5ea6d22165fe8b807cac21eb80dd94e4bbd \
-    --hash=sha256:708f0a1105ef2b11c79ed54ed31f17e6325ac936501fc373f24be3e6a578146a \
-    --hash=sha256:70f0925c4e2bfc965369f417e7cc72538fd1ba91639cf1e4ef4b1a6b50439b3b \
-    --hash=sha256:7789e700f33f2b133adae582c9f437523cd5db8de845774988a58c360fc88253 \
-    --hash=sha256:7b6c96d69928a3a6767fab8dc1ce8a02cf0156836ccb1e820c7f45a423570d98 \
-    --hash=sha256:7d2a65876274acf544703e943c010b60bd79404e3623a1e5d52b64a6e2728de5 \
-    --hash=sha256:7f18d47641282664276977c604b5a261e51fefc2980f5271d547d706b06a837f \
-    --hash=sha256:89078312f06237417adda7c021c33f80f7a6d2db8572a5f6c330d89b080061ce \
-    --hash=sha256:8c938c6ae59be67ac19a7204e079efc94b38222cd7d0269f96e45e18cddeaa59 \
-    --hash=sha256:8e336b56301774ace6be0017ff85c3566c556d938359b61b840796a0202f805c \
-    --hash=sha256:a0a207c87a9f743c8072d059b4711f8d13c456eb42dac778a7d2e5d4f3c253a7 \
-    --hash=sha256:a2454b12a3f12cc4698f3508912e6225ec63682e2ca5a96f80a2b93cef9e63f3 \
-    --hash=sha256:a538a23119d1e2e2ce077e902d02ea3d8e0641786ef6e0faf11ce82324743944 \
-    --hash=sha256:aa4dff57fc21a575672176d5ab0ef15a927199e775c5e8a3d75162ab2b0c7705 \
-    --hash=sha256:ad0edaa97cb983d9f2ff48cadddc3e1fb09f24aa558abeb4dc9a0dbacd12cbb4 \
-    --hash=sha256:ae8006772c6b0fa53c33747913473e064985dac4d65f77fd2fdc6474e7cd54e4 \
-    --hash=sha256:b0fac2088ec4aaeb5468b814bd3ff5e5978364bfbce5e567c44c9e2854469f6c \
-    --hash=sha256:b3e212a894d8ae07fde2ca8b43d666a6d49bbbddb10da0f6a74ca7bd31f20054 \
-    --hash=sha256:b54a1ee4c6f1905a436cbaa04b26626d27925a41cbc3a337e2d3ff7038187f07 \
-    --hash=sha256:b667b91f4f714b17af2a18e220015c941d1cf8b07c17f2160033dbe1e64149f0 \
-    --hash=sha256:b8c36093aca722db73633cf2359026ed7782a239eb1c6db2abcff876012dc4cf \
-    --hash=sha256:bb356e7ae7c2da13f404bf8f75be90f743c6df8d4607022e759f5d7d89fe83f8 \
-    --hash=sha256:bce730d484038e97f27ea2dbe5d392ec5c2261f28c319a3bb266f6b213650135 \
-    --hash=sha256:c075d167a6ec99b798c1fdf6e391a1d5a2d054caffe9593ba0f97e3df2c04f0e \
-    --hash=sha256:c4e09534037933bf6eb31d804e72c52ec23219b32c1730f9152feabbd7499463 \
-    --hash=sha256:c5f8a5364fc37b2f172c26a038bc7ec4885f429de4a05fc10fdcb53fb5834c5c \
-    --hash=sha256:cb203c0afffaf1a8f5b9659a013f8f16a1b2cad3a80a8733ceedc968c0cf4c57 \
-    --hash=sha256:cc41374d2f27d81d6558f8a24e5c114580ffefc197fd43eabd7058182f743322 \
-    --hash=sha256:cd879d4646055a573775a1cec863d00c9ff8c55860f8b17f6d8eee9140c06166 \
-    --hash=sha256:d013c07061751ae81861cae6ec3a4fe04e84781b11fd4b6b4201590234b25c7b \
-    --hash=sha256:d8c7524779003d59948c51b4fcbf1ca4e27c26a7d75984f63488f3625c328b9b \
-    --hash=sha256:d9710521f07f526de30ccdead67e6b236fe996d214e1a7fba8b36e2ba2cd8261 \
-    --hash=sha256:e1ffde1d6bc2a92f9c9207d1ad808550873748ac2d4d923c815b866baa343b3f \
-    --hash=sha256:e7f559c36d5cdc448ee13e7e56ed7b6b5d44a40a511d584d388a0f5d940977ba \
-    --hash=sha256:f2a1e18a85bd066c7c556d85277a7adf4651f259b2579113844835ba1a74aafd \
-    --hash=sha256:f32b165bf6dfea0846a9c9c38b7e1d68f313956d60a15cde5d1709fddcaf3bee \
-    --hash=sha256:f5a2f71d6a91238e7628f23538c26aa464d390cbdedf12ee2a7a0fb92a24482a \
-    --hash=sha256:f81fe93dc1b8e5673f33443c0786c14b77e36f1025973b85e07c70353e46882b
-decorator==5.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+coverage==7.7.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
+    --hash=sha256:02fad4f8faa4153db76f9246bc95c1d99f054f4e0a884175bff9155cf4f856cb \
+    --hash=sha256:092b134129a8bb940c08b2d9ceb4459af5fb3faea77888af63182e17d89e1cf1 \
+    --hash=sha256:0ce92c5a9d7007d838456f4b77ea159cb628187a137e1895331e530973dcf862 \
+    --hash=sha256:0dab4ef76d7b14f432057fdb7a0477e8bffca0ad39ace308be6e74864e632271 \
+    --hash=sha256:1165490be0069e34e4f99d08e9c5209c463de11b471709dfae31e2a98cbd49fd \
+    --hash=sha256:11dd6f52c2a7ce8bf0a5f3b6e4a8eb60e157ffedc3c4b4314a41c1dfbd26ce58 \
+    --hash=sha256:15d54ecef1582b1d3ec6049b20d3c1a07d5e7f85335d8a3b617c9960b4f807e0 \
+    --hash=sha256:171e9977c6a5d2b2be9efc7df1126fd525ce7cad0eb9904fe692da007ba90d81 \
+    --hash=sha256:177d837339883c541f8524683e227adcaea581eca6bb33823a2a1fdae4c988e1 \
+    --hash=sha256:18f544356bceef17cc55fcf859e5664f06946c1b68efcea6acdc50f8f6a6e776 \
+    --hash=sha256:199a1272e642266b90c9f40dec7fd3d307b51bf639fa0d15980dc0b3246c1393 \
+    --hash=sha256:1e6f867379fd033a0eeabb1be0cffa2bd660582b8b0c9478895c509d875a9d9e \
+    --hash=sha256:2444fbe1ba1889e0b29eb4d11931afa88f92dc507b7248f45be372775b3cef4f \
+    --hash=sha256:25fe40967717bad0ce628a0223f08a10d54c9d739e88c9cbb0f77b5959367542 \
+    --hash=sha256:264ff2bcce27a7f455b64ac0dfe097680b65d9a1a293ef902675fa8158d20b24 \
+    --hash=sha256:2a79c4a09765d18311c35975ad2eb1ac613c0401afdd9cb1ca4110aeb5dd3c4c \
+    --hash=sha256:2c492401bdb3a85824669d6a03f57b3dfadef0941b8541f035f83bbfc39d4282 \
+    --hash=sha256:315ff74b585110ac3b7ab631e89e769d294f303c6d21302a816b3554ed4c81af \
+    --hash=sha256:34a3bf6b92e6621fc4dcdaab353e173ccb0ca9e4bfbcf7e49a0134c86c9cd303 \
+    --hash=sha256:37351dc8123c154fa05b7579fdb126b9f8b1cf42fd6f79ddf19121b7bdd4aa04 \
+    --hash=sha256:385618003e3d608001676bb35dc67ae3ad44c75c0395d8de5780af7bb35be6b2 \
+    --hash=sha256:392cc8fd2b1b010ca36840735e2a526fcbd76795a5d44006065e79868cc76ccf \
+    --hash=sha256:3d03287eb03186256999539d98818c425c33546ab4901028c8fa933b62c35c3a \
+    --hash=sha256:44683f2556a56c9a6e673b583763096b8efbd2df022b02995609cf8e64fc8ae0 \
+    --hash=sha256:44af11c00fd3b19b8809487630f8a0039130d32363239dfd15238e6d37e41a48 \
+    --hash=sha256:452735fafe8ff5918236d5fe1feac322b359e57692269c75151f9b4ee4b7e1bc \
+    --hash=sha256:4c181ceba2e6808ede1e964f7bdc77bd8c7eb62f202c63a48cc541e5ffffccb6 \
+    --hash=sha256:4dd532dac197d68c478480edde74fd4476c6823355987fd31d01ad9aa1e5fb59 \
+    --hash=sha256:520af84febb6bb54453e7fbb730afa58c7178fd018c398a8fcd8e269a79bf96d \
+    --hash=sha256:553ba93f8e3c70e1b0031e4dfea36aba4e2b51fe5770db35e99af8dc5c5a9dfe \
+    --hash=sha256:5b7b02e50d54be6114cc4f6a3222fec83164f7c42772ba03b520138859b5fde1 \
+    --hash=sha256:63306486fcb5a827449464f6211d2991f01dfa2965976018c9bab9d5e45a35c8 \
+    --hash=sha256:75c82b27c56478d5e1391f2e7b2e7f588d093157fa40d53fd9453a471b1191f2 \
+    --hash=sha256:7ba5ff236c87a7b7aa1441a216caf44baee14cbfbd2256d306f926d16b026578 \
+    --hash=sha256:7e688010581dbac9cab72800e9076e16f7cccd0d89af5785b70daa11174e94de \
+    --hash=sha256:80b5b207a8b08c6a934b214e364cab2fa82663d4af18981a6c0a9e95f8df7602 \
+    --hash=sha256:822fa99dd1ac686061e1219b67868e25d9757989cf2259f735a4802497d6da31 \
+    --hash=sha256:881cae0f9cbd928c9c001487bb3dcbfd0b0af3ef53ae92180878591053be0cb3 \
+    --hash=sha256:88d96127ae01ff571d465d4b0be25c123789cef88ba0879194d673fdea52f54e \
+    --hash=sha256:8b1c65a739447c5ddce5b96c0a388fd82e4bbdff7251396a70182b1d83631019 \
+    --hash=sha256:8fed429c26b99641dc1f3a79179860122b22745dd9af36f29b141e178925070a \
+    --hash=sha256:9bb47cc9f07a59a451361a850cb06d20633e77a9118d05fd0f77b1864439461b \
+    --hash=sha256:a6b6b3bd121ee2ec4bd35039319f3423d0be282b9752a5ae9f18724bc93ebe7c \
+    --hash=sha256:ae13ed5bf5542d7d4a0a42ff5160e07e84adc44eda65ddaa635c484ff8e55917 \
+    --hash=sha256:af94fb80e4f159f4d93fb411800448ad87b6039b0500849a403b73a0d36bb5ae \
+    --hash=sha256:b4c144c129343416a49378e05c9451c34aae5ccf00221e4fa4f487db0816ee2f \
+    --hash=sha256:b52edb940d087e2a96e73c1523284a2e94a4e66fa2ea1e2e64dddc67173bad94 \
+    --hash=sha256:b559adc22486937786731dac69e57296cb9aede7e2687dfc0d2696dbd3b1eb6b \
+    --hash=sha256:b838a91e84e1773c3436f6cc6996e000ed3ca5721799e7789be18830fad009a2 \
+    --hash=sha256:ba8480ebe401c2f094d10a8c4209b800a9b77215b6c796d16b6ecdf665048950 \
+    --hash=sha256:bc96441c9d9ca12a790b5ae17d2fa6654da4b3962ea15e0eabb1b1caed094777 \
+    --hash=sha256:c90e9141e9221dd6fbc16a2727a5703c19443a8d9bf7d634c792fa0287cee1ab \
+    --hash=sha256:d2e73e2ac468536197e6b3ab79bc4a5c9da0f078cd78cfcc7fe27cf5d1195ef0 \
+    --hash=sha256:d3154b369141c3169b8133973ac00f63fcf8d6dbcc297d788d36afbb7811e511 \
+    --hash=sha256:d66ff48ab3bb6f762a153e29c0fc1eb5a62a260217bc64470d7ba602f5886d20 \
+    --hash=sha256:d6874929d624d3a670f676efafbbc747f519a6121b581dd41d012109e70a5ebd \
+    --hash=sha256:e33426a5e1dc7743dd54dfd11d3a6c02c5d127abfaa2edd80a6e352b58347d1a \
+    --hash=sha256:e52eb31ae3afacdacfe50705a15b75ded67935770c460d88c215a9c0c40d0e9c \
+    --hash=sha256:eae79f8e3501133aa0e220bbc29573910d096795882a70e6f6e6637b09522133 \
+    --hash=sha256:eebd927b86761a7068a06d3699fd6c20129becf15bb44282db085921ea0f1585 \
+    --hash=sha256:eff187177d8016ff6addf789dcc421c3db0d014e4946c1cc3fbf697f7852459d \
+    --hash=sha256:f5f99a93cecf799738e211f9746dc83749b5693538fbfac279a61682ba309387 \
+    --hash=sha256:fbba59022e7c20124d2f520842b75904c7b9f16c854233fa46575c69949fb5b9
+decorator==5.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360 \
     --hash=sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a
-distlib==0.3.9 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+distlib==0.3.9 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \
     --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403
 exceptiongroup==1.2.2 ; python_version >= "3.10" and python_version < "3.11" \
     --hash=sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b \
     --hash=sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc
-executing==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+executing==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa \
     --hash=sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755
-filelock==3.18.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+filelock==3.18.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2 \
     --hash=sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de
 importlib-metadata==8.6.1 ; python_version >= "3.10" and python_full_version < "3.10.2" \
     --hash=sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e \
     --hash=sha256:310b41d755445d74569f993ccfc22838295d9fe005425094fad953d7f15c8580
-iniconfig==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+iniconfig==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7 \
     --hash=sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760
-ipython==8.34.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+ipython==8.34.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:0419883fa46e0baa182c5d50ebb8d6b49df1889fdb70750ad6d8cfe678eda6e3 \
     --hash=sha256:c31d658e754673ecc6514583e7dda8069e47136eb62458816b7d1e6625948b5a
-jedi==0.19.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+jedi==0.19.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0 \
     --hash=sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9
-joblib==1.4.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+joblib==1.4.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6 \
     --hash=sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e
-jsonschema-specifications==2024.10.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+jsonschema-specifications==2024.10.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:0f38b83639958ce1152d02a7f062902c41c8fd20d558b0c34344292d417ae272 \
     --hash=sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf
-jsonschema==4.23.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+jsonschema==4.23.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4 \
     --hash=sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566
-markupsafe==2.1.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+markupsafe==2.1.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf \
     --hash=sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff \
     --hash=sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f \
@@ -177,19 +177,19 @@ markupsafe==2.1.5 ; python_version >= "3.10" and python_version <= "3.11" or pyt
     --hash=sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab \
     --hash=sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd \
     --hash=sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68
-matplotlib-inline==0.1.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+matplotlib-inline==0.1.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90 \
     --hash=sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca
-mock==5.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+mock==5.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:4e460e818629b4b173f32d08bf30d3af8123afbb8e04bb5707a1fd4799e503f0 \
     --hash=sha256:7ba87f72ca0e915175596069dbbcc7c75af7b5e9b9bc107ad6349ede0819982f
-nox-poetry==1.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+nox-poetry==1.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:2531a404e3a21eb73fc1a587a548506a8e2c4c1e6e7ef0c1d0d8d6453b7e5d26 \
     --hash=sha256:266eea7a0ab3cad7f4121ecc05b76945036db3b67e6e347557f05010a18e2682
-nox==2024.10.9 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+nox==2024.10.9 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \
     --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95
-numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b \
     --hash=sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818 \
     --hash=sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20 \
@@ -226,10 +226,10 @@ numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_
     --hash=sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef \
     --hash=sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3 \
     --hash=sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f
-packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \
     --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f
-pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a \
     --hash=sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d \
     --hash=sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5 \
@@ -272,72 +272,72 @@ pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_
     --hash=sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015 \
     --hash=sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24 \
     --hash=sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319
-parso==0.8.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+parso==0.8.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18 \
     --hash=sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d
-pep440==0.1.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+pep440==0.1.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:36d6ad73f2b5d07769294cafe183500ac89d848c922a3d3f521b968481880d51 \
     --hash=sha256:58b37246cc2b13fee1ca2a3c092cb3704d21ecf621a5bdbb168e44e697f6d04d
-pexpect==4.9.0 ; python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "3.15" and (sys_platform != "win32" and sys_platform != "emscripten") \
+pexpect==4.9.0 ; python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "4" and (sys_platform != "win32" and sys_platform != "emscripten") \
     --hash=sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523 \
     --hash=sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f
-platformdirs==4.3.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+platformdirs==4.3.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:a03875334331946f13c549dbd8f4bac7a13a50a895a0eb1e8c6a8ace80d40a94 \
     --hash=sha256:eb437d586b6a0986388f0d6f74aa0cde27b48d0e3d66843640bfb6bdcdb6e351
-pluggy==1.5.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+pluggy==1.5.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \
     --hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669
-prompt-toolkit==3.0.50 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+prompt-toolkit==3.0.50 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:544748f3860a2623ca5cd6d2795e7a14f3d0e1c3c9728359013f79877fc89bab \
     --hash=sha256:9b6427eb19e479d98acff65196a307c555eb567989e6d88ebbb1b509d9779198
-ptyprocess==0.7.0 ; python_version >= "3.10" and python_version <= "3.11" and os_name != "nt" or python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "3.15" and os_name != "nt" or python_version >= "3.12" and python_version < "3.15" and (sys_platform != "win32" and sys_platform != "emscripten") \
+ptyprocess==0.7.0 ; python_version >= "3.10" and python_version <= "3.11" and os_name != "nt" or python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "4" and os_name != "nt" or python_version >= "3.12" and python_version < "4" and (sys_platform != "win32" and sys_platform != "emscripten") \
     --hash=sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35 \
     --hash=sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220
-pure-eval==0.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+pure-eval==0.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0 \
     --hash=sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42
-pygments==2.19.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+pygments==2.19.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f \
     --hash=sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c
-pyproject-hooks==1.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+pyproject-hooks==1.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:1e859bd5c40fae9448642dd871adf459e5e2084186e8d2c2a79a824c970da1f8 \
     --hash=sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913
-pytest-cov==5.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+pytest-cov==5.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652 \
     --hash=sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857
-pytest-timeout==2.3.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+pytest-timeout==2.3.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:12397729125c6ecbdaca01035b9e5239d4db97352320af155b3f5de1ba5165d9 \
     --hash=sha256:68188cb703edfc6a18fad98dc25a3c61e9f24d644b0b70f33af545219fc7813e
-pytest==8.3.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+pytest==8.3.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820 \
     --hash=sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845
-python-constraint2==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
-    --hash=sha256:02f46e4a7e8a46048604870287f1c55312eea47c2c15dd58b51057cb7d057bdc \
-    --hash=sha256:0e5ece0b4e85ed680af6b9db33ef3497a6f9499b8957cd830cd139f17ac29aef \
-    --hash=sha256:0f3a09c1947e6a90b9558cd1651e86dbe10f698aad56247596f2b856307707f0 \
-    --hash=sha256:1c650d717c2585fd8b2247f680ca1dcc6ea970cc5644c1d847f97eacb9f7dce2 \
-    --hash=sha256:38e4dbb2522ca2295873a57f6e0fddbb0856a780c87edd79b4074fd78790fed3 \
-    --hash=sha256:441f6a06e6c88c5fbe724b834c820d959ba7542037139153d1466c7be00c7cc0 \
-    --hash=sha256:6b8f82be66242fc5587011360b07c39e6e71e5d1c8f26a107dd2b04ab7854fcc \
-    --hash=sha256:8086a21724048746e68ab721cb4a216db15f86bb700d557af0ac60f2087d4d4e \
-    --hash=sha256:abea9ae443bf33fb396a6fb597b713e110f2abd9ecf1a656cd81f53da6751b79 \
-    --hash=sha256:ace17786565250de48b8d18da555feb31f5fb3521b2bd65e9871459e1d179600 \
-    --hash=sha256:b2385c99a9fe67ae26085a5a048c1d206cf0bd74acf0cd036227afa2a90fa4fd \
-    --hash=sha256:e29bed90abe1240bf24794e73e4d8fa3e50b6aa9226d915b1902cdd03375c28b \
-    --hash=sha256:ee3d33ca5694724a17bb596b93ff8687c70b4c07945e40a9007250e282e7ab28 \
-    --hash=sha256:f28d07eae04d83d454f0e6ba2da0678786a21f2d405998a3eec960b56d809692 \
-    --hash=sha256:fbb6ab033a7a4250bce11ca12fdf8958c6c42853e933cf585dbd265e0967dd93 \
-    --hash=sha256:fc3cffd0f16cb9b34d2e95bd6d27425dd24044073760477a1341e835fc9c45f4
-python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+python-constraint2==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
+    --hash=sha256:0a841b088076d9dc481989359076b439d5201126583d920173ed9ab9cf7c4771 \
+    --hash=sha256:0f0acfbae77ef7fcbff25d1c46b2360e0c486667c1a595b5c7cd4a6540cad5e6 \
+    --hash=sha256:203b740a78266123e36d88215bb232e5e682c5845b674d2d5b1218fb3394ff1f \
+    --hash=sha256:298c322c157ae6f5a9a9b8de3d08eefcdfed7b78e4abb2ddffe1bd345ed7997b \
+    --hash=sha256:348ee17de0de028b68bf8050af142adfae37b500e60ac6758dc499bc19712805 \
+    --hash=sha256:46cb1946fc7fb63262c43d4366f8cfceb551fb7a2bf10f275ac236d968746e02 \
+    --hash=sha256:48c4f8ca1573f08bb6ef900cbe2e642aa6afb77c11a1f7c9d42c054fcfd93b8b \
+    --hash=sha256:7bf723afbfdd13155f38d1344b015fd962818fdf70cdf39005a6a5bf810e5001 \
+    --hash=sha256:85ea5330b12ccb4a474c89e3fdd037c5173db0216985da0e9a5bc20f6e26d0ca \
+    --hash=sha256:8a39fecbb893137814a4f0ce82fd78df68789d658c6991bb6d57d773a6f8878d \
+    --hash=sha256:aae18d318fd5150cda3befcf40b178a8dc661abb79cf663fefb7edd6e3afd6ab \
+    --hash=sha256:b4d6159d05204cddfa4e46eef24a10f1d6aed41a905ca83314f5d1caa31599ab \
+    --hash=sha256:c337839cfb0b3559f2f211e2ae67993c7187abf5dddbc5b587fe26b7c1b5d0b0 \
+    --hash=sha256:c3b887f073f59cf5151df3cd25c2142016676da9034d5af56478c735526882d3 \
+    --hash=sha256:d060b179461f09ee6571222ee63b4ac8dafdb6a41ffa75296a2f6b07a6bc500e \
+    --hash=sha256:f1590a5699e1097f0057513e64bac4ac2d11f5848467c1c27967e1217f8bec3d
+python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \
     --hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427
-pytz==2025.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
-    --hash=sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57 \
-    --hash=sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e
-referencing==0.36.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+pytz==2025.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
+    --hash=sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3 \
+    --hash=sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00
+referencing==0.36.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa \
     --hash=sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0
-rpds-py==0.23.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+rpds-py==0.23.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:09cd7dbcb673eb60518231e02874df66ec1296c01a4fcd733875755c02014b19 \
     --hash=sha256:0f3288930b947cbebe767f84cf618d2cbe0b13be476e749da0e6a009f986248c \
     --hash=sha256:0fced9fd4a07a1ded1bac7e961ddd9753dd5d8b755ba8e05acba54a21f5f1522 \
@@ -441,7 +441,7 @@ rpds-py==0.23.1 ; python_version >= "3.10" and python_version <= "3.11" or pytho
     --hash=sha256:fad784a31869747df4ac968a351e070c06ca377549e4ace94775aaa3ab33ee06 \
     --hash=sha256:fc869af5cba24d45fb0399b0cfdbcefcf6910bf4dee5d74036a57cf5264b3ff4 \
     --hash=sha256:fee513135b5a58f3bb6d89e48326cd5aa308e4bcdf2f7d59f67c861ada482bf8
-ruff==0.4.10 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+ruff==0.4.10 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:0f54c481b39a762d48f64d97351048e842861c6662d63ec599f67d515cb417f6 \
     --hash=sha256:18238c80ee3d9100d3535d8eb15a59c4a0753b45cc55f8bf38f38d6a597b9739 \
     --hash=sha256:330421543bd3222cdfec481e8ff3460e8702ed1e58b494cf9d9e4bf90db52b9d \
@@ -459,7 +459,7 @@ ruff==0.4.10 ; python_version >= "3.10" and python_version <= "3.11" or python_v
     --hash=sha256:d8f71885bce242da344989cae08e263de29752f094233f932d4f5cfb4ef36a81 \
     --hash=sha256:dd1fcee327c20addac7916ca4e2653fbbf2e8388d8a6477ce5b4e986b68ae6c0 \
     --hash=sha256:ffe3cd2f89cb54561c62e5fa20e8f182c0a444934bf430515a4b422f1ab7b7ca
-scikit-learn==1.6.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+scikit-learn==1.6.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:0650e730afb87402baa88afbf31c07b84c98272622aaba002559b614600ca691 \
     --hash=sha256:0c8d036eb937dbb568c6242fa598d551d88fb4399c0344d95c001980ec1c7d36 \
     --hash=sha256:1061b7c028a8663fb9a1a1baf9317b64a257fcb036dae5c8752b2abef31d136f \
@@ -490,7 +490,7 @@ scikit-learn==1.6.1 ; python_version >= "3.10" and python_version <= "3.11" or p
     --hash=sha256:e7be3fa5d2eb9be7d77c3734ff1d599151bb523674be9b834e8da6abe132f44e \
     --hash=sha256:e8ca8cb270fee8f1f76fa9bfd5c3507d60c6438bbee5687f81042e2bb98e5a97 \
     --hash=sha256:fa909b1a36e000a03c382aade0bd2063fd5680ff8b8e501660c0f59f021a6415
-scipy==1.15.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+scipy==1.15.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:01edfac9f0798ad6b46d9c4c9ca0e0ad23dbf0b1eb70e96adb9fa7f525eff0bf \
     --hash=sha256:03205d57a28e18dfd39f0377d5002725bf1f19a46f444108c29bdb246b6c8a11 \
     --hash=sha256:08b57a9336b8e79b305a143c3655cc5bdbe6d5ece3378578888d2afbb51c4e37 \
@@ -537,16 +537,16 @@ scipy==1.15.2 ; python_version >= "3.10" and python_version <= "3.11" or python_
     --hash=sha256:f031846580d9acccd0044efd1a90e6f4df3a6e12b4b6bd694a7bc03a89892b28 \
     --hash=sha256:fb530e4794fc8ea76a4a21ccb67dea33e5e0e60f07fc38a49e821e1eae3b71a0 \
     --hash=sha256:fe8a9eb875d430d81755472c5ba75e84acc980e4a8f6204d402849234d3017db
-six==1.17.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+six==1.17.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \
     --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81
-stack-data==0.6.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+stack-data==0.6.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9 \
     --hash=sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695
-threadpoolctl==3.6.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+threadpoolctl==3.6.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb \
     --hash=sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e
-tomli==2.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+tomli==2.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \
     --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \
     --hash=sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c \
@@ -579,25 +579,25 @@ tomli==2.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_v
     --hash=sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272 \
     --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \
     --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7
-tomlkit==0.13.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+tomlkit==0.13.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde \
     --hash=sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79
-traitlets==5.14.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+traitlets==5.14.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7 \
     --hash=sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f
-typing-extensions==4.12.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+typing-extensions==4.12.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \
     --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8
-tzdata==2025.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
-    --hash=sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694 \
-    --hash=sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639
-virtualenv==20.29.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+tzdata==2025.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
+    --hash=sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8 \
+    --hash=sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9
+virtualenv==20.29.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:3e3d00f5807e83b234dfb6122bf37cfadf4be216c53a49ac059d02414f819170 \
     --hash=sha256:95e39403fcf3940ac45bc717597dba16110b74506131845d9b687d5e73d947ac
-wcwidth==0.2.13 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+wcwidth==0.2.13 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859 \
     --hash=sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5
-xmltodict==0.14.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "3.15" \
+xmltodict==0.14.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \
     --hash=sha256:201e7c28bb210e374999d1dde6382923ab0ed1a8a5faeece48ab525b7810a553 \
     --hash=sha256:20cc7d723ed729276e808f26fb6b3599f786cbc37e06c65e192ba77c40f20aac
 zipp==3.21.0 ; python_version >= "3.10" and python_full_version < "3.10.2" \

From 11b378fea7310199aa120eed05b0c5ced1fe604c Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 26 Mar 2025 10:45:49 +0100
Subject: [PATCH 145/168] Added python version classifiers

---
 pyproject.toml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 1a7684138..1d17ecadc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,6 +41,10 @@ classifiers = [
     "Topic :: Scientific/Engineering",
     "Topic :: Software Development",
     "Topic :: System :: Distributed Computing",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13"
 ]
 
 # ATTENTION: if anything is changed here, run `poetry update`

From 6550916492ce3a575701cdabfb43ef83e2008bae Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 26 Mar 2025 14:18:02 +0100
Subject: [PATCH 146/168] Improved code quality based on sonarcloud issues

---
 kernel_tuner/python.py      | 110 ++++++++++++++++++------------------
 kernel_tuner/searchspace.py |   3 +-
 2 files changed, 57 insertions(+), 56 deletions(-)

diff --git a/kernel_tuner/python.py b/kernel_tuner/python.py
index 00f2b24c1..0f450c690 100644
--- a/kernel_tuner/python.py
+++ b/kernel_tuner/python.py
@@ -31,7 +31,7 @@
 
 
 class PythonFunctions(object):
-    """Class that groups the code for running and compiling C functions"""
+    """Class that groups the code for running Python"""
 
     def __init__(self, iterations=7, observers=None, parallel_mode=False, hyperparam_mode=False, show_progressbar=False):
         """instantiate PythonFunctions object used for interacting with Python code
@@ -217,59 +217,61 @@ def benchmark_hyperparams(self, func, args, threads, grid):
         # print(f"In {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}")
         return result
 
-        start_time = perf_counter()
-        if self.parallel_mode:
-            num_procs = max(cpu_count() - 1, 1)
-            logging.debug(f"Running benchmark in parallel on {num_procs} processors")
-            manager = Manager()
-            MRE_values = manager.list()
-            runtimes = manager.list()
-            with get_context('spawn').Pool(num_procs) as pool:    # spawn alternative is forkserver, creates a reusable server
-                args = func, args, self.params
-                MRE_values, runtimes = zip(*pool.starmap(run_kernel_and_observers, zip(iterator, repeat(args))))
-                MRE_values, runtimes = list(MRE_values), list(runtimes)
-                print(MRE_values)
-            result["times"] = values
-            result["strategy_time"] = np.mean(runtimes)
-            np_results = np.array(values)
-        else:
-            # sequential implementation
-            np_results = np.array([])
-            for iter in iterator:
-                for obs in self.observers:
-                    obs.before_start()
-                value = self.run_kernel(func, args)
-                for obs in self.observers:
-                    obs.after_finish()
-
-                if value < 0.0:
-                    raise ValueError("Invalid benchmark result")
-
-                result["times"].append(value)
-                np_results = np.append(np_results, value)
-                if value >= invalid_value and iter >= min_valid_iterations and len(np_results[np_results < invalid_value]) < min_valid_iterations:
-                    break
-
-            # fill up the remaining iters with invalid in case of a break
-            result["times"] += [invalid_value] * (self.iterations - len(result["times"]))
-
-            # finish by instrumenting the results with the observers
-            for obs in self.observers:
-                result.update(obs.get_results())
-
-        benchmark_time = perf_counter() - start_time
-        self.benchmark_times.append(benchmark_time)
-        print(f"Time taken: {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}")
-
-        # calculate the mean of the means of the Mean Relative Error over the valid results
-        valid_results = np_results[np_results < invalid_value]
-        mean_mean_MRE = np.mean(valid_results) if len(valid_results) > 0 else np.nan
-
-        # write the 'time' to the results and return
-        if np.isnan(mean_mean_MRE) or len(valid_results) < min_valid_iterations:
-            mean_mean_MRE = invalid_value
-        result["time"] = mean_mean_MRE
-        return result
+        # old implementation
+
+        # start_time = perf_counter()
+        # if self.parallel_mode:
+        #     num_procs = max(cpu_count() - 1, 1)
+        #     logging.debug(f"Running benchmark in parallel on {num_procs} processors")
+        #     manager = Manager()
+        #     MRE_values = manager.list()
+        #     runtimes = manager.list()
+        #     with get_context('spawn').Pool(num_procs) as pool:    # spawn alternative is forkserver, creates a reusable server
+        #         args = func, args, self.params
+        #         MRE_values, runtimes = zip(*pool.starmap(run_kernel_and_observers, zip(iterator, repeat(args))))
+        #         MRE_values, runtimes = list(MRE_values), list(runtimes)
+        #         print(MRE_values)
+        #     result["times"] = values
+        #     result["strategy_time"] = np.mean(runtimes)
+        #     np_results = np.array(values)
+
+        # # sequential implementation
+        # np_results = np.array([])
+        # for iter in iterator:
+        #     for obs in self.observers:
+        #         obs.before_start()
+        #     value = self.run_kernel(func, args)
+        #     for obs in self.observers:
+        #         obs.after_finish()
+
+        #     if value < 0.0:
+        #         raise ValueError("Invalid benchmark result")
+
+        #     result["times"].append(value)
+        #     np_results = np.append(np_results, value)
+        #     if value >= invalid_value and iter >= min_valid_iterations and len(np_results[np_results < invalid_value]) < min_valid_iterations:
+        #         break
+
+        # # fill up the remaining iters with invalid in case of a break
+        # result["times"] += [invalid_value] * (self.iterations - len(result["times"]))
+
+        # # finish by instrumenting the results with the observers
+        # for obs in self.observers:
+        #     result.update(obs.get_results())
+
+        # benchmark_time = perf_counter() - start_time
+        # self.benchmark_times.append(benchmark_time)
+        # print(f"Time taken: {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}")
+
+        # # calculate the mean of the means of the Mean Relative Error over the valid results
+        # valid_results = np_results[np_results < invalid_value]
+        # mean_mean_MRE = np.mean(valid_results) if len(valid_results) > 0 else np.nan
+
+        # # write the 'time' to the results and return
+        # if np.isnan(mean_mean_MRE) or len(valid_results) < min_valid_iterations:
+        #     mean_mean_MRE = invalid_value
+        # result["time"] = mean_mean_MRE
+        # return result
 
     def run_kernel(self, func, args, threads, grid):
         """runs the kernel once, returns whatever the kernel returns
diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py
index 8b285f5ad..e650f9628 100644
--- a/kernel_tuner/searchspace.py
+++ b/kernel_tuner/searchspace.py
@@ -1017,5 +1017,4 @@ def to_ax_searchspace(self):
         raise NotImplementedError(
             "Conversion to Ax SearchSpace has not been fully implemented as Ax Searchspaces can't capture full complexity."
         )
-
-        return ax_searchspace
+        # return ax_searchspace

From 6770d3ccf9c85cecc46619ba90590fe61983f134 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 26 Mar 2025 14:27:03 +0100
Subject: [PATCH 147/168] Removed PythonFunctions approach to hyperparameter
 tuning that is no longer needed with the autotuning methodology interface

---
 kernel_tuner/python.py | 401 -----------------------------------------
 1 file changed, 401 deletions(-)
 delete mode 100644 kernel_tuner/python.py

diff --git a/kernel_tuner/python.py b/kernel_tuner/python.py
deleted file mode 100644
index 0f450c690..000000000
--- a/kernel_tuner/python.py
+++ /dev/null
@@ -1,401 +0,0 @@
-""" This module contains the functionality for running Python functions """
-
-from collections import namedtuple
-import platform
-import logging
-import warnings
-import importlib.util
-from math import ceil
-from time import perf_counter
-from typing import Tuple
-
-# import cProfile
-
-import progressbar
-import numpy as np
-
-# for parallel subprocess runs
-from multiprocess import Manager, cpu_count, get_context    # using Pathos as Python's multiprocessing is unable to pickle
-from itertools import repeat
-import subprocess
-import sys
-from os import getpid
-
-from kernel_tuner.util import get_temp_filename, delete_temp_file
-
-# This represents an individual kernel argument.
-# It contains a numpy object (ndarray or number) and a ctypes object with a copy
-# of the argument data. For an ndarray, the ctypes object is a wrapper for the ndarray's data.
-Argument = namedtuple("Argument", ["numpy", "ctypes"])
-invalid_value = 1e20
-
-
-class PythonFunctions(object):
-    """Class that groups the code for running Python"""
-
-    def __init__(self, iterations=7, observers=None, parallel_mode=False, hyperparam_mode=False, show_progressbar=False):
-        """instantiate PythonFunctions object used for interacting with Python code
-
-        :param iterations: Number of iterations used while benchmarking a kernel, 7 by default.
-        :type iterations: int
-        """
-        self.iterations = iterations
-        self.max_threads = 1024
-        self.show_progressbar = show_progressbar
-
-        #environment info
-        env = dict()
-        env["iterations"] = self.iterations
-        self.env = env
-        self.name = platform.processor()
-        self.observers = observers or []
-        self.num_unused_cores = 1    # do not use all cores to do other work
-        self.num_cores = max(min(cpu_count() - self.num_unused_cores, self.iterations), 1)    # assumes cpu_count does not change during the life of this class!
-        self.parallel_mode = parallel_mode and self.num_cores > 1
-        self.hyperparam_mode = hyperparam_mode
-
-        self.benchmark = self.benchmark_normal if not self.hyperparam_mode else self.benchmark_hyperparams
-
-        self.benchmark_times = []
-
-        if self.parallel_mode:
-            warnings.warn(
-                "Be sure to check that simulation mode is true for the kernel, because parallel mode requires a completed cache file to avoid race conditions.")
-
-        if len(self.observers) > 0 and self.parallel_mode:
-            raise NotImplementedError("Observers are currently not implemented for parallel execution.")
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, *exc):
-        pass
-
-    def ready_argument_list(self, arguments):
-        """ready argument list to be passed to the Python function
-        """
-        return arguments
-
-    def compile(self, kernel_instance):
-        """ return the function from the kernel instance """
-
-        suffix = kernel_instance.kernel_source.get_user_suffix()
-        source_file = get_temp_filename(suffix=suffix)
-
-        spec = importlib.util.find_spec(kernel_instance.name)
-        foo = importlib.util.module_from_spec(spec)
-        spec.loader.exec_module(foo)
-        func = getattr(foo, kernel_instance.name)
-
-        self.params = kernel_instance.params
-
-        delete_temp_file(source_file)
-        return func
-
-    def benchmark_normal(self, func, args, threads, grid):
-        """runs the kernel repeatedly, returns times
-
-        :param func: A Python function for this specific configuration
-        :type func: ctypes._FuncPtr
-
-        :param args: A list of arguments to the function, order should match the
-            order in the code. The list should be prepared using
-            ready_argument_list().
-        :type args: list(Argument)
-
-        :param threads: Ignored, but left as argument for now to have the same
-            interface as CudaFunctions and OpenCLFunctions.
-        :type threads: any
-
-        :param grid: Ignored, but left as argument for now to have the same
-            interface as CudaFunctions and OpenCLFunctions.
-        :type grid: any
-
-        :returns: All times.
-        :rtype: dict()
-        """
-
-        result = dict()
-        result["times"] = []
-        iterator = range(self.iterations) if not self.show_progressbar or self.parallel_mode else progressbar.progressbar(
-            range(self.iterations), min_value=0, max_value=self.iterations, redirect_stdout=True)
-
-        # new implementation
-        start_time = perf_counter()
-        if self.parallel_mode:
-            logging.debug(f"Running benchmark in parallel on {self.num_cores} processors")
-            manager = Manager()
-            invalid_flag = manager.Value('i', int(False))
-            values = manager.list()
-            runtimes = manager.list()
-            with get_context('spawn').Pool(self.num_cores) as pool:    # spawn alternative is forkserver, creates a reusable server
-                args = func, args, self.params, invalid_flag
-                values, runtimes = zip(*pool.starmap(run_kernel_and_observers, zip(iterator, repeat(args))))
-                values, runtimes = list(values), list(runtimes)
-            result["strategy_time"] = np.mean(runtimes)
-        else:
-            values = list()
-            for _ in range(self.iterations):
-                value = self.run_kernel(func, args, threads, grid)
-                if value < 0.0:
-                    raise Exception("too many resources requested for launch")
-                values.append(value)
-
-        benchmark_time = perf_counter() - start_time
-        self.benchmark_times.append(benchmark_time)
-
-        result["times"] = values
-        result["time"] = np.mean(values)
-        # print(f"Mean: {np.mean(values)}, std: {np.std(values)} in {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}\n")
-        return result
-
-    def benchmark_hyperparams(self, func, args, threads, grid):
-        """runs the kernel repeatedly, returns grandmedian for hyperparameter tuning
-
-        :param func: A Python function for this specific configuration
-        :type func: ctypes._FuncPtr
-
-        :param args: A list of arguments to the function, order should match the
-            order in the code. The list should be prepared using
-            ready_argument_list().
-        :type args: list(Argument)
-
-        :param threads: Ignored, but left as argument for now to have the same
-            interface as CudaFunctions and OpenCLFunctions.
-        :type threads: any
-
-        :param grid: Ignored, but left as argument for now to have the same
-            interface as CudaFunctions and OpenCLFunctions.
-        :type grid: any
-
-        :returns: All execution hyperparameter scores in the same format as times.
-        :rtype: dict()
-        """
-
-        # For reference: the following times were obtained with 35 repeats on random_sample strategy.
-        # As seen, there is a lot of overhead with subproceses; directly executing the function scales much better.
-        # time taken by sequential: 20.7 sec
-        # time taken by parallel in sequential form (subprocess overhead): 46.3 sec
-        # time taken by parallel subprocesses: 7.5 sec on 9, 9.9 sec on 8, 13.6 sec on 4, 27.8 sec on 2, 45.9 sec on 1
-        # time taken by parallel directly: 2.99 sec on 9, 4.0 sec on 8, 5.23 sec on 4, 11.3 sec on 2, 19.3 sec on 1
-
-        result = dict()
-        result["times"] = []
-        min_valid_iterations = ceil(self.iterations * 0.8)
-        iterator = range(self.iterations) if not self.show_progressbar or self.parallel_mode else progressbar.progressbar(
-            range(self.iterations), min_value=0, max_value=self.iterations, redirect_stdout=True)
-
-        # new implementation
-        start_time = perf_counter()
-        if self.parallel_mode:
-            logging.debug(f"Running hyperparameter benchmark in parallel on {self.num_cores} processors")
-            manager = Manager()
-            invalid_flag = manager.Value('i', int(False))
-            MWP_values = manager.list()
-            runtimes = manager.list()
-            warnings_dicts = manager.list()
-            with get_context('spawn').Pool(self.num_cores) as pool:    # spawn alternative is forkserver, creates a reusable server
-                args = func, args, self.params, invalid_flag
-                MWP_values, runtimes, warnings_dicts = zip(*pool.starmap(run_kernel_and_observers, zip(iterator, repeat(args))))
-                MWP_values, runtimes, warnings_dicts = list(MWP_values), list(runtimes), list(warnings_dicts)
-            result["strategy_time"] = np.mean(runtimes)
-            warning_dict = warnings_dicts[0]
-            for key in warning_dict.keys():
-                warning_dict[key] = np.mean(list(warnings_dict[key] for warnings_dict in warnings_dicts))
-            result["warnings"] = warning_dict
-        else:
-            raise NotImplementedError("Sequential mode has not been implemented yet")
-
-        benchmark_time = perf_counter() - start_time
-        self.benchmark_times.append(benchmark_time)
-
-        grandmean, times = get_hyperparam_grandmedian_and_times(MWP_values, invalid_value, min_valid_iterations)
-        result["times"] = times
-        result["time"] = grandmean
-        print(f"Grandmean: {grandmean} in {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}\n")
-        # print(f"Grandmean: {grandmean}, mean MWP per iteration: {np.mean(times)}, std MWP per iteration: {np.std(times)}")
-        # print(f"In {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}")
-        return result
-
-        # old implementation
-
-        # start_time = perf_counter()
-        # if self.parallel_mode:
-        #     num_procs = max(cpu_count() - 1, 1)
-        #     logging.debug(f"Running benchmark in parallel on {num_procs} processors")
-        #     manager = Manager()
-        #     MRE_values = manager.list()
-        #     runtimes = manager.list()
-        #     with get_context('spawn').Pool(num_procs) as pool:    # spawn alternative is forkserver, creates a reusable server
-        #         args = func, args, self.params
-        #         MRE_values, runtimes = zip(*pool.starmap(run_kernel_and_observers, zip(iterator, repeat(args))))
-        #         MRE_values, runtimes = list(MRE_values), list(runtimes)
-        #         print(MRE_values)
-        #     result["times"] = values
-        #     result["strategy_time"] = np.mean(runtimes)
-        #     np_results = np.array(values)
-
-        # # sequential implementation
-        # np_results = np.array([])
-        # for iter in iterator:
-        #     for obs in self.observers:
-        #         obs.before_start()
-        #     value = self.run_kernel(func, args)
-        #     for obs in self.observers:
-        #         obs.after_finish()
-
-        #     if value < 0.0:
-        #         raise ValueError("Invalid benchmark result")
-
-        #     result["times"].append(value)
-        #     np_results = np.append(np_results, value)
-        #     if value >= invalid_value and iter >= min_valid_iterations and len(np_results[np_results < invalid_value]) < min_valid_iterations:
-        #         break
-
-        # # fill up the remaining iters with invalid in case of a break
-        # result["times"] += [invalid_value] * (self.iterations - len(result["times"]))
-
-        # # finish by instrumenting the results with the observers
-        # for obs in self.observers:
-        #     result.update(obs.get_results())
-
-        # benchmark_time = perf_counter() - start_time
-        # self.benchmark_times.append(benchmark_time)
-        # print(f"Time taken: {round(benchmark_time, 3)} seconds, mean: {round(np.mean(self.benchmark_times), 3)}")
-
-        # # calculate the mean of the means of the Mean Relative Error over the valid results
-        # valid_results = np_results[np_results < invalid_value]
-        # mean_mean_MRE = np.mean(valid_results) if len(valid_results) > 0 else np.nan
-
-        # # write the 'time' to the results and return
-        # if np.isnan(mean_mean_MRE) or len(valid_results) < min_valid_iterations:
-        #     mean_mean_MRE = invalid_value
-        # result["time"] = mean_mean_MRE
-        # return result
-
-    def run_kernel(self, func, args, threads, grid):
-        """runs the kernel once, returns whatever the kernel returns
-
-        :param func: A Python function for this specific configuration
-        :type func: ctypes._FuncPtr
-
-        :param args: A list of arguments to the function, order should match the
-            order in the code. The list should be prepared using
-            ready_argument_list().
-        :type args: list(Argument)
-
-        :param threads: Ignored, but left as argument for now to have the same
-            interface as CudaFunctions and OpenCLFunctions.
-        :type threads: any
-
-        :param grid: Ignored, but left as argument for now to have the same
-            interface as CudaFunctions and OpenCLFunctions.
-        :type grid: any
-
-        :returns: A robust average of values returned by the C function.
-        :rtype: float
-        """
-        logging.debug("run_kernel")
-        logging.debug("arguments=" + str([str(arg) for arg in args]))
-
-        time = func(*args, **self.params)
-
-        return time
-
-    units = {}
-
-
-def run_hyperparam_kernel_and_observers(iter, args) -> Tuple[list, float, dict]:
-    """ Function to run a hyperparam kernel directly for parallel processing. Must be outside the class to avoid pickling issues due to large scope. """
-    PID = getpid()
-    # print(f"Iter {iter+1}, PID {PID}", flush=True)
-    func, funcargs, params, invalid_flag = args
-    logging.debug(f"run_kernel iter {iter} (PID {PID})")
-    logging.debug("arguments=" + str([str(arg) for arg in funcargs]))
-
-    # run the kernel
-    starttime = perf_counter()
-    # cProfile.runctx('func(invalid_flag, *funcargs, **params)', globals(), locals(), 'profile-%s.out' % str(iter + 1))
-    # values, warning_dict = None, None
-    values, warning_dict = func(invalid_flag, *funcargs, **params)
-    runtime = perf_counter() - starttime
-    return values, runtime, warning_dict
-
-
-def run_hyperparam_kernel_as_subprocess(iter, args):
-    """ Function to run a hyperparam kernel as a subprocess for parallel processing. Must be outside the class to avoid pickling issues due to large scope. Significantly slower than run_kernel, but guaranteed to be a different process. Observers are not implemented."""
-    func, args, params = args
-    PID = getpid()
-    # print(f"Iter {iter}, PID {PID}", flush=True)
-    logging.debug(f"run_kernel as subprocess {iter} (PID {PID})")
-    logging.debug("arguments=" + str([str(arg) for arg in args]))
-
-    def make_kwargstrings(**kwargs) -> list:
-        return list(f"{key}={value}" for key, value in kwargs.items())
-
-    # Subprocess
-    args += make_kwargstrings(**params)
-    proc = subprocess.run([sys.executable or 'python', str(func.__name__ + '.py')] + args, shell=False, capture_output=True)
-    stderr = f"subprocess {iter} with PID {PID} errors: {proc.stderr.decode('utf-8')}" if len(proc.stderr.decode('utf-8')) > 0 else ""
-    stdout = f"subprocess {iter} with PID {PID} output: {proc.stdout.decode('utf-8')}" if len(proc.stdout.decode('utf-8')) > 0 else ""
-
-    if stderr != "":
-        logging.debug(stderr)
-        print(stderr)
-    if stdout != "":
-        logging.debug(stdout)
-        # print(stdout)
-
-    time = float(stdout.split("result_value=")[1])
-    return time
-
-
-def get_hyperparam_grandmedian_and_times(MWP_values, invalid_value, min_valid_iterations=1):
-    """ Get the grandmean (mean of median MWP per kernel) and mean MWP per iteration """
-    MWP_values = np.array(MWP_values)
-    median_MWPs = np.array([])
-    median_MWPs_vars = np.array([])
-    valid_MWP_times = list()
-    # get the mean MWP per kernel
-    for i in range(len(MWP_values[0])):
-        MWP_kernel_values = MWP_values[:, i]
-        valid_MWP_mask = (MWP_kernel_values < invalid_value) & (MWP_kernel_values >= 0)
-        valid_MWP_kernel_values = MWP_kernel_values[valid_MWP_mask]
-        if len(valid_MWP_kernel_values) >= min_valid_iterations:
-            # # filter outliers by keeping only values that are within two times the Median Absolute Deviation
-            # AD = np.abs(valid_MWP_kernel_values - np.median(valid_MWP_kernel_values))
-            # MAD = np.median(AD)
-            # selected_MWP_kernel_values = valid_MWP_kernel_values[AD < MAD * 3]
-            # print(f"Removed {len(valid_MWP_kernel_values) - len(selected_MWP_kernel_values)}")
-            # median_MWPs = np.append(median_MWPs, np.median(selected_MWP_kernel_values))
-            # median_MWPs = np.append(median_MWPs, np.mean(valid_MWP_kernel_values))
-
-            # filter outliers by keeping only values that are within three times the Median Absolute Deviation
-            AD = np.abs(valid_MWP_kernel_values - np.median(valid_MWP_kernel_values))
-            MAD = np.median(AD)
-            MAD_score = AD / MAD if MAD else 0.0
-            selected_MWP_kernel_values = valid_MWP_kernel_values[MAD_score < 3]
-            median_MWPs = np.append(median_MWPs, np.median(selected_MWP_kernel_values))
-            median_MWPs_vars = np.append(median_MWPs_vars, np.std(selected_MWP_kernel_values))
-        else:
-            median_MWPs = np.append(median_MWPs, invalid_value)
-            median_MWPs_vars = np.append(median_MWPs_vars, 1)
-
-    # get the mean MWP per iteration
-    for i in range(len(MWP_values)):
-        MWP_iteration_values = MWP_values[i]
-        valid_MWP_mask = (MWP_iteration_values < invalid_value) & (MWP_iteration_values >= 0)
-        valid_MWP_iteration_values = MWP_iteration_values[valid_MWP_mask]
-        if len(valid_MWP_iteration_values) > 0:
-            valid_MWP_times.append(np.mean(valid_MWP_iteration_values))
-        else:
-            valid_MWP_times.append(invalid_value)
-
-    # get the grandmean by taking the inverse-variance weighted average over the median MWP per kernel, invalid if one of the kernels is invalid
-    print(median_MWPs)
-    print(median_MWPs / median_MWPs_vars, np.sum(1 / median_MWPs_vars), np.std(median_MWPs / median_MWPs_vars))
-    inverse_variance_weighted_average = np.sum(median_MWPs / median_MWPs_vars) / np.sum(1 / median_MWPs_vars)
-    grandmean_MWP = inverse_variance_weighted_average
-    if np.isnan(grandmean_MWP) or len(median_MWPs[median_MWPs >= invalid_value]) > 0:
-        grandmean_MWP = invalid_value
-    return grandmean_MWP, valid_MWP_times

From 3dbe379d2a967f62eb70bda4422980107588db1f Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 26 Mar 2025 14:27:27 +0100
Subject: [PATCH 148/168] Removed bayes_opt_old as a strategy

---
 kernel_tuner/strategies/bayes_opt_old.py | 833 -----------------------
 test/strategies/test_strategies.py       |   4 +-
 2 files changed, 2 insertions(+), 835 deletions(-)
 delete mode 100644 kernel_tuner/strategies/bayes_opt_old.py

diff --git a/kernel_tuner/strategies/bayes_opt_old.py b/kernel_tuner/strategies/bayes_opt_old.py
deleted file mode 100644
index a55790e66..000000000
--- a/kernel_tuner/strategies/bayes_opt_old.py
+++ /dev/null
@@ -1,833 +0,0 @@
-"""Bayesian Optimization implementation from the thesis by Willemsen."""
-import itertools
-import time
-import warnings
-from copy import deepcopy
-from random import randint, shuffle
-
-import numpy as np
-
-# BO imports
-try:
-    from typing import Tuple
-
-    from scipy.stats import norm
-    from sklearn.exceptions import ConvergenceWarning
-    from sklearn.gaussian_process import GaussianProcessRegressor
-    from sklearn.gaussian_process.kernels import RBF, ConstantKernel, Matern
-    from skopt.sampler import Lhs
-    bayes_opt_present = True
-except ImportError:
-    bayes_opt_present = False
-
-from kernel_tuner import util
-from kernel_tuner.strategies import minimize
-
-supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast"]
-
-
-def generate_normalized_param_dicts(tune_params: dict, eps: float) -> Tuple[dict, dict]:
-    """Generates normalization and denormalization dictionaries."""
-    original_to_normalized = dict()
-    normalized_to_original = dict()
-    for param_name in tune_params.keys():
-        original_to_normalized_dict = dict()
-        normalized_to_original_dict = dict()
-        for value_index, value in enumerate(tune_params[param_name]):
-            normalized_value = eps * value_index + 0.5 * eps
-            normalized_to_original_dict[normalized_value] = value
-            original_to_normalized_dict[value] = normalized_value
-        original_to_normalized[param_name] = original_to_normalized_dict
-        normalized_to_original[param_name] = normalized_to_original_dict
-    return original_to_normalized, normalized_to_original
-
-
-def normalize_parameter_space(param_space: list, tune_params: dict, normalized: dict) -> list:
-    """Normalize the parameter space given a normalization dictionary."""
-    keys = list(tune_params.keys())
-    param_space_normalized = list(tuple(normalized[keys[i]][v] for i, v in enumerate(params)) for params in param_space)
-    return param_space_normalized
-
-
-def prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict):
-    """Pruning of the parameter space to remove dimensions that have a constant parameter."""
-    pruned_tune_params_mask = list()
-    removed_tune_params = list()
-    param_names = list(tune_params.keys())
-    for index, key in enumerate(tune_params.keys()):
-        pruned_tune_params_mask.append(len(tune_params[key]) > 1)
-        if len(tune_params[key]) > 1:
-            removed_tune_params.append(None)
-        else:
-            value = tune_params[key][0]
-            normalized = normalize_dict[param_names[index]][value]
-            removed_tune_params.append(normalized)
-    if 'verbose' in tuning_options and tuning_options.verbose is True and len(tune_params.keys()) != sum(pruned_tune_params_mask):
-        print(f"Number of parameters (dimensions): {len(tune_params.keys())}, after pruning: {sum(pruned_tune_params_mask)}")
-    parameter_space = list(tuple(itertools.compress(param_config, pruned_tune_params_mask)) for param_config in parameter_space)
-    return parameter_space, removed_tune_params
-
-
-def tune(runner, kernel_options, device_options, tuning_options):
-    """Find the best performing kernel configuration in the parameter space.
-
-    :params runner: A runner from kernel_tuner.runners
-    :type runner: kernel_tuner.runner
-
-    :param kernel_options: A dictionary with all options for the kernel.
-    :type kernel_options: kernel_tuner.interface.Options
-
-    :param device_options: A dictionary with all options for the device
-        on which the kernel should be tuned.
-    :type device_options: kernel_tuner.interface.Options
-
-    :param tuning_options: A dictionary with all options regarding the tuning
-        process. Allows setting hyperparameters via the strategy_options key.
-    :type tuning_options: kernel_tuner.interface.Options
-
-    :returns: A list of dictionaries for executed kernel configurations and their
-        execution times. And a dictionary that contains a information
-        about the hardware/software environment on which the tuning took place.
-    :rtype: list(dict()), dict()
-
-    """
-    max_fevals = tuning_options.strategy_options.get("max_fevals", 100)
-    prune_parameterspace = tuning_options.strategy_options.get("pruneparameterspace", True)
-    if not bayes_opt_present:
-        raise ImportError("Error: optional dependencies for Bayesian Optimization not installed")
-
-    # epsilon for scaling should be the evenly spaced distance between the largest set of parameter options in an interval [0,1]
-    tune_params = tuning_options.tune_params
-    tuning_options["scaling"] = True
-    _, _, eps = minimize.get_bounds_x0_eps(tuning_options)
-
-    # compute cartesian product of all tunable parameters
-    parameter_space = itertools.product(*tune_params.values())
-
-    # check for search space restrictions
-    if tuning_options.restrictions is not None:
-        tuning_options.verbose = False
-    parameter_space = filter(lambda p: util.config_valid(p, tuning_options, runner.dev.max_threads), parameter_space)
-    parameter_space = list(parameter_space)
-    if len(parameter_space) < 1:
-        raise ValueError("Empty parameterspace after restrictionscheck. Restrictionscheck is possibly too strict.")
-    if len(parameter_space) == 1:
-        raise ValueError(f"Only one configuration after restrictionscheck. Restrictionscheck is possibly too strict. Configuration: {parameter_space[0]}")
-
-    # normalize search space to [0,1]
-    normalize_dict, denormalize_dict = generate_normalized_param_dicts(tune_params, eps)
-    parameter_space = normalize_parameter_space(parameter_space, tune_params, normalize_dict)
-
-    # prune the parameter space to remove dimensions that have a constant parameter
-    if prune_parameterspace:
-        parameter_space, removed_tune_params = prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict)
-    else:
-        parameter_space = list(parameter_space)
-        removed_tune_params = [None] * len(tune_params.keys())
-
-    # initialize and optimize
-    bo = BayesianOptimization(parameter_space, removed_tune_params, kernel_options, tuning_options, normalize_dict, denormalize_dict, runner)
-    results = bo.optimize(max_fevals)
-
-    return results, runner.dev.get_environment()
-
-
-class BayesianOptimization():
-
-    def __init__(self, searchspace: list, removed_tune_params: list, kernel_options: dict, tuning_options: dict, normalize_dict: dict, denormalize_dict: dict,
-                 runner, opt_direction='min'):
-        time_start = time.perf_counter_ns()
-
-        # supported hyperparameter values
-        self.supported_cov_kernels = ["constantrbf", "rbf", "matern32", "matern52"]
-        self.supported_methods = supported_methods
-        self.supported_sampling_methods = ["random", "lhs"]
-        self.supported_sampling_criterion = ["correlation", "ratio", "maximin", None]
-
-        def get_hyperparam(name: str, default, supported_values=list()):
-            value = tuning_options.strategy_options.get(name, default)
-            if len(supported_values) > 0 and value not in supported_values:
-                raise ValueError(f"'{name}' is set to {value}, but must be one of {supported_values}")
-            return value
-
-        # get hyperparameters
-        cov_kernel_name = get_hyperparam("covariancekernel", "matern32", self.supported_cov_kernels)
-        cov_kernel_lengthscale = get_hyperparam("covariancelengthscale", 1.5)
-        acquisition_function = get_hyperparam("method", "multi-advanced", self.supported_methods)
-        acq = acquisition_function
-        acq_params = get_hyperparam("methodparams", {})
-        multi_af_names = get_hyperparam("multi_af_names", ['ei', 'poi', 'lcb'])
-        self.multi_afs_discount_factor = get_hyperparam("multi_af_discount_factor", 0.65 if acq == 'multi' else 0.95)
-        self.multi_afs_required_improvement_factor = get_hyperparam("multi_afs_required_improvement_factor", 0.15 if acq == 'multi-advanced-precise' else 0.1)
-        self.num_initial_samples = get_hyperparam("popsize", 20)
-        self.sampling_method = get_hyperparam("samplingmethod", "lhs", self.supported_sampling_methods)
-        self.sampling_crit = get_hyperparam("samplingcriterion", 'maximin', self.supported_sampling_criterion)
-        self.sampling_iter = get_hyperparam("samplingiterations", 1000)
-
-        # set acquisition function hyperparameter defaults where missing
-        if 'explorationfactor' not in acq_params:
-            acq_params['explorationfactor'] = 'CV'
-        if 'zeta' not in acq_params:
-            acq_params['zeta'] = 1
-        if 'skip_duplicate_after' not in acq_params:
-            acq_params['skip_duplicate_after'] = 5
-
-        # set arguments
-        self.kernel_options = kernel_options
-        self.tuning_options = tuning_options
-        self.tune_params = tuning_options.tune_params
-        self.param_names = list(self.tune_params.keys())
-        self.normalized_dict = normalize_dict
-        self.denormalized_dict = denormalize_dict
-        self.runner = runner
-        self.max_threads = runner.dev.max_threads
-        self.log_timings = False
-
-        # set optimization constants
-        self.invalid_value = 1e20
-        self.opt_direction = opt_direction
-        if opt_direction == 'min':
-            self.worst_value = np.inf
-            self.argopt = np.argmin
-        elif opt_direction == 'max':
-            self.worst_value = np.NINF
-            self.argopt = np.argmax
-        else:
-            raise ValueError("Invalid optimization direction '{}'".format(opt_direction))
-
-        # set the acquisition function and surrogate model
-        self.optimize = self.__optimize
-        self.af_name = acquisition_function
-        self.af_params = acq_params
-        self.multi_afs = list(self.get_af_by_name(af_name) for af_name in multi_af_names)
-        self.set_acquisition_function(acquisition_function)
-        self.set_surrogate_model(cov_kernel_name, cov_kernel_lengthscale)
-
-        # set remaining values
-        self.results = []
-        self.__searchspace = searchspace
-        self.removed_tune_params = removed_tune_params
-        self.searchspace_size = len(self.searchspace)
-        self.num_dimensions = len(self.dimensions())
-        self.__current_optimum = self.worst_value
-        self.cv_norm_maximum = None
-        self.fevals = 0
-        self.__visited_num = 0
-        self.__visited_valid_num = 0
-        self.__visited_searchspace_indices = [False] * self.searchspace_size
-        self.__observations = [np.NaN] * self.searchspace_size
-        self.__valid_observation_indices = [False] * self.searchspace_size
-        self.__valid_params = list()
-        self.__valid_observations = list()
-        self.unvisited_cache = self.unvisited()
-        time_setup = time.perf_counter_ns()
-        self.error_message_searchspace_fully_observed = "The search space has been fully observed"
-
-        # take initial sample
-        if self.num_initial_samples > 0:
-            self.initial_sample()
-            time_initial_sample = time.perf_counter_ns()
-
-        # print the timings
-        if self.log_timings:
-            time_taken_setup = round(time_setup - time_start, 3) / 1000
-            time_taken_initial_sample = round(time_initial_sample - time_setup, 3) / 1000
-            time_taken_total = round(time_initial_sample - time_start, 3) / 1000
-            print(f"Initialization | total time: {time_taken_total} | Setup: {time_taken_setup} | Initial sample: {time_taken_initial_sample}", flush=True)
-
-    @property
-    def searchspace(self):
-        return self.__searchspace
-
-    @property
-    def observations(self):
-        return self.__observations
-
-    @property
-    def current_optimum(self):
-        return self.__current_optimum
-
-    @current_optimum.setter
-    def current_optimum(self, value: float):
-        self.__current_optimum = value
-
-    def is_better_than(self, a: float, b: float) -> bool:
-        """Determines which one is better depending on optimization direction."""
-        return a < b if self.opt_direction == 'min' else a > b
-
-    def is_not_visited(self, index: int) -> bool:
-        """Returns whether a searchspace index has not been visited."""
-        return not self.__visited_searchspace_indices[index]
-
-    def is_valid(self, observation: float) -> bool:
-        """Returns whether an observation is valid."""
-        return not (observation is None or observation == self.invalid_value or observation == np.NaN)
-
-    def get_af_by_name(self, name: str):
-        """Get the basic acquisition functions by their name."""
-        basic_af_names = ['ei', 'poi', 'lcb']
-        if name == 'ei':
-            return self.af_expected_improvement
-        elif name == 'poi':
-            return self.af_probability_of_improvement
-        elif name == 'lcb':
-            return self.af_lower_confidence_bound
-        raise ValueError(f"{name} not in {basic_af_names}")
-
-    def set_acquisition_function(self, acquisition_function: str):
-        """Set the acquisition function."""
-        if acquisition_function == 'poi':
-            self.__af = self.af_probability_of_improvement
-        elif acquisition_function == 'ei':
-            self.__af = self.af_expected_improvement
-        elif acquisition_function == 'lcb':
-            self.__af = self.af_lower_confidence_bound
-        elif acquisition_function == 'lcb-srinivas':
-            self.__af = self.af_lower_confidence_bound_srinivas
-        elif acquisition_function == 'random':
-            self.__af = self.af_random
-        elif acquisition_function == 'multi':
-            self.optimize = self.__optimize_multi
-        elif acquisition_function == 'multi-advanced':
-            self.optimize = self.__optimize_multi_advanced
-        elif acquisition_function == 'multi-fast':
-            self.optimize = self.__optimize_multi_fast
-        else:
-            raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function))
-
-    def set_surrogate_model(self, cov_kernel_name: str, cov_kernel_lengthscale: float):
-        """Set the surrogate model with a covariance function and lengthscale."""
-        if cov_kernel_name == "constantrbf":
-            kernel = ConstantKernel(1.0, constant_value_bounds="fixed") * RBF(cov_kernel_lengthscale, length_scale_bounds="fixed")
-        elif cov_kernel_name == "rbf":
-            kernel = RBF(length_scale=cov_kernel_lengthscale, length_scale_bounds="fixed")
-        elif cov_kernel_name == "matern32":
-            kernel = Matern(length_scale=cov_kernel_lengthscale, nu=1.5, length_scale_bounds="fixed")
-        elif cov_kernel_name == "matern52":
-            kernel = Matern(length_scale=cov_kernel_lengthscale, nu=2.5, length_scale_bounds="fixed")
-        else:
-            raise ValueError("Acquisition function must be one of {}, is {}".format(self.supported_cov_kernels, cov_kernel_name))
-        self.__model = GaussianProcessRegressor(kernel=kernel, alpha=1e-10, normalize_y=True)    # maybe change alpha to a higher value such as 1e-5?
-
-    def valid_params_observations(self) -> Tuple[list, list]:
-        """Returns a list of valid observations and their parameter configurations."""
-        # if you do this every iteration, better keep it as cache and update in update_after_evaluation
-        params = list()
-        observations = list()
-        for index, valid in enumerate(self.__valid_observation_indices):
-            if valid is True:
-                params.append(self.searchspace[index])
-                observations.append(self.observations[index])
-        return params, observations
-
-    def unvisited(self) -> list:
-        """Returns a list of unvisited parameter configurations - attention: cached version exists!"""
-        params = list(self.searchspace[index] for index, visited in enumerate(self.__visited_searchspace_indices) if visited is False)
-        return params
-
-    def find_param_config_index(self, param_config: tuple) -> int:
-        """Find a parameter config index in the search space if it exists."""
-        return self.searchspace.index(param_config)
-
-    def find_param_config_unvisited_index(self, param_config: tuple) -> int:
-        """Find a parameter config index in the unvisited cache if it exists."""
-        return self.unvisited_cache.index(param_config)
-
-    def normalize_param_config(self, param_config: tuple) -> tuple:
-        """Normalizes a parameter configuration."""
-        normalized = tuple(self.normalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
-        return normalized
-
-    def denormalize_param_config(self, param_config: tuple) -> tuple:
-        """Denormalizes a parameter configuration."""
-        denormalized = tuple(self.denormalized_dict[self.param_names[index]][param_value] for index, param_value in enumerate(param_config))
-        return denormalized
-
-    def unprune_param_config(self, param_config: tuple) -> tuple:
-        """In case of pruned dimensions, adds the removed dimensions back in the param config."""
-        unpruned = list()
-        pruned_count = 0
-        for removed in self.removed_tune_params:
-            if removed is not None:
-                unpruned.append(removed)
-            else:
-                unpruned.append(param_config[pruned_count])
-                pruned_count += 1
-        return tuple(unpruned)
-
-    def update_after_evaluation(self, observation: float, index: int, param_config: tuple):
-        """Adjust the visited and valid index records accordingly."""
-        validity = self.is_valid(observation)
-        self.__visited_num += 1
-        self.__observations[index] = observation
-        self.__visited_searchspace_indices[index] = True
-        del self.unvisited_cache[self.find_param_config_unvisited_index(param_config)]
-        self.__valid_observation_indices[index] = validity
-        if validity is True:
-            self.__visited_valid_num += 1
-            self.__valid_params.append(param_config)
-            self.__valid_observations.append(observation)
-            if self.is_better_than(observation, self.current_optimum):
-                self.current_optimum = observation
-
-    def predict(self, x) -> Tuple[float, float]:
-        """Returns a mean and standard deviation predicted by the surrogate model for the parameter configuration."""
-        return self.__model.predict([x], return_std=True)
-
-    def predict_list(self, lst: list) -> Tuple[list, list, list]:
-        """Returns a list of means and standard deviations predicted by the surrogate model for the parameter configurations, and separate lists of means and standard deviations."""
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            mu, std = self.__model.predict(lst, return_std=True)
-            return list(zip(mu, std)), mu, std
-
-    def fit_observations_to_model(self):
-        """Update the model based on the current list of observations."""
-        self.__model.fit(self.__valid_params, self.__valid_observations)
-
-    def evaluate_objective_function(self, param_config: tuple) -> float:
-        """Evaluates the objective function."""
-        param_config = self.unprune_param_config(param_config)
-        denormalized_param_config = self.denormalize_param_config(param_config)
-        if not util.config_valid(denormalized_param_config, self.tuning_options, self.max_threads):
-            return self.invalid_value
-        val = minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.results)
-        self.fevals += 1
-        return val
-
-    def dimensions(self) -> list:
-        """List of parameter values per parameter."""
-        return self.tune_params.values()
-
-    def draw_random_sample(self) -> Tuple[list, int]:
-        """Draw a random sample from the unvisited parameter configurations."""
-        if len(self.unvisited_cache) < 1:
-            raise ValueError("Searchspace exhausted during random sample draw as no valid configurations were found")
-        index = randint(0, len(self.unvisited_cache) - 1)    # NOSONAR
-        param_config = self.unvisited_cache[index]
-        actual_index = self.find_param_config_index(param_config)
-        return param_config, actual_index
-
-    def draw_latin_hypercube_samples(self, num_samples: int) -> list:
-        """Draws an LHS-distributed sample from the search space."""
-        if self.searchspace_size < num_samples:
-            raise ValueError("Can't sample more than the size of the search space")
-        if self.sampling_crit is None:
-            lhs = Lhs(lhs_type="centered", criterion=None)
-        else:
-            lhs = Lhs(lhs_type="classic", criterion=self.sampling_crit, iterations=self.sampling_iter)
-        param_configs = lhs.generate(self.dimensions(), num_samples)
-        indices = list()
-        normalized_param_configs = list()
-        for i in range(len(param_configs) - 1):
-            try:
-                param_config = self.normalize_param_config(param_configs[i])
-                index = self.find_param_config_index(param_config)
-                indices.append(index)
-                normalized_param_configs.append(param_config)
-            except ValueError:
-                """ Due to search space restrictions, the search space may not be an exact cartesian product of the tunable parameter values.
-                It is thus possible for LHS to generate a parameter combination that is not in the actual searchspace, which must be skipped. """
-                continue
-        return list(zip(normalized_param_configs, indices))
-
-    def initial_sample(self):
-        """Draws an initial sample using random sampling."""
-        if self.num_initial_samples <= 0:
-            raise ValueError("At least one initial sample is required")
-        if self.sampling_method == 'lhs':
-            samples = self.draw_latin_hypercube_samples(self.num_initial_samples)
-        elif self.sampling_method == 'random':
-            samples = list()
-        else:
-            raise ValueError("Sampling method must be one of {}, is {}".format(self.supported_sampling_methods, self.sampling_method))
-        # collect the samples
-        collected_samples = 0
-        for params, index in samples:
-            observation = self.evaluate_objective_function(params)
-            self.update_after_evaluation(observation, index, params)
-            if self.is_valid(observation):
-                collected_samples += 1
-        # collect the remainder of the samples
-        while collected_samples < self.num_initial_samples:
-            params, index = self.draw_random_sample()
-            observation = self.evaluate_objective_function(params)
-            self.update_after_evaluation(observation, index, params)
-            # check for validity to avoid having no actual initial samples
-            if self.is_valid(observation):
-                collected_samples += 1
-        self.fit_observations_to_model()
-        _, _, std = self.predict_list(self.unvisited_cache)
-        self.initial_sample_mean = np.mean(self.__valid_observations)
-        # Alternatively:
-        # self.initial_sample_std = np.std(self.__valid_observations)
-        # self.initial_sample_mean = np.mean(predictions)
-        self.initial_std = np.mean(std)
-        self.cv_norm_maximum = self.initial_std
-
-    def contextual_variance(self, std: list):
-        """Contextual improvement to decide explore / exploit, based on CI proposed by (Jasrasaria, 2018)."""
-        if not self.af_params['explorationfactor'] == 'CV':
-            return None
-        if self.opt_direction == 'min':
-            if self.current_optimum == self.worst_value:
-                return 0.01
-            if self.current_optimum <= 0:
-                # doesn't work well for minimization beyond 0, should that even be a thing?
-                return abs(np.mean(std) / self.current_optimum)
-            improvement_over_initial_sample = self.initial_sample_mean / self.current_optimum
-            cv = np.mean(std) / improvement_over_initial_sample
-            # normalize if available
-            if self.cv_norm_maximum:
-                cv = cv / self.cv_norm_maximum
-            return cv
-        return np.mean(std) / self.current_optimum
-
-    def __optimize(self, max_fevals):
-        """Find the next best candidate configuration(s), evaluate those and update the model accordingly."""
-        while self.fevals < max_fevals:
-            if self.__visited_num >= self.searchspace_size:
-                raise ValueError(self.error_message_searchspace_fully_observed)
-            predictions, _, std = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(std)
-            list_of_acquisition_values = self.__af(predictions, hyperparam)
-            # afterwards select the best AF value
-            best_af = self.argopt(list_of_acquisition_values)
-            candidate_params = self.unvisited_cache[best_af]
-            candidate_index = self.find_param_config_index(candidate_params)
-            observation = self.evaluate_objective_function(candidate_params)
-            self.update_after_evaluation(observation, candidate_index, candidate_params)
-            self.fit_observations_to_model()
-        return self.results
-
-    def __optimize_multi(self, max_fevals):
-        """Optimize with a portfolio of multiple acquisition functions. Predictions are always only taken once. Skips AFs if they suggest X/max_evals duplicates in a row, prefers AF with best discounted average."""
-        if self.opt_direction != 'min':
-            raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
-        # calculate how many times an AF can suggest a duplicate candidate before the AF is skipped
-        # skip_duplicates_fraction = self.af_params['skip_duplicates_fraction']
-        # skip_if_duplicate_n_times = int(min(max(round(skip_duplicates_fraction * max_fevals), 3), max_fevals))
-        skip_if_duplicate_n_times = self.af_params['skip_duplicate_after']
-        discount_factor = self.multi_afs_discount_factor
-        # setup the registration of duplicates and runtimes
-        duplicate_count_template = [0 for _ in range(skip_if_duplicate_n_times)]
-        duplicate_candidate_af_count = list(deepcopy(duplicate_count_template) for _ in range(3))
-        skip_af_index = list()
-        af_runtimes = [0, 0, 0]
-        af_observations = [list(), list(), list()]
-        initial_sample_mean = np.mean(self.__valid_observations)
-        while self.fevals < max_fevals:
-            time_start = time.perf_counter_ns()
-            # the first acquisition function is never skipped, so that should be the best for the endgame (EI)
-            aqfs = self.multi_afs
-            predictions, _, std = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(std)
-            if self.__visited_num >= self.searchspace_size:
-                raise ValueError(self.error_message_searchspace_fully_observed)
-            time_predictions = time.perf_counter_ns()
-            actual_candidate_params = list()
-            actual_candidate_indices = list()
-            actual_candidate_af_indices = list()
-            duplicate_candidate_af_indices = list()
-            duplicate_candidate_original_af_indices = list()
-            for af_index, af in enumerate(aqfs):
-                if af_index in skip_af_index:
-                    continue
-                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
-                    break
-                timer_start = time.perf_counter()
-                list_of_acquisition_values = af(predictions, hyperparam)
-                best_af = self.argopt(list_of_acquisition_values)
-                time_taken = time.perf_counter() - timer_start
-                af_runtimes[af_index] += time_taken
-                is_duplicate = best_af in actual_candidate_indices
-                if not is_duplicate:
-                    candidate_params = self.unvisited_cache[best_af]
-                    actual_candidate_params.append(candidate_params)
-                    actual_candidate_indices.append(best_af)
-                    actual_candidate_af_indices.append(af_index)
-                # register whether the AF suggested a duplicate candidate
-                duplicate_candidate_af_count[af_index].pop(0)
-                duplicate_candidate_af_count[af_index].append(1 if is_duplicate else 0)
-                if is_duplicate:
-                    # find the index of the AF that first registered the duplicate
-                    original_duplicate_af_index = actual_candidate_af_indices[actual_candidate_indices.index(best_af)]
-                    # register that AF as duplicate as well
-                    duplicate_candidate_af_count[original_duplicate_af_index][-1] = 1
-                    duplicate_candidate_af_indices.append(af_index)
-                    duplicate_candidate_original_af_indices.append(original_duplicate_af_index)
-            time_afs = time.perf_counter_ns()
-            # evaluate the non-duplicate candidates
-            for index, af_index in enumerate(actual_candidate_af_indices):
-                candidate_params = actual_candidate_params[index]
-                candidate_index = self.find_param_config_index(candidate_params)
-                observation = self.evaluate_objective_function(candidate_params)
-                self.update_after_evaluation(observation, candidate_index, candidate_params)
-                if observation != self.invalid_value:
-                    # we use the registered observations for maximization of the discounted reward
-                    reg_observation = observation if self.opt_direction == 'min' else -1 * observation
-                    af_observations[actual_candidate_af_indices[index]].append(reg_observation)
-                else:
-                    reg_invalid_observation = initial_sample_mean if self.opt_direction == 'min' else -1 * initial_sample_mean
-                    af_observations[actual_candidate_af_indices[index]].append(reg_invalid_observation)
-            for index, af_index in enumerate(duplicate_candidate_af_indices):
-                original_observation = af_observations[duplicate_candidate_original_af_indices[index]][-1]
-                af_observations[af_index].append(original_observation)
-            self.fit_observations_to_model()
-            time_eval = time.perf_counter_ns()
-            # assert that all observation lists of non-skipped acquisition functions are of the same length
-            non_skipped_af_indices = list(af_index for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index)
-            assert all(len(af_observations[non_skipped_af_indices[0]]) == len(af_observations[af_index]) for af_index in non_skipped_af_indices)
-            # find the AFs elligble for being skipped
-            candidates_for_skip = list()
-            for af_index, count in enumerate(duplicate_candidate_af_count):
-                if sum(count) >= skip_if_duplicate_n_times and af_index not in skip_af_index:
-                    candidates_for_skip.append(af_index)
-            # do not skip the AF with the lowest runtime
-            if len(candidates_for_skip) > 1:
-                candidates_for_skip_discounted = list(
-                    sum(list(obs * discount_factor**(len(observations) - 1 - i) for i, obs in enumerate(observations)))
-                    for af_index, observations in enumerate(af_observations) if af_index in candidates_for_skip)
-                af_not_to_skip = candidates_for_skip[np.argmin(candidates_for_skip_discounted)]
-                for af_index in candidates_for_skip:
-                    if af_index == af_not_to_skip:
-                        # do not skip the AF with the lowest runtime and give it a clean slate
-                        duplicate_candidate_af_count[af_index] = deepcopy(duplicate_count_template)
-                        continue
-                    skip_af_index.append(af_index)
-                    if len(skip_af_index) >= len(aqfs):
-                        raise ValueError("There are no acquisition functions left! This should not happen...")
-            time_af_selection = time.perf_counter_ns()
-
-            # printing timings
-            if self.log_timings:
-                time_taken_predictions = round(time_predictions - time_start, 3) / 1000
-                time_taken_afs = round(time_afs - time_predictions, 3) / 1000
-                time_taken_eval = round(time_eval - time_afs, 3) / 1000
-                time_taken_af_selection = round(time_af_selection - time_eval, 3) / 1000
-                time_taken_total = round(time_af_selection - time_start, 3) / 1000
-                print(
-                    f"({self.fevals}/{max_fevals}) Total time: {time_taken_total} | Predictions: {time_taken_predictions} | AFs: {time_taken_afs} | Eval: {time_taken_eval} | AF selection: {time_taken_af_selection}",
-                    flush=True)
-        return self.results
-
-    def __optimize_multi_advanced(self, max_fevals, increase_precision=False):
-        """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once, unless increase_precision is true. Skips AFs if they are consistently worse than the mean of discounted observations, promotes AFs if they are consistently better than this mean."""
-        if self.opt_direction != 'min':
-            raise ValueError(f"Optimization direction must be minimization ('min'), is {self.opt_direction}")
-        aqfs = self.multi_afs
-        discount_factor = self.multi_afs_discount_factor
-        required_improvement_factor = self.multi_afs_required_improvement_factor
-        required_improvement_worse = 1 + required_improvement_factor
-        required_improvement_better = 1 - required_improvement_factor
-        min_required_count = self.af_params['skip_duplicate_after']
-        skip_af_index = list()
-        single_af = len(aqfs) <= len(skip_af_index) + 1
-        af_observations = [list(), list(), list()]
-        af_performs_worse_count = [0, 0, 0]
-        af_performs_better_count = [0, 0, 0]
-        while self.fevals < max_fevals:
-            if single_af:
-                return self.__optimize(max_fevals)
-            if self.__visited_num >= self.searchspace_size:
-                raise ValueError(self.error_message_searchspace_fully_observed)
-            observations_median = np.median(self.__valid_observations)
-            if increase_precision is False:
-                predictions, _, std = self.predict_list(self.unvisited_cache)
-                hyperparam = self.contextual_variance(std)
-            for af_index, af in enumerate(aqfs):
-                if af_index in skip_af_index:
-                    continue
-                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
-                    break
-                if increase_precision is True:
-                    predictions, _, std = self.predict_list(self.unvisited_cache)
-                    hyperparam = self.contextual_variance(std)
-                list_of_acquisition_values = af(predictions, hyperparam)
-                best_af = self.argopt(list_of_acquisition_values)
-                del predictions[best_af]    # to avoid going out of bounds
-                candidate_params = self.unvisited_cache[best_af]
-                candidate_index = self.find_param_config_index(candidate_params)
-                observation = self.evaluate_objective_function(candidate_params)
-                self.update_after_evaluation(observation, candidate_index, candidate_params)
-                if increase_precision is True:
-                    self.fit_observations_to_model()
-                # we use the registered observations for maximization of the discounted reward
-                if observation != self.invalid_value:
-                    reg_observation = observation if self.opt_direction == 'min' else -1 * observation
-                    af_observations[af_index].append(reg_observation)
-                else:
-                    # if the observation is invalid, use the median of all valid observations to avoid skewing the discounted observations
-                    reg_invalid_observation = observations_median if self.opt_direction == 'min' else -1 * observations_median
-                    af_observations[af_index].append(reg_invalid_observation)
-            if increase_precision is False:
-                self.fit_observations_to_model()
-
-            # calculate the mean of discounted observations over the remaining acquisition functions
-            discounted_obs = list(
-                sum(list(obs * discount_factor**(len(observations) - 1 - i) for i, obs in enumerate(observations))) for observations in af_observations)
-            disc_obs_mean = np.mean(list(discounted_obs[af_index] for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index))
-
-            # register which AFs perform more than 10% better than average and which more than 10% worse than average
-            for af_index, discounted_observation in enumerate(discounted_obs):
-                if discounted_observation > disc_obs_mean * required_improvement_worse:
-                    af_performs_worse_count[af_index] += 1
-                elif discounted_observation < disc_obs_mean * required_improvement_better:
-                    af_performs_better_count[af_index] += 1
-
-            # find the worst AF, discounted observations is leading for a draw
-            worst_count = max(list(count for af_index, count in enumerate(af_performs_worse_count) if af_index not in skip_af_index))
-            af_index_worst = -1
-            if worst_count >= min_required_count:
-                for af_index, count in enumerate(af_performs_worse_count):
-                    if af_index not in skip_af_index and count == worst_count and (af_index_worst == -1
-                                                                                   or discounted_obs[af_index] > discounted_obs[af_index_worst]):
-                        af_index_worst = af_index
-
-            # skip the worst AF
-            if af_index_worst > -1:
-                skip_af_index.append(af_index_worst)
-                # reset the counts to even the playing field for the remaining AFs
-                af_performs_worse_count = [0, 0, 0]
-                af_performs_better_count = [0, 0, 0]
-                # if there is only one AF left, register as single AF
-                if len(aqfs) <= len(skip_af_index) + 1:
-                    single_af = True
-                    af_indices_left = list(af_index for af_index, _ in enumerate(aqfs) if af_index not in skip_af_index)
-                    assert len(af_indices_left) == 1
-                    self.__af = aqfs[af_indices_left[0]]
-            else:
-                # find the best AF, discounted observations is leading for a draw
-                best_count = max(list(count for af_index, count in enumerate(af_performs_better_count) if af_index not in skip_af_index))
-                af_index_best = -1
-                if best_count >= min_required_count:
-                    for af_index, count in enumerate(af_performs_better_count):
-                        if af_index not in skip_af_index and count == best_count and (af_index_best == -1
-                                                                                      or discounted_obs[af_index] < discounted_obs[af_index_best]):
-                            af_index_best = af_index
-                # make the best AF single
-                if af_index_best > -1:
-                    single_af = True
-                    self.__af = aqfs[af_index_best]
-
-        return self.results
-
-    def __optimize_multi_fast(self, max_fevals):
-        """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once."""
-        while self.fevals < max_fevals:
-            aqfs = self.multi_afs
-            # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
-            predictions, _, std = self.predict_list(self.unvisited_cache)
-            hyperparam = self.contextual_variance(std)
-            if self.__visited_num >= self.searchspace_size:
-                raise ValueError(self.error_message_searchspace_fully_observed)
-            for af in aqfs:
-                if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals:
-                    break
-                list_of_acquisition_values = af(predictions, hyperparam)
-                best_af = self.argopt(list_of_acquisition_values)
-                del predictions[best_af]    # to avoid going out of bounds
-                candidate_params = self.unvisited_cache[best_af]
-                candidate_index = self.find_param_config_index(candidate_params)
-                observation = self.evaluate_objective_function(candidate_params)
-                self.update_after_evaluation(observation, candidate_index, candidate_params)
-            self.fit_observations_to_model()
-        return self.results
-
-    def af_random(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function returning a randomly shuffled list for comparison."""
-        list_random = range(len(self.unvisited_cache))
-        shuffle(list_random)
-        return list_random
-
-    def af_probability_of_improvement(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function Probability of Improvement (PI)."""
-        # prefetch required data
-        if predictions is None:
-            predictions, _, _ = self.predict_list(self.unvisited_cache)
-        if hyperparam is None:
-            hyperparam = self.af_params['explorationfactor']
-        fplus = self.current_optimum - hyperparam
-
-        # precompute difference of improvement
-        list_diff_improvement = list(-((fplus - x_mu) / (x_std + 1E-9)) for (x_mu, x_std) in predictions)
-
-        # compute probability of improvement with CDF in bulk
-        list_prob_improvement = norm.cdf(list_diff_improvement)
-
-        return list_prob_improvement
-
-    def af_expected_improvement(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function Expected Improvement (EI)."""
-        # prefetch required data
-        if predictions is None:
-            predictions, _, _ = self.predict_list(self.unvisited_cache)
-        if hyperparam is None:
-            hyperparam = self.af_params['explorationfactor']
-        fplus = self.current_optimum - hyperparam
-
-        # precompute difference of improvement, CDF and PDF in bulk
-        list_diff_improvement = list((fplus - x_mu) / (x_std + 1E-9) for (x_mu, x_std) in predictions)
-        list_cdf = norm.cdf(list_diff_improvement)
-        list_pdf = norm.pdf(list_diff_improvement)
-
-        # specify AF calculation
-        def exp_improvement(index) -> float:
-            x_mu, x_std = predictions[index]
-            ei = (fplus - x_mu) * list_cdf[index] + x_std * list_pdf[index]
-            return -ei
-
-        # calculate AF
-        list_exp_improvement = list(map(exp_improvement, range(len(predictions))))
-        return list_exp_improvement
-
-    def af_lower_confidence_bound(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function Lower Confidence Bound (LCB)."""
-        # prefetch required data
-        if predictions is None:
-            predictions, _, _ = self.predict_list(self.unvisited_cache)
-        if hyperparam is None:
-            hyperparam = self.af_params['explorationfactor']
-        beta = hyperparam
-
-        # compute LCB in bulk
-        list_lower_confidence_bound = list(x_mu - beta * x_std for (x_mu, x_std) in predictions)
-        return list_lower_confidence_bound
-
-    def af_lower_confidence_bound_srinivas(self, predictions=None, hyperparam=None) -> list:
-        """Acquisition function Lower Confidence Bound (UCB-S) after Srinivas, 2010 / Brochu, 2010."""
-        # prefetch required data
-        if predictions is None:
-            predictions, _, _ = self.predict_list(self.unvisited_cache)
-        if hyperparam is None:
-            hyperparam = self.af_params['explorationfactor']
-
-        # precompute beta parameter
-        zeta = self.af_params['zeta']
-        t = self.fevals
-        d = self.num_dimensions
-        delta = hyperparam
-        beta = np.sqrt(zeta * (2 * np.log((t**(d / 2. + 2)) * (np.pi**2) / (3. * delta))))
-
-        # compute UCB in bulk
-        list_lower_confidence_bound = list(x_mu - beta * x_std for (x_mu, x_std) in predictions)
-        return list_lower_confidence_bound
-
-    def visualize_after_opt(self):
-        """Visualize the model after the optimization."""
-        print(self.__model.kernel_.get_params())
-        print(self.__model.log_marginal_likelihood())
-        import matplotlib.pyplot as plt
-        _, mu, std = self.predict_list(self.searchspace)
-        brute_force_observations = list()
-        for param_config in self.searchspace:
-            obs = minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.results)
-            if obs == self.invalid_value:
-                obs = None
-            brute_force_observations.append(obs)
-        x_axis = range(len(mu))
-        plt.fill_between(x_axis, mu - std, mu + std, alpha=0.2, antialiased=True)
-        plt.plot(x_axis, mu, label="predictions", linestyle=' ', marker='.')
-        plt.plot(x_axis, brute_force_observations, label="actual", linestyle=' ', marker='.')
-        plt.legend()
-        plt.show()
diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py
index 8b2b92a45..67653190f 100644
--- a/test/strategies/test_strategies.py
+++ b/test/strategies/test_strategies.py
@@ -41,8 +41,8 @@ def vector_add():
 # skip some strategies if their dependencies are not installed
 strategies = []
 for s in strategy_map.keys():
-    if 'gpytorch' in s.lower() or 'botorch_alt' in s.lower() or 'bayes_opt_old' in s.lower():
-        continue
+    if 'gpytorch' in s.lower() or 'botorch_alt' in s.lower():
+        continue    # TODO issue warning for uninstalled dependencies?
     if 'gpytorch' in s.lower():
         strategies.append(pytest.param(s, marks=skip_if_no_bayesopt_gpytorch))
     elif 'botorch' in s.lower():

From dcd102ba24fcc7ba8f6c8eb2df7401287c4d0d3f Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 26 Mar 2025 14:49:28 +0100
Subject: [PATCH 149/168] Report last HIP error on error

---
 kernel_tuner/backends/hip.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel_tuner/backends/hip.py b/kernel_tuner/backends/hip.py
index 1a0b7ce73..831123b42 100644
--- a/kernel_tuner/backends/hip.py
+++ b/kernel_tuner/backends/hip.py
@@ -40,7 +40,7 @@ def hip_check(call_result):
     if len(result) == 1:
         result = result[0]
     if isinstance(err, hip.hipError_t) and err != hip.hipError_t.hipSuccess:
-        raise RuntimeError(str(err))
+        raise RuntimeError(str(err), hip.hipGetLastError())
     return result
 
 

From 290a8605e41418b76cda2a634c04651ce9b04855 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 26 Mar 2025 14:52:25 +0100
Subject: [PATCH 150/168] Added docstring to ScoreObserver class

---
 kernel_tuner/backends/hypertuner.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index 66634e5c0..33a0e639c 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -17,6 +17,8 @@
 
 
 class ScoreObserver(BenchmarkObserver):
+    """BenchmarkObserver subclass for registering the hyperparameter tuning score."""
+
     def __init__(self, dev):
         self.dev = dev
         self.scores = []

From 496af9410938566f89abbceb4e457e56ebd53584 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 26 Mar 2025 14:52:44 +0100
Subject: [PATCH 151/168] Reduced cognitive complexity

---
 kernel_tuner/strategies/bayes_opt.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py
index 451a0d5eb..66e360009 100644
--- a/kernel_tuner/strategies/bayes_opt.py
+++ b/kernel_tuner/strategies/bayes_opt.py
@@ -860,10 +860,8 @@ def __optimize_multi_ultrafast(self, max_fevals, predict_eval_ratio=5):
         while self.fevals < max_fevals:
             aqfs = self.multi_afs
             # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model
-            if (
-                last_prediction_time * predict_eval_ratio <= last_eval_time
-                or last_prediction_counter >= predict_eval_ratio
-            ):
+            fit_observations = last_prediction_time * predict_eval_ratio <= last_eval_time or last_prediction_counter >= predict_eval_ratio
+            if fit_observations:
                 last_prediction_counter = 0
                 pred_start = time.perf_counter()
                 if last_eval_time > 0.0:

From c1c3a718f182b06a46647388d605f1fb988ba658 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 26 Mar 2025 15:13:30 +0100
Subject: [PATCH 152/168] Improved development environment creation
 specification

---
 doc/source/dev-environment.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/dev-environment.rst b/doc/source/dev-environment.rst
index 570a8c970..0adb3c83e 100644
--- a/doc/source/dev-environment.rst
+++ b/doc/source/dev-environment.rst
@@ -78,7 +78,7 @@ Steps without :bash:`sudo` access (e.g. on a cluster):
     * Verify that your development environment has no missing installs or updates with :bash:`poetry install --sync --dry-run --with test`. 
 #. Check if the environment is setup correctly by running :bash:`pytest`. All tests should pass, except if you're not on a GPU node, or one or more extras has been left out in the previous step, then these tests will skip gracefully.
 #. Set Nox to use the correct backend and location:
-    * Run :bash:`conda -- create-settings-file` to automatically create a settings file. 
+    * Run :bash:`nox -- create-settings-file` to automatically create a settings file. 
     * In this settings file :bash:`noxsettings.toml`, change the :bash:`venvbackend`:
         * If you used Mamba in step 2, to :bash:`mamba`.
         * If you used Miniconda or Anaconda in step 2, to :bash:`conda`.

From 54010b4c48feae125eca0a938d68955164aef39e Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Wed, 30 Apr 2025 10:08:34 +0200
Subject: [PATCH 153/168] introduced repair technique in genetic algorithm

---
 kernel_tuner/strategies/genetic_algorithm.py | 148 ++++++++++++-------
 kernel_tuner/strategies/greedy_ils.py        |   9 +-
 test/strategies/test_genetic_algorithm.py    |  10 +-
 test/test_runners.py                         |  16 ++
 4 files changed, 125 insertions(+), 58 deletions(-)

diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index c29c150b5..404c36ed9 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -11,6 +11,7 @@
 _options = dict(
     popsize=("population size", 20),
     maxiter=("maximum number of generations", 100),
+    constraint_aware=("constraint-aware optimization (True/False)", False),
     method=("crossover method to use, choose any from single_point, two_point, uniform, disruptive_uniform", "uniform"),
     mutation_chance=("chance to mutate is 1 in mutation_chance", 10),
 )
@@ -19,13 +20,15 @@
 def tune(searchspace: Searchspace, runner, tuning_options):
 
     options = tuning_options.strategy_options
-    pop_size, generations, method, mutation_chance = common.get_options(options, _options)
+    pop_size, generations, constraint_aware, method, mutation_chance = common.get_options(options, _options)
     crossover = supported_methods[method]
 
+    GA = GeneticAlgorithm(pop_size, searchspace, constraint_aware, method, mutation_chance)
+
     best_score = 1e20
     cost_func = CostFunc(searchspace, tuning_options, runner)
 
-    population = list(list(p) for p in searchspace.get_random_sample(pop_size))
+    population = GA.generate_population()
 
     for generation in range(generations):
 
@@ -51,18 +54,19 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         if tuning_options.verbose:
             print("Generation %d, best_score %f" % (generation, best_score))
 
+        # build new population for next generation
         population = []
 
         # crossover and mutate
         while len(population) < pop_size:
-            dna1, dna2 = weighted_choice(weighted_population, 2)
+            dna1, dna2 = GA.weighted_choice(weighted_population, 2)
 
-            children = crossover(dna1, dna2)
+            children = GA.crossover(dna1, dna2)
 
             for child in children:
-                child = mutate(child, mutation_chance, searchspace)
+                child = GA.mutate(child)
 
-                if child not in population and searchspace.is_param_config_valid(tuple(child)):
+                if child not in population:
                     population.append(child)
 
                 if len(population) >= pop_size:
@@ -75,57 +79,94 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
 tune.__doc__ = common.get_strategy_docstring("Genetic Algorithm", _options)
 
-
-def weighted_choice(population, n):
-    """Randomly select n unique individuals from a weighted population, fitness determines probability of being selected."""
-
-    def random_index_betavariate(pop_size):
-        # has a higher probability of returning index of item at the head of the list
-        alpha = 1
-        beta = 2.5
-        return int(random.betavariate(alpha, beta) * pop_size)
-
-    def random_index_weighted(pop_size):
-        """Use weights to increase probability of selection."""
-        weights = [w for _, w in population]
-        # invert because lower is better
-        inverted_weights = [1.0 / w for w in weights]
-        prefix_sum = np.cumsum(inverted_weights)
-        total_weight = sum(inverted_weights)
-        randf = random.random() * total_weight
-        # return first index of prefix_sum larger than random number
-        return next(i for i, v in enumerate(prefix_sum) if v > randf)
-
-    random_index = random_index_betavariate
-
-    indices = [random_index(len(population)) for _ in range(n)]
-    chosen = []
-    for ind in indices:
-        while ind in chosen:
-            ind = random_index(len(population))
-        chosen.append(ind)
-
-    return [population[ind][0] for ind in chosen]
-
-
-def mutate(dna, mutation_chance, searchspace: Searchspace, cache=True):
-    """Mutate DNA with 1/mutation_chance chance."""
-    # this is actually a neighbors problem with Hamming distance, choose randomly from returned searchspace list
-    if int(random.random() * mutation_chance) == 0:
-        if cache:
-            neighbors = searchspace.get_neighbors(tuple(dna), neighbor_method="Hamming")
-        else:
-            neighbors = searchspace.get_neighbors_no_cache(tuple(dna), neighbor_method="Hamming")
-        if len(neighbors) > 0:
-            return list(random.choice(neighbors))
-    return dna
+class GeneticAlgorithm:
+    """Bundles the genetic-algorithm operators (population creation, selection, crossover, mutation, repair) for one searchspace."""
+    def __init__(self, pop_size, searchspace, constraint_aware=False, method="uniform", mutation_chance=10):
+        self.pop_size = pop_size
+        self.searchspace = searchspace
+        self.constraint_aware = constraint_aware
+        self.crossover_method = supported_methods[method]
+        self.mutation_chance = mutation_chance
+
+    def generate_population(self):
+        """Create the initial population: pop_size configurations from searchspace.get_random_sample, each converted to a list."""
+        return list(list(p) for p in self.searchspace.get_random_sample(self.pop_size))
+
+    def crossover(self, dna1, dna2):
+        """Apply the selected crossover method to dna1 and dna2; both children are passed through repair() if constraint_aware is set."""
+        dna1, dna2 = self.crossover_method(dna1, dna2)
+        if self.constraint_aware:
+            return self.repair(dna1), self.repair(dna2)
+        return dna1, dna2
+
+    def weighted_choice(self, population, n):
+        """Randomly select n unique individuals from a population of (dna, weight) pairs; the index sampler in use favors the head of the list."""
+
+        def random_index_betavariate(pop_size):
+            # has a higher probability of returning index of item at the head of the list
+            alpha = 1
+            beta = 2.5
+            return int(random.betavariate(alpha, beta) * pop_size)
+
+        def random_index_weighted(pop_size):
+            """Use weights to increase probability of selection."""
+            weights = [w for _, w in population]
+            # invert because lower is better
+            inverted_weights = [1.0 / w for w in weights]
+            prefix_sum = np.cumsum(inverted_weights)
+            total_weight = sum(inverted_weights)
+            randf = random.random() * total_weight
+            # return first index of prefix_sum larger than random number
+            return next(i for i, v in enumerate(prefix_sum) if v > randf)
+
+        random_index = random_index_betavariate  # sampler in use; random_index_weighted above is defined but not selected
+
+        indices = [random_index(len(population)) for _ in range(n)]
+        chosen = []
+        for ind in indices:
+            while ind in chosen:  # redraw until the index is unique (NOTE(review): presumably requires n <= len(population) to terminate)
+                ind = random_index(len(population))
+            chosen.append(ind)
+
+        return [population[ind][0] for ind in chosen]
+
+
+    def mutate(self, dna, cache=False):
+        """Mutate DNA with 1/mutation_chance chance; `cache` selects get_neighbors (True) or get_neighbors_no_cache (False)."""
+        # this is actually a neighbors problem with Hamming distance, choose randomly from returned searchspace list
+        if int(random.random() * self.mutation_chance) == 0:
+            if cache:
+                neighbors = self.searchspace.get_neighbors(tuple(dna), neighbor_method="Hamming")
+            else:
+                neighbors = self.searchspace.get_neighbors_no_cache(tuple(dna), neighbor_method="Hamming")
+            if len(neighbors) > 0:
+                return list(random.choice(neighbors))
+        return dna
+
+
+    def repair(self, dna):
+        """Return dna unchanged if it is valid; otherwise return a random neighbor as a list, or dna itself if no neighbor is found."""
+        if not self.searchspace.is_param_config_valid(tuple(dna)):
+            # dna is not valid, try to repair it
+            # search for valid configurations neighboring this config
+            # start from strictly-adjacent to increasingly allowing more neighbors
+            for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]:
+                neighbors = self.searchspace.get_neighbors_no_cache(tuple(dna), neighbor_method=neighbor_method)
+
+                # if we have found valid neighboring configurations, select one at random
+                if len(neighbors) > 0:
+                    new_dna = list(random.choice(neighbors))
+                    print(f"GA crossover resulted in invalid config {dna=}, repaired dna to {new_dna=}")
+                    return new_dna
+
+        return dna
 
 
 def single_point_crossover(dna1, dna2):
     """Crossover dna1 and dna2 at a random index."""
     # check if you can do the crossovers using the neighbor index: check which valid parameter configuration is closest to the crossover, probably best to use "adjacent" as it is least strict?
     pos = int(random.random() * (len(dna1)))
-    return (dna1[:pos] + dna2[pos:], dna2[:pos] + dna1[pos:])
+    return dna1[:pos] + dna2[pos:], dna2[:pos] + dna1[pos:]
 
 
 def two_point_crossover(dna1, dna2):
@@ -137,7 +178,7 @@ def two_point_crossover(dna1, dna2):
     pos1, pos2 = sorted(random.sample(list(range(start, end)), 2))
     child1 = dna1[:pos1] + dna2[pos1:pos2] + dna1[pos2:]
     child2 = dna2[:pos1] + dna1[pos1:pos2] + dna2[pos2:]
-    return (child1, child2)
+    return child1, child2
 
 
 def uniform_crossover(dna1, dna2):
@@ -168,7 +209,7 @@ def disruptive_uniform_crossover(dna1, dna2):
                     child1[ind] = dna2[ind]
                     child2[ind] = dna1[ind]
                     swaps += 1
-    return (child1, child2)
+    return child1, child2
 
 
 supported_methods = {
@@ -177,3 +218,4 @@ def disruptive_uniform_crossover(dna1, dna2):
     "uniform": uniform_crossover,
     "disruptive_uniform": disruptive_uniform_crossover,
 }
+
diff --git a/kernel_tuner/strategies/greedy_ils.py b/kernel_tuner/strategies/greedy_ils.py
index a4c521746..c620ab925 100644
--- a/kernel_tuner/strategies/greedy_ils.py
+++ b/kernel_tuner/strategies/greedy_ils.py
@@ -1,9 +1,9 @@
 """A simple greedy iterative local search algorithm for parameter search."""
+import random
 from kernel_tuner import util
 from kernel_tuner.searchspace import Searchspace
 from kernel_tuner.strategies import common
 from kernel_tuner.strategies.common import CostFunc
-from kernel_tuner.strategies.genetic_algorithm import mutate
 from kernel_tuner.strategies.hillclimbers import base_hillclimb
 
 _options = dict(neighbor=("Method for selecting neighboring nodes, choose from Hamming or adjacent", "Hamming"),
@@ -58,9 +58,14 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
 tune.__doc__ = common.get_strategy_docstring("Greedy Iterative Local Search (ILS)", _options)
 
+def mutate(indiv, searchspace: Searchspace):
+    neighbors = searchspace.get_neighbors_no_cache(tuple(indiv), neighbor_method="Hamming")
+    return list(random.choice(neighbors))
+
+
 def random_walk(indiv, permutation_size, no_improve, last_improve, searchspace: Searchspace):
     if last_improve >= no_improve:
         return searchspace.get_random_sample(1)[0]
     for _ in range(permutation_size):
-        indiv = mutate(indiv, 0, searchspace, cache=False)
+        indiv = mutate(indiv, searchspace)
     return indiv
diff --git a/test/strategies/test_genetic_algorithm.py b/test/strategies/test_genetic_algorithm.py
index cb07f8d7f..940150796 100644
--- a/test/strategies/test_genetic_algorithm.py
+++ b/test/strategies/test_genetic_algorithm.py
@@ -14,10 +14,12 @@ def test_weighted_choice():
     pop = searchspace.get_random_sample(pop_size)
     weighted_pop = [[p, i] for i, p in enumerate(pop)]
 
-    result = ga.weighted_choice(weighted_pop, 1)
+    GA = ga.GeneticAlgorithm(pop_size, searchspace)
+
+    result = GA.weighted_choice(weighted_pop, 1)
     assert result[0] in pop
 
-    result = ga.weighted_choice(weighted_pop, 2)
+    result = GA.weighted_choice(weighted_pop, 2)
     print(result)
     assert result[0] in pop
     assert result[1] in pop
@@ -43,7 +45,9 @@ def test_random_population():
 def test_mutate():
     pop = searchspace.get_random_sample(1)
 
-    mutant = ga.mutate(pop[0], 10, searchspace)
+    GA = ga.GeneticAlgorithm(1, searchspace)
+
+    mutant = GA.mutate(pop[0])
     assert len(pop[0]) == len(mutant)
     assert mutant[0] in tune_params["x"]
     assert mutant[1] in tune_params["y"]
diff --git a/test/test_runners.py b/test/test_runners.py
index 527c1d252..dd4a7f52b 100644
--- a/test/test_runners.py
+++ b/test/test_runners.py
@@ -140,6 +140,22 @@ def test_diff_evo(env):
     assert len(result) > 0
 
 
+def test_constraint_aware_GA(env):
+    options = dict(method="uniform",
+                   constraint_aware=True,
+                   popsize=5,
+                   maxiter=2,
+                   mutation_chance=10,
+                   max_fevals=10)
+    result, _ = tune_kernel(*env,
+                            strategy="genetic_algorithm",
+                            strategy_options=options,
+                            verbose=True,
+                            cache=cache_filename,
+                            simulation_mode=True)
+    assert len(result) > 0
+
+
 @skip_if_no_pycuda
 def test_time_keeping(env):
     kernel_name, kernel_string, size, args, tune_params = env

From 71e3de8b623982c6d80e411e36f94c5df9c95199 Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Wed, 30 Apr 2025 22:43:55 +0200
Subject: [PATCH 154/168] added non-constraint-aware initialization and
 mutation for comparison

---
 kernel_tuner/strategies/genetic_algorithm.py | 40 ++++++++++++++++----
 1 file changed, 32 insertions(+), 8 deletions(-)

diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 404c36ed9..19f399dc6 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -11,7 +11,7 @@
 _options = dict(
     popsize=("population size", 20),
     maxiter=("maximum number of generations", 100),
-    constraint_aware=("constraint-aware optimization (True/False)", False),
+    constraint_aware=("constraint-aware optimization (True/False)", True),
     method=("crossover method to use, choose any from single_point, two_point, uniform, disruptive_uniform", "uniform"),
     mutation_chance=("chance to mutate is 1 in mutation_chance", 10),
 )
@@ -36,7 +36,8 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         weighted_population = []
         for dna in population:
             try:
-                time = cost_func(dna, check_restrictions=False)
+                # if we are not constraint-aware we should check restrictions upon evaluation
+                time = cost_func(dna, check_restrictions=not constraint_aware)
             except util.StopCriterionReached as e:
                 if tuning_options.verbose:
                     print(e)
@@ -84,13 +85,24 @@ class GeneticAlgorithm:
     def __init__(self, pop_size, searchspace, constraint_aware=False, method="uniform", mutation_chance=10):
         self.pop_size = pop_size
         self.searchspace = searchspace
+        self.tune_params = searchspace.tune_params.copy()
         self.constraint_aware = constraint_aware
         self.crossover_method = supported_methods[method]
         self.mutation_chance = mutation_chance
 
     def generate_population(self):
         """ Constraint-aware population creation method """
-        return list(list(p) for p in self.searchspace.get_random_sample(self.pop_size))
+        if self.constraint_aware:
+            pop = list(list(p) for p in self.searchspace.get_random_sample(self.pop_size))
+        else:
+            pop = []
+            dna_size = len(self.tune_params)
+            for _ in range(self.pop_size):
+                dna = []
+                for key in self.tune_params:
+                    dna.append(random.choice(self.tune_params[key]))
+                pop.append(dna)
+        return pop
 
     def crossover(self, dna1, dna2):
         """ Apply selected crossover method, repair dna if constraint-aware """
@@ -135,12 +147,24 @@ def mutate(self, dna, cache=False):
         """Mutate DNA with 1/mutation_chance chance."""
         # this is actually a neighbors problem with Hamming distance, choose randomly from returned searchspace list
         if int(random.random() * self.mutation_chance) == 0:
-            if cache:
-                neighbors = self.searchspace.get_neighbors(tuple(dna), neighbor_method="Hamming")
+            if self.constraint_aware:
+                if cache:
+                    neighbors = self.searchspace.get_neighbors(tuple(dna), neighbor_method="Hamming")
+                else:
+                    neighbors = self.searchspace.get_neighbors_no_cache(tuple(dna), neighbor_method="Hamming")
+                if len(neighbors) > 0:
+                    return list(random.choice(neighbors))
             else:
-                neighbors = self.searchspace.get_neighbors_no_cache(tuple(dna), neighbor_method="Hamming")
-            if len(neighbors) > 0:
-                return list(random.choice(neighbors))
+                # select a tunable parameter at random
+                mutate_index = random.randint(0, len(self.tune_params)-1)
+                mutate_key = list(self.tune_params.keys())[mutate_index]
+                # get all possible values for this parameter and remove current value
+                new_val_options = self.tune_params[mutate_key].copy()
+                new_val_options.remove(dna[mutate_index])
+                # pick new value at random
+                if len(new_val_options) > 0:
+                    new_val = random.choice(new_val_options)
+                    dna[mutate_index] = new_val
         return dna
 
 

From 67a5070a9c752e946dab03e14727713ce1de5620 Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Thu, 1 May 2025 08:50:57 +0200
Subject: [PATCH 155/168] fix test_mutate

---
 test/strategies/test_genetic_algorithm.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/strategies/test_genetic_algorithm.py b/test/strategies/test_genetic_algorithm.py
index 940150796..d16ad11ce 100644
--- a/test/strategies/test_genetic_algorithm.py
+++ b/test/strategies/test_genetic_algorithm.py
@@ -43,10 +43,11 @@ def test_random_population():
 
 
 def test_mutate():
-    pop = searchspace.get_random_sample(1)
 
     GA = ga.GeneticAlgorithm(1, searchspace)
 
+    pop = GA.generate_population()
+
     mutant = GA.mutate(pop[0])
     assert len(pop[0]) == len(mutant)
     assert mutant[0] in tune_params["x"]

From 939ea19a8e7201f5ac34111cfa82a7e247767edc Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Mon, 12 May 2025 15:58:35 +0200
Subject: [PATCH 156/168] constraint-aware variants for pso, firefly, and sa

---
 kernel_tuner/strategies/common.py             | 37 +++++++++-
 kernel_tuner/strategies/firefly_algorithm.py  | 12 ++--
 kernel_tuner/strategies/pso.py                | 12 ++--
 .../strategies/simulated_annealing.py         | 69 +++++++++++++++----
 4 files changed, 104 insertions(+), 26 deletions(-)

diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py
index d01eae937..76ad8a568 100644
--- a/kernel_tuner/strategies/common.py
+++ b/kernel_tuner/strategies/common.py
@@ -3,6 +3,7 @@
 from time import perf_counter
 
 import numpy as np
+from scipy.spatial import distance
 
 from kernel_tuner import util
 from kernel_tuner.searchspace import Searchspace
@@ -88,8 +89,17 @@ def __call__(self, x, check_restrictions=True):
 
         # else check if this is a legal (non-restricted) configuration
         if check_restrictions and self.searchspace.restrictions:
+            legal = self.searchspace.is_param_config_valid(tuple(params))
             params_dict = dict(zip(self.searchspace.tune_params.keys(), params))
-            legal = util.check_restrictions(self.searchspace.restrictions, params_dict, self.tuning_options.verbose)
+
+            if "constraint_aware" in self.tuning_options.strategy_options and self.tuning_options.strategy_options["constraint_aware"]:
+                # attempt to repair
+                new_params = unscale_and_snap_to_nearest_valid(x, params, self.searchspace, self.tuning_options.eps)
+                if new_params:
+                    params = new_params
+                    legal = True
+                    x_int = ",".join([str(i) for i in params])
+
             if not legal:
                 result = params_dict
                 result[self.tuning_options.objective] = util.InvalidConfig()
@@ -243,3 +253,28 @@ def scale_from_params(params, tune_params, eps):
     for i, v in enumerate(tune_params.values()):
         x[i] = 0.5 * eps + v.index(params[i])*eps
     return x
+
+
+
+def unscale_and_snap_to_nearest_valid(x, params, searchspace, eps):
+    """Helper func to snap to the nearest valid configuration"""
+
+    # params is nearest unscaled point, but is not valid
+    neighbors = get_neighbors(params, searchspace)
+
+    if neighbors:
+        # sort on distance to x
+        neighbors.sort(key=lambda y: distance.euclidean(x,scale_from_params(y, searchspace.tune_params, eps)))
+
+        # return closest valid neighbor
+        return neighbors[0]
+
+    return []
+
+
+def get_neighbors(params, searchspace):
+    for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]:
+        neighbors = searchspace.get_neighbors_no_cache(tuple(params), neighbor_method=neighbor_method)
+        if len(neighbors) > 0:
+            return neighbors
+    return []
diff --git a/kernel_tuner/strategies/firefly_algorithm.py b/kernel_tuner/strategies/firefly_algorithm.py
index dc43aae6f..9971df047 100644
--- a/kernel_tuner/strategies/firefly_algorithm.py
+++ b/kernel_tuner/strategies/firefly_algorithm.py
@@ -13,7 +13,8 @@
                        maxiter=("Maximum number of iterations", 100),
                        B0=("Maximum attractiveness", 1.0),
                        gamma=("Light absorption coefficient", 1.0),
-                       alpha=("Randomization parameter", 0.2))
+                       alpha=("Randomization parameter", 0.2),
+                       constraint_aware=("constraint-aware optimization (True/False)", True))
 
 def tune(searchspace: Searchspace, runner, tuning_options):
 
@@ -23,7 +24,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     # using this instead of get_bounds because scaling is used
     bounds, _, eps = cost_func.get_bounds_x0_eps()
 
-    num_particles, maxiter, B0, gamma, alpha = common.get_options(tuning_options.strategy_options, _options)
+    num_particles, maxiter, B0, gamma, alpha, constraint_aware = common.get_options(tuning_options.strategy_options, _options)
 
     best_score_global = sys.float_info.max
     best_position_global = []
@@ -34,9 +35,10 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         swarm.append(Firefly(bounds))
 
     # ensure particles start from legal points
-    population = list(list(p) for p in searchspace.get_random_sample(num_particles))
-    for i, particle in enumerate(swarm):
-        particle.position = scale_from_params(population[i], searchspace.tune_params, eps)
+    if constraint_aware:
+        population = list(list(p) for p in searchspace.get_random_sample(num_particles))
+        for i, particle in enumerate(swarm):
+            particle.position = scale_from_params(population[i], searchspace.tune_params, eps)
 
     # compute initial intensities
     for j in range(num_particles):
diff --git a/kernel_tuner/strategies/pso.py b/kernel_tuner/strategies/pso.py
index 5b0df1429..efcd63815 100644
--- a/kernel_tuner/strategies/pso.py
+++ b/kernel_tuner/strategies/pso.py
@@ -13,7 +13,8 @@
                        maxiter=("Maximum number of iterations", 100),
                        w=("Inertia weight constant", 0.5),
                        c1=("Cognitive constant", 2.0),
-                       c2=("Social constant", 1.0))
+                       c2=("Social constant", 1.0),
+                       constraint_aware=("constraint-aware optimization (True/False)", False))
 
 def tune(searchspace: Searchspace, runner, tuning_options):
 
@@ -24,7 +25,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     bounds, _, eps = cost_func.get_bounds_x0_eps()
 
 
-    num_particles, maxiter, w, c1, c2 = common.get_options(tuning_options.strategy_options, _options)
+    num_particles, maxiter, w, c1, c2, constraint_aware = common.get_options(tuning_options.strategy_options, _options)
 
     best_score_global = sys.float_info.max
     best_position_global = []
@@ -35,9 +36,10 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         swarm.append(Particle(bounds))
 
     # ensure particles start from legal points
-    population = list(list(p) for p in searchspace.get_random_sample(num_particles))
-    for i, particle in enumerate(swarm):
-        particle.position = scale_from_params(population[i], searchspace.tune_params, eps)
+    if constraint_aware:
+        population = list(list(p) for p in searchspace.get_random_sample(num_particles))
+        for i, particle in enumerate(swarm):
+            particle.position = scale_from_params(population[i], searchspace.tune_params, eps)
 
     # start optimization
     for i in range(maxiter):
diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py
index dce929b7b..b380e5efb 100644
--- a/kernel_tuner/strategies/simulated_annealing.py
+++ b/kernel_tuner/strategies/simulated_annealing.py
@@ -10,16 +10,17 @@
 from kernel_tuner.strategies.common import CostFunc
 
 _options = dict(T=("Starting temperature", 1.0),
-                       T_min=("End temperature", 0.001),
-                       alpha=("Alpha parameter", 0.995),
-                       maxiter=("Number of iterations within each annealing step", 1))
+                T_min=("End temperature", 0.001),
+                alpha=("Alpha parameter", 0.995),
+                maxiter=("Number of iterations within each annealing step", 1),
+                constraint_aware=("constraint-aware optimization (True/False)", True))
 
 def tune(searchspace: Searchspace, runner, tuning_options):
     # SA works with real parameter values and does not need scaling
     cost_func = CostFunc(searchspace, tuning_options, runner)
 
     # optimization parameters
-    T, T_min, alpha, niter = common.get_options(tuning_options.strategy_options, _options)
+    T, T_min, alpha, niter, constraint_aware = common.get_options(tuning_options.strategy_options, _options)
     T_start = T
 
     # compute how many iterations would be needed to complete the annealing schedule
@@ -30,7 +31,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     max_feval = tuning_options.strategy_options.get("max_fevals", max_iter)
 
     # get random starting point and evaluate cost
-    pos = list(searchspace.get_random_sample(1)[0])
+    pos = generate_starting_point(searchspace, constraint_aware)
     old_cost = cost_func(pos, check_restrictions=False)
 
     # main optimization loop
@@ -46,9 +47,9 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
         for _ in range(niter):
 
-            new_pos = neighbor(pos, searchspace)
+            new_pos = neighbor(pos, searchspace, constraint_aware)
             try:
-                new_cost = cost_func(new_pos, check_restrictions=False)
+                new_cost = cost_func(new_pos, check_restrictions=not constraint_aware)
             except util.StopCriterionReached as e:
                 if tuning_options.verbose:
                     print(e)
@@ -73,7 +74,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
             stuck = 0
         c_old = c
         if stuck > 100:
-            pos = list(searchspace.get_random_sample(1)[0])
+            pos = generate_starting_point(searchspace, constraint_aware)
             stuck = 0
 
         # safeguard
@@ -103,11 +104,49 @@ def acceptance_prob(old_cost, new_cost, T, tuning_options):
     return np.exp(((old_cost-new_cost)/old_cost)/T)
 
 
-def neighbor(pos, searchspace: Searchspace):
+def neighbor(pos, searchspace: Searchspace, constraint_aware=True):
     """Return a random neighbor of pos."""
-    # Note: this is not the same as the previous implementation, because it is possible that non-edge parameters remain the same, but suggested configurations will all be within restrictions
-    neighbors = searchspace.get_neighbors(tuple(pos), neighbor_method='Hamming') if random.random() < 0.2 else searchspace.get_neighbors(tuple(pos), neighbor_method='strictly-adjacent')
-    if len(neighbors) > 0:
-        return list(random.choice(neighbors))
-    # if there are no neighbors, return a random configuration
-    return list(searchspace.get_random_sample(1)[0])
+
+    if constraint_aware:
+        # Note: this is not the same as the previous implementation, because it is possible that non-edge parameters remain the same, but suggested configurations will all be within restrictions
+        neighbors = searchspace.get_neighbors(tuple(pos), neighbor_method='Hamming') if random.random() < 0.2 else searchspace.get_neighbors(tuple(pos), neighbor_method='strictly-adjacent')
+        if len(neighbors) > 0:
+            return list(random.choice(neighbors))
+        # if there are no neighbors, return a random configuration
+        return list(searchspace.get_random_sample(1)[0])
+
+    else:
+        tune_params = searchspace.tune_params
+        size = len(pos)
+        pos_out = []
+        # random mutation
+        # every dimension gets a mutation attempt: a random value (20% chance) or otherwise an adjacent value
+        for i in range(size):
+            key = list(tune_params.keys())[i]
+            values = tune_params[key]
+
+            if random.random() < 0.2:  #replace with random value
+                new_value = random_val(i, tune_params)
+            else: #adjacent value
+                ind = values.index(pos[i])
+                if random.random() > 0.5:
+                    ind += 1
+                else:
+                    ind -= 1
+                ind = min(max(ind, 0), len(values)-1)
+                new_value = values[ind]
+
+            pos_out.append(new_value)
+        return pos_out
+
+def random_val(index, tune_params):
+    """return a random value for a parameter"""
+    key = list(tune_params.keys())[index]
+    return random.choice(tune_params[key])
+
+def generate_starting_point(searchspace: Searchspace, constraint_aware=True):
+    if constraint_aware:
+        return list(searchspace.get_random_sample(1)[0])
+    else:
+        tune_params = searchspace.tune_params
+        return [random_val(i, tune_params) for i in range(len(tune_params))]

From b358265ba8c13af18fd684e105cb5a636f55defa Mon Sep 17 00:00:00 2001
From: Ben van Werkhoven <b.vanwerkhoven@esciencecenter.nl>
Date: Mon, 12 May 2025 16:01:28 +0200
Subject: [PATCH 157/168] remove unused variable

---
 kernel_tuner/strategies/genetic_algorithm.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 19f399dc6..3932baaa1 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -21,7 +21,6 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
     options = tuning_options.strategy_options
     pop_size, generations, constraint_aware, method, mutation_chance = common.get_options(options, _options)
-    crossover = supported_methods[method]
 
     GA = GeneticAlgorithm(pop_size, searchspace, constraint_aware, method, mutation_chance)
 

From 2d24ae97126f82bfad395447b9e11bb984d31f0b Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Tue, 13 May 2025 00:42:17 +0200
Subject: [PATCH 158/168] Added objective performance keys

---
 kernel_tuner/backends/hypertuner.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index 33a0e639c..a02e79aca 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -69,22 +69,26 @@ def compile(self, kernel_instance):
             {
                 "name": "dedispersion_milo",
                 "folder": folder,
-                "input_file": "dedispersion_milo.json"
+                "input_file": "dedispersion_milo.json",
+                "objective_performance_keys": ["time"]
             },
             {
                 "name": "hotspot_milo",
                 "folder": folder,
-                "input_file": "hotspot_milo.json"
+                "input_file": "hotspot_milo.json",
+                "objective_performance_keys": ["GFLOP/s"]
             },
             {
                 "name": "convolution_milo",
                 "folder": folder,
-                "input_file": "convolution_milo.json"
+                "input_file": "convolution_milo.json",
+                "objective_performance_keys": ["time"]
             },
             {
                 "name": "gemm_milo",
                 "folder": folder,
-                "input_file": "gemm_milo.json"
+                "input_file": "gemm_milo.json",
+                "objective_performance_keys": ["time"]
             }
         ]
 

From 77676c8f4937851e78b30d3c77441718a9805cd4 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Tue, 13 May 2025 10:56:48 +0200
Subject: [PATCH 159/168] Support for time-based cutoff with T1 format

---
 kernel_tuner/interface.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index fc678fdc9..db8ab59ae 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -891,10 +891,14 @@ def tune_kernel_T1(
             strategy_options[attribute["Name"]] = attribute["Value"]
     if "Budget" in inputs:
         budget = inputs["Budget"][0]
-        assert budget["Type"] == "ConfigurationCount"
         if strategy_options is None:
             strategy_options = {}
-        strategy_options["max_fevals"] = budget["BudgetValue"]
+        if budget["Type"] == "ConfigurationCount":
+            strategy_options["max_fevals"] = budget["BudgetValue"]
+        elif budget["Type"] == "TuningDuration":
+            strategy_options["time_limit"] = budget["BudgetValue"]  # both are in seconds
+        else:
+            raise NotImplementedError(f"Budget type in {budget} is not supported")
 
     # set the cache path
     if cache_filepath is None and "SimulationInput" in kernelspec:
@@ -971,7 +975,6 @@ def tune_kernel_T1(
             raise NotImplementedError(f"Conversion for this type of argument has not yet been implemented: {arg}")
 
     # tune with the converted inputs
-    # TODO get_t4_results calls once available in T1
     results, env = tune_kernel(
         kernel_name,
         kernel_source,

From 919626647f83cd90c3a266a5f1a21cf7d2763305 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Tue, 13 May 2025 17:09:33 +0200
Subject: [PATCH 160/168] Improvements to constraint-aware strategies

---
 kernel_tuner/strategies/genetic_algorithm.py | 28 ++++++++++----------
 kernel_tuner/strategies/pso.py               |  2 +-
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 026202d0f..8c6fbde41 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -12,32 +12,31 @@
 _options = dict(
     popsize=("population size", 30),
     maxiter=("maximum number of generations", 30),
-    constraint_aware=("constraint-aware optimization (True/False)", True),
     method=("crossover method to use, choose any from single_point, two_point, uniform, disruptive_uniform", "uniform"),
     mutation_chance=("chance to mutate is 1 in mutation_chance", 20),
+    constraint_aware=("constraint-aware optimization (True/False)", True),
 )
 
 
 def tune(searchspace: Searchspace, runner, tuning_options):
 
     options = tuning_options.strategy_options
-    pop_size, generations, constraint_aware, method, mutation_chance = common.get_options(options, _options)
+    pop_size, generations, method, mutation_chance, constraint_aware = common.get_options(options, _options)
 
-    GA = GeneticAlgorithm(pop_size, searchspace, constraint_aware, method, mutation_chance)
+    # if necessary adjust the popsize to a sensible value based on search space size
+    pop_size = min(round((searchspace.size / generations) * 3), pop_size)
 
-    # if left to the default, adjust the popsize to a sensible value for small search spaces
-    if pop_size == _options["popsize"][1]:
-        pop_size = min(round(searchspace.size / 2), pop_size)
-    else:
-        # otherwise, just make sure it doesn't exceed the search space size
-        pop_size = min(searchspace.size, pop_size)
+    GA = GeneticAlgorithm(pop_size, searchspace, method, mutation_chance, constraint_aware)
 
     best_score = 1e20
     cost_func = CostFunc(searchspace, tuning_options, runner)
+    num_evaluated = 0
 
     population = GA.generate_population()
 
     for generation in range(generations):
+        if any([not searchspace.is_param_config_valid(tuple(dna)) for dna in population]):
+            raise ValueError(f"Generation {generation}/{generations}, population validity: {[searchspace.is_param_config_valid(tuple(dna)) for dna in population]}")
 
         # determine fitness of population members
         weighted_population = []
@@ -45,7 +44,8 @@ def tune(searchspace: Searchspace, runner, tuning_options):
             try:
                 # if we are not constraint-aware we should check restrictions upon evaluation
                 time = cost_func(dna, check_restrictions=not constraint_aware)
-            except util.StopCriterionReached as e:
+                num_evaluated += 1
+            except StopCriterionReached as e:
                 if tuning_options.verbose:
                     print(e)
                 return cost_func.results
@@ -68,7 +68,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
         population = []
 
         # crossover and mutate
-        while len(population) < pop_size:
+        while len(population) < pop_size and searchspace.size > num_evaluated + len(population):
             dna1, dna2 = GA.weighted_choice(weighted_population, 2)
 
             children = GA.crossover(dna1, dna2)
@@ -76,7 +76,7 @@ def tune(searchspace: Searchspace, runner, tuning_options):
             for child in children:
                 child = GA.mutate(child)
 
-                if child not in population:
+                if child not in population and searchspace.is_param_config_valid(tuple(child)):
                     population.append(child)
 
                 if len(population) >= pop_size:
@@ -91,13 +91,13 @@ def tune(searchspace: Searchspace, runner, tuning_options):
 
 class GeneticAlgorithm:
 
-    def __init__(self, pop_size, searchspace, constraint_aware=False, method="uniform", mutation_chance=10):
+    def __init__(self, pop_size, searchspace, method="uniform", mutation_chance=10, constraint_aware=True):
         self.pop_size = pop_size
         self.searchspace = searchspace
         self.tune_params = searchspace.tune_params.copy()
-        self.constraint_aware = constraint_aware
         self.crossover_method = supported_methods[method]
         self.mutation_chance = mutation_chance
+        self.constraint_aware = constraint_aware
 
     def generate_population(self):
         """ Constraint-aware population creation method """
diff --git a/kernel_tuner/strategies/pso.py b/kernel_tuner/strategies/pso.py
index 0fd9c874d..a7b75ed48 100644
--- a/kernel_tuner/strategies/pso.py
+++ b/kernel_tuner/strategies/pso.py
@@ -16,7 +16,7 @@
     w=("Inertia weight constant", 0.5),
     c1=("Cognitive constant", 3.0),
     c2=("Social constant", 1.5),
-    constraint_aware=("constraint-aware optimization (True/False)", False))
+    constraint_aware=("constraint-aware optimization (True/False)", True))
 
 def tune(searchspace: Searchspace, runner, tuning_options):
 

From 83df9482826bc3373a450da66c09f0f5be5d841b Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Tue, 13 May 2025 17:14:11 +0200
Subject: [PATCH 161/168] Implemented passing settings to hyperparameter tuner,
 improved hyperparam test, improved defaults

---
 kernel_tuner/backends/hypertuner.py | 87 +++++++++++++++++------------
 kernel_tuner/core.py                |  7 ++-
 test/test_hyper.py                  | 21 ++++++-
 3 files changed, 77 insertions(+), 38 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index a02e79aca..d4355d5ba 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -35,37 +35,18 @@ class HypertunerFunctions(Backend):
     """Class for executing hyperparameter tuning."""
     units = {}
 
-    def __init__(self, iterations):
+    def __init__(self, iterations, compiler_options=None):
         self.iterations = iterations
+        self.compiler_options = compiler_options
         self.observers = [ScoreObserver(self)]
         self.name = platform.processor()
         self.max_threads = 1024
         self.last_score = None
 
-        # set the environment options
-        env = dict()
-        env["iterations"] = self.iterations
-        self.env = env
-
-        # check for the methodology package
-        if methodology_available is not True:
-            raise ImportError("Unable to import the autotuning methodology, run `pip install autotuning_methodology`.")
-
-    def ready_argument_list(self, arguments):
-        arglist = super().ready_argument_list(arguments)
-        if arglist is None:
-            arglist = []
-        return arglist
-    
-    def compile(self, kernel_instance):
-        super().compile(kernel_instance)
-        path = Path(__file__).parent.parent.parent / "hyperparamtuning"
-        path.mkdir(exist_ok=True)
-
-        # TODO get applications & GPUs args from benchmark
-        gpus = ["A100", "A4000", "MI250X"]
+        # set the defaults
+        self.gpus = ["A100", "A4000", "MI250X"]
         folder = "../autotuning_methodology/benchmark_hub/kernels"
-        applications = [
+        self.applications = [
             {
                 "name": "dedispersion_milo",
                 "folder": folder,
@@ -91,6 +72,51 @@ def compile(self, kernel_instance):
                 "objective_performance_keys": ["time"]
             }
         ]
+        # any additional settings
+        self.override = { 
+            "experimental_groups_defaults": { 
+                "repeats": 25,
+                "samples": self.iterations,
+                "minimum_fraction_of_budget_valid": 0.01, 
+            },
+            "statistics_settings": {
+                "cutoff_percentile": 0.95,
+                "cutoff_percentile_start": 0.01,
+                "cutoff_type": "time",
+                "objective_time_keys": [
+                    "all"
+                ]
+            }
+        }
+
+        # override the defaults with compiler options if provided
+        if self.compiler_options is not None:
+            if "gpus" in self.compiler_options:
+                self.gpus = self.compiler_options["gpus"]
+            if "applications" in self.compiler_options:
+                self.applications = self.compiler_options["applications"]
+            if "override" in self.compiler_options:
+                self.override = self.compiler_options["override"]
+
+        # set the environment options
+        env = dict()
+        env["iterations"] = self.iterations
+        self.env = env
+
+        # check for the methodology package
+        if methodology_available is not True:
+            raise ImportError("Unable to import the autotuning methodology, run `pip install autotuning_methodology`.")
+
+    def ready_argument_list(self, arguments):
+        arglist = super().ready_argument_list(arguments)
+        if arglist is None:
+            arglist = []
+        return arglist
+    
+    def compile(self, kernel_instance):
+        super().compile(kernel_instance)
+        path = Path(__file__).parent.parent.parent / "hyperparamtuning"
+        path.mkdir(exist_ok=True)
 
         # strategy settings
         strategy: str = kernel_instance.arguments[0]
@@ -104,18 +130,9 @@ def compile(self, kernel_instance):
             'search_method_hyperparameters': hyperparams
         }]
 
-        # any additional settings
-        override = { 
-            "experimental_groups_defaults": { 
-                "repeats": 25,
-                "samples": self.iterations,
-                "minimum_fraction_of_budget_valid": 0.01, 
-            }
-        }
-
         name = kernel_instance.name if len(kernel_instance.name) > 0 else kernel_instance.kernel_source.kernel_name
-        experiments_filepath = generate_experiment_file(name, path, searchspace_strategies, applications, gpus, 
-                                                        override=override, generate_unique_file=True, overwrite_existing_file=True)
+        experiments_filepath = generate_experiment_file(name, path, searchspace_strategies, self.applications, self.gpus, 
+                                                        override=self.override, generate_unique_file=True, overwrite_existing_file=True)
         return str(experiments_filepath)
     
     def start_event(self):
diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py
index a7a642da6..c203723b7 100644
--- a/kernel_tuner/core.py
+++ b/kernel_tuner/core.py
@@ -324,10 +324,13 @@ def __init__(
                 observers=observers,
             )
         elif lang.upper() == "HYPERTUNER":
-            dev = HypertunerFunctions(iterations=iterations)
+            dev = HypertunerFunctions(
+                iterations=iterations,
+                compiler_options=compiler_options
+            )
             self.requires_warmup = False
         else:
-            raise ValueError("Sorry, support for languages other than CUDA, OpenCL, HIP, C, and Fortran is not implemented yet")
+            raise NotImplementedError("Sorry, support for languages other than CUDA, OpenCL, HIP, C, and Fortran is not implemented yet")
         self.dev = dev
 
         # look for NVMLObserver and TegraObserver in observers, if present, enable special tunable parameters through nvml/tegra
diff --git a/test/test_hyper.py b/test/test_hyper.py
index d34294585..f0dcdae5b 100644
--- a/test/test_hyper.py
+++ b/test/test_hyper.py
@@ -15,6 +15,25 @@ def test_hyper(env):
 
     target_strategy = "genetic_algorithm"
 
-    result, env = tune_hyper_params(target_strategy, hyper_params, iterations=1, verbose=True, cache=None)
+    compiler_options = {
+        "gpus": ["A100", "MI250X"],
+        "override": { 
+            "experimental_groups_defaults": { 
+                "repeats": 1,
+                "samples": 1,
+                "minimum_fraction_of_budget_valid": 0.01, 
+            },
+            "statistics_settings": {
+                "cutoff_percentile": 0.90,
+                "cutoff_percentile_start": 0.01,
+                "cutoff_type": "time",
+                "objective_time_keys": [
+                    "all"
+                ]
+            }
+        }
+    }
+
+    result, env = tune_hyper_params(target_strategy, hyper_params, iterations=1, compiler_options=compiler_options, verbose=True, cache=None)
     assert len(result) == 2
     assert 'best_config' in env

From f6811ab712863738182ab986256c3a8df4cb0e7d Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 14 May 2025 13:31:52 +0200
Subject: [PATCH 162/168] Added firefly to hyperparameter tuning, various minor
 improvements

---
 kernel_tuner/backends/hypertuner.py          | 3 ++-
 kernel_tuner/hyper.py                        | 8 ++++++++
 kernel_tuner/strategies/genetic_algorithm.py | 2 +-
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index d4355d5ba..e33a9087d 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -77,7 +77,8 @@ def __init__(self, iterations, compiler_options=None):
             "experimental_groups_defaults": { 
                 "repeats": 25,
                 "samples": self.iterations,
-                "minimum_fraction_of_budget_valid": 0.01, 
+                "minimum_fraction_of_budget_valid": 0.1,
+                "minimum_number_of_valid_search_iterations": 10,
             },
             "statistics_settings": {
                 "cutoff_percentile": 0.95,
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index ed61558e5..9942a2414 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -107,6 +107,14 @@ def put_if_not_present(target_dict, key, value):
             'c1': [1.0, 2.0, 3.0],
             'c2': [0.5, 1.0, 1.5]
         }
+    elif strategy_to_tune.lower() == "firefly_algorithm":
+        hyperparams = {
+            'popsize': [10, 20, 30],
+            'maxiter': [50, 100, 150],
+            'B0': [0.5, 1.0, 1.5],
+            'gamma': [0.1, 0.25, 0.5],
+            'alpha': [0.1, 0.2, 0.3]
+        }
     elif strategy_to_tune.lower() == "greedy_ils":
         hyperparams = {
             'neighbor': ['Hamming', 'adjacent'],
diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 8c6fbde41..1cf0ca32d 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -189,7 +189,7 @@ def repair(self, dna):
                 # if we have found valid neighboring configurations, select one at random
                 if len(neighbors) > 0:
                     new_dna = list(random.choice(neighbors))
-                    print(f"GA crossover resulted in invalid config {dna=}, repaired dna to {new_dna=}")
+                    # print(f"GA crossover resulted in invalid config {dna=}, repaired dna to {new_dna=}")
                     return new_dna
 
         return dna

From e4af9f7d30ba1ff08a65731943d431851038a5ea Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 16 May 2025 00:49:40 +0200
Subject: [PATCH 163/168] Added explicit restrictions definition to
 hyperparameter tuning

---
 kernel_tuner/backends/hypertuner.py | 2 +-
 kernel_tuner/hyper.py               | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index e33a9087d..ce090e944 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -78,7 +78,7 @@ def __init__(self, iterations, compiler_options=None):
                 "repeats": 25,
                 "samples": self.iterations,
                 "minimum_fraction_of_budget_valid": 0.1,
-                "minimum_number_of_valid_search_iterations": 10,
+                "minimum_number_of_valid_search_iterations": 5,
             },
             "statistics_settings": {
                 "cutoff_percentile": 0.95,
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 9942a2414..bb957c01b 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -17,7 +17,7 @@ def randpath():
         path = randpath()
     return path
 
-def tune_hyper_params(target_strategy: str, hyper_params: dict, *args, **kwargs):
+def tune_hyper_params(target_strategy: str, hyper_params: dict, restrictions: list, *args, **kwargs):
     """Tune hyperparameters for a given strategy and kernel.
 
     This function is to be called just like tune_kernel, except that you specify a strategy
@@ -80,7 +80,7 @@ def put_if_not_present(target_dict, key, value):
     name = f"hyperparamtuning_{target_strategy.lower()}"
 
     # execute the hyperparameter tuning
-    result, env = kernel_tuner.tune_kernel(name, None, [], arguments, hyper_params, *args, lang='Hypertuner',
+    result, env = kernel_tuner.tune_kernel(name, None, [], arguments, hyper_params, restrictions=restrictions, *args, lang='Hypertuner',
                                     objective='score', objective_higher_is_better=True, iterations=iterations, **kwargs)
     
     # remove the temporary cachefile and return only unique results in order
@@ -99,6 +99,7 @@ def put_if_not_present(target_dict, key, value):
     strategy_to_tune = args.strategy_to_tune
 
     # select the hyperparameter parameters for the selected optimization algorithm
+    restrictions = []
     if strategy_to_tune.lower() == "pso":
         hyperparams = {
             'popsize': [10, 20, 30],
@@ -169,6 +170,6 @@ def put_if_not_present(target_dict, key, value):
         raise ValueError(f"Invalid argument {strategy_to_tune=}")
 
     # run the hyperparameter tuning
-    result, env = tune_hyper_params(strategy_to_tune.lower(), hyperparams)
+    result, env = tune_hyper_params(strategy_to_tune.lower(), hyperparams, restrictions=restrictions)
     print(result)
     print(env['best_config'])

From 5f3b6fcae0ee690b0923fd79f0f338dc19396dcc Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Fri, 16 May 2025 18:20:16 +0200
Subject: [PATCH 164/168] Updated tune_kernel_T1 to be more broadly applicable

---
 kernel_tuner/interface.py | 32 ++++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index db8ab59ae..ae8927f3b 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -870,29 +870,37 @@ def tune_kernel_T1(
     simulation_mode=False,
     output_T4=True,
     iterations=7,
-    strategy_options=None,
-):
-    """Call the tune function with a T1 input file."""
+    device=None,
+    strategy: str=None,
+    strategy_options: dict={},
+) -> tuple:
+    """
+    Call the tune function with a T1 input file.
+    
+    The device and strategy from the input file can be overridden by passing them explicitly; when a strategy is passed, the given strategy_options are used instead of the search attributes in the input file.
+    """
     inputs = get_input_file(input_filepath)
     kernelspec: dict = inputs["KernelSpecification"]
     kernel_name: str = kernelspec["KernelName"]
     kernel_filepath = Path(kernelspec["KernelFile"])
     kernel_source = (
-        kernel_filepath if kernel_filepath.exists() else Path(input_filepath).parent.parent / kernel_filepath
+        kernel_filepath if kernel_filepath.exists() else Path(input_filepath).parent / kernel_filepath
+    )
+    kernel_source = (
+        kernel_source if kernel_source.exists() else Path(input_filepath).parent.parent / kernel_filepath
     )
     assert kernel_source.exists(), f"KernelFile '{kernel_source}' does not exist at {kernel_source.resolve()}"
     language: str = kernelspec["Language"]
     problem_size = kernelspec["ProblemSize"]
-    device = kernelspec["Device"]["Name"]
-    strategy = inputs["Search"]["Name"]
-    if "Attributes" in inputs["Search"]:
-        strategy_options = {}
-        for attribute in inputs["Search"]["Attributes"]:
-            strategy_options[attribute["Name"]] = attribute["Value"]
+    if device is None:
+        device = kernelspec["Device"]["Name"]
+    if strategy is None:
+        strategy = inputs["Search"]["Name"]
+        if "Attributes" in inputs["Search"]:
+            for attribute in inputs["Search"]["Attributes"]:
+                strategy_options[attribute["Name"]] = attribute["Value"]
     if "Budget" in inputs:
         budget = inputs["Budget"][0]
-        if strategy_options is None:
-            strategy_options = {}
         if budget["Type"] == "ConfigurationCount":
             strategy_options["max_fevals"] = budget["BudgetValue"]
         elif budget["Type"] == "TuningDuration":

From 7f3a4a3dc05b3b963faa5691d024b08c028cb6b9 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Sat, 24 May 2025 12:53:53 +0200
Subject: [PATCH 165/168] Updated hyperparameters to newly tuned defaults

---
 kernel_tuner/strategies/genetic_algorithm.py   | 8 ++++----
 kernel_tuner/strategies/pso.py                 | 6 +++---
 kernel_tuner/strategies/simulated_annealing.py | 2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 1cf0ca32d..2e6104773 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -10,10 +10,10 @@
 from kernel_tuner.strategies.common import CostFunc
 
 _options = dict(
-    popsize=("population size", 30),
-    maxiter=("maximum number of generations", 30),
-    method=("crossover method to use, choose any from single_point, two_point, uniform, disruptive_uniform", "uniform"),
-    mutation_chance=("chance to mutate is 1 in mutation_chance", 20),
+    popsize=("population size", 20),
+    maxiter=("maximum number of generations", 150),
+    method=("crossover method to use, choose any from single_point, two_point, uniform, disruptive_uniform", "single_point"),
+    mutation_chance=("chance to mutate is 1 in mutation_chance", 5),
     constraint_aware=("constraint-aware optimization (True/False)", True),
 )
 
diff --git a/kernel_tuner/strategies/pso.py b/kernel_tuner/strategies/pso.py
index a7b75ed48..a02aed1c5 100644
--- a/kernel_tuner/strategies/pso.py
+++ b/kernel_tuner/strategies/pso.py
@@ -11,11 +11,11 @@
 from kernel_tuner.strategies.common import CostFunc, scale_from_params
 
 _options = dict(
-    popsize=("Population size", 20),
-    maxiter=("Maximum number of iterations", 150),
+    popsize=("Population size", 30),
+    maxiter=("Maximum number of iterations", 100),
     w=("Inertia weight constant", 0.5),
     c1=("Cognitive constant", 3.0),
-    c2=("Social constant", 1.5),
+    c2=("Social constant", 0.5),
     constraint_aware=("constraint-aware optimization (True/False)", True))
 
 def tune(searchspace: Searchspace, runner, tuning_options):
diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py
index 741800d24..d01ba7e4f 100644
--- a/kernel_tuner/strategies/simulated_annealing.py
+++ b/kernel_tuner/strategies/simulated_annealing.py
@@ -11,7 +11,7 @@
 
 
 _options = dict(T=("Starting temperature", 0.5),
-                T_min=("End temperature", 0.0001),
+                T_min=("End temperature", 0.001),
                 alpha=("Alpha parameter", 0.9975),
                 maxiter=("Number of iterations within each annealing step", 2),
                 constraint_aware=("constraint-aware optimization (True/False)", True))

From 80a5b62db227363496a3764991525018398ccf86 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 28 May 2025 16:18:28 +0200
Subject: [PATCH 166/168] Set default arguments if not provided

---
 kernel_tuner/hyper.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index bb957c01b..00ee42795 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -68,11 +68,12 @@ def tune_hyper_params(target_strategy: str, hyper_params: dict, restrictions: li
     def put_if_not_present(target_dict, key, value):
         target_dict[key] = value if key not in target_dict else target_dict[key]
 
+    # set default arguments if not provided
     put_if_not_present(kwargs, "verbose", True)
     put_if_not_present(kwargs, "quiet", False)
-    kwargs['simulation_mode'] = False
-    kwargs['strategy'] = 'brute_force'
-    kwargs['verify'] = None
+    put_if_not_present(kwargs, "simulation_mode", False)
+    put_if_not_present(kwargs, "strategy", brute_force)
+    put_if_not_present(kwargs, 'verify', None)
     arguments = [target_strategy]
 
     # IMPORTANT when running this script in parallel, always make sure the below name is unique among your runs!

From e9797e246f78c68db890be5a7077acceea2c88e9 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 28 May 2025 17:06:10 +0200
Subject: [PATCH 167/168] Made Hypertuner backend compatible with changes to
 Backend ABC

---
 kernel_tuner/backends/hypertuner.py | 4 ++++
 kernel_tuner/hyper.py               | 2 +-
 test/test_hyper.py                  | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index ce090e944..50971f5aa 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -168,3 +168,7 @@ def memcpy_dtoh(self, dest, src):
     
     def memcpy_htod(self, dest, src):
         return super().memcpy_htod(dest, src)
+
+    def refresh_memory(self, device_memory, host_arguments, should_sync):
+        """This is a no-op for the hypertuner backend, as it does not manage memory directly."""
+        pass
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 00ee42795..97bc01567 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -72,7 +72,7 @@ def put_if_not_present(target_dict, key, value):
     put_if_not_present(kwargs, "verbose", True)
     put_if_not_present(kwargs, "quiet", False)
     put_if_not_present(kwargs, "simulation_mode", False)
-    put_if_not_present(kwargs, "strategy", brute_force)
+    put_if_not_present(kwargs, "strategy", 'brute_force')
     put_if_not_present(kwargs, 'verify', None)
     arguments = [target_strategy]
 
diff --git a/test/test_hyper.py b/test/test_hyper.py
index f0dcdae5b..7863c2e47 100644
--- a/test/test_hyper.py
+++ b/test/test_hyper.py
@@ -34,6 +34,6 @@ def test_hyper(env):
         }
     }
 
-    result, env = tune_hyper_params(target_strategy, hyper_params, iterations=1, compiler_options=compiler_options, verbose=True, cache=None)
+    result, env = tune_hyper_params(target_strategy, hyper_params, restrictions=[], iterations=1, compiler_options=compiler_options, verbose=True, cache=None)
     assert len(result) == 2
     assert 'best_config' in env

From 1a4c439705a4dfcfeed3460763ba852a0b75e044 Mon Sep 17 00:00:00 2001
From: fjwillemsen <fjwillemsen@icloud.com>
Date: Wed, 28 May 2025 18:25:41 +0200
Subject: [PATCH 168/168] Adjusted GA popsize to only be adjusted when
 necessary

---
 kernel_tuner/strategies/genetic_algorithm.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py
index 2e6104773..27f07e8db 100644
--- a/kernel_tuner/strategies/genetic_algorithm.py
+++ b/kernel_tuner/strategies/genetic_algorithm.py
@@ -24,7 +24,8 @@ def tune(searchspace: Searchspace, runner, tuning_options):
     pop_size, generations, method, mutation_chance, constraint_aware = common.get_options(options, _options)
 
     # if necessary adjust the popsize to a sensible value based on search space size
-    pop_size = min(round((searchspace.size / generations) * 3), pop_size)
+    if pop_size < 2 or pop_size > np.floor(searchspace.size / 2):
+        pop_size = min(max(round((searchspace.size / generations) * 3), 2), pop_size)
 
     GA = GeneticAlgorithm(pop_size, searchspace, method, mutation_chance, constraint_aware)