From 69d83e9261e762b78bf571ab80bf17cd441ef61e Mon Sep 17 00:00:00 2001
From: jmehault
Date: Mon, 26 Nov 2018 10:29:08 +0100
Subject: [PATCH 01/10] Test Commit

From 59eeecf108e8ea7dd55dbc5f690014adbbaddf45 Mon Sep 17 00:00:00 2001
From: jmehault
Date: Mon, 26 Nov 2018 10:37:56 +0100
Subject: [PATCH 02/10] Test Commit

From 848c47a630763ce1105be58123c9070490d067ff Mon Sep 17 00:00:00 2001
From: Phypho
Date: Tue, 27 Nov 2018 12:11:10 +0100
Subject: [PATCH 03/10] add parameter typing

---
 bayes_opt/bayesian_optimization.py |  1 +
 bayes_opt/target_space.py          | 29 +++++++++------
 bayes_opt/util.py                  | 58 +++++++++++++++++++++++++-----
 examples/basic-tour.ipynb          | 30 ++++++++--------
 4 files changed, 84 insertions(+), 34 deletions(-)

diff --git a/bayes_opt/bayesian_optimization.py b/bayes_opt/bayesian_optimization.py
index 850afde3d..b6265877b 100644
--- a/bayes_opt/bayesian_optimization.py
+++ b/bayes_opt/bayesian_optimization.py
@@ -129,6 +129,7 @@ def suggest(self, utility_function):
             gp=self._gp,
             y_max=self._space.target.max(),
             bounds=self._space.bounds,
+            btypes=self._space.btypes,
             random_state=self._random_state
         )

diff --git a/bayes_opt/target_space.py b/bayes_opt/target_space.py
index 4b6febce1..76e9ad926 100644
--- a/bayes_opt/target_space.py
+++ b/bayes_opt/target_space.py
@@ -16,7 +16,7 @@ class TargetSpace(object):
     -------
     >>> def target_func(p1, p2):
     >>>     return p1 + p2
-    >>> pbounds = {'p1': (0, 1), 'p2': (1, 100)}
+    >>> pbounds = {'p1': [float, (0, 1)], 'p2': [int, (1, 100)]}
     >>> space = TargetSpace(target_func, pbounds, random_state=0)
     >>> x = space.random_points(1)[0]
     >>> y = space.register_point(x)
@@ -30,7 +30,7 @@ def __init__(self, target_func, pbounds, random_state=None):
             Function to be maximized.

         pbounds : dict
-            Dictionary with parameters names as keys and a tuple with minimum
+            Dictionary with parameter names as keys and a list with the parameter type first and a tuple with the minimum
             and maximum values.
        random_state : int, RandomState, or None
            optionally specify a seed for a random number generator
        """
@@ -44,10 +44,9 @@ def __init__(self, target_func, pbounds, random_state=None):
         # Get the name of the parameters
         self._keys = sorted(pbounds)
         # Create an array with parameters bounds
-        self._bounds = np.array(
-            [item[1] for item in sorted(pbounds.items(), key=lambda x: x[0])],
-            dtype=np.float
-        )
+        self._bounds = np.array([list(pbounds[item][1]) for item in self._keys], dtype=float)
+        # Create an array with the parameters type
+        self._btypes = np.array([pbounds[item][0] for item in self._keys], dtype=type)

         # preallocated memory for X and Y points
         self._params = np.empty(shape=(0, self.dim))
@@ -87,6 +86,10 @@ def keys(self):
     def bounds(self):
         return self._bounds

+    @property
+    def btypes(self):
+        return self._btypes
+
     def params_to_array(self, params):
         try:
             assert set(params) == set(self.keys)
@@ -207,15 +210,21 @@ def random_sample(self):
         Example
         -------
         >>> target_func = lambda p1, p2: p1 + p2
-        >>> pbounds = {'p1': (0, 1), 'p2': (1, 100)}
+        >>> pbounds = {'p1': [float, (0, 1)], 'p2': [int, (1, 100)]}
         >>> space = TargetSpace(target_func, pbounds, random_state=0)
         >>> space.random_points(1)
-        array([[ 55.33253689, 0.54488318]])
+        array([[ 0.54488318, 55]])
         """
-        # TODO: support integer, category, and basic scipy.optimize constraints
+        # TODO: support category, and basic scipy.optimize constraints
+        # data = np.empty((1, self.dim))
+        # for col, (lower, upper) in enumerate(self._bounds):
+        #     data.T[col] = self.random_state.uniform(lower, upper, size=1)
         data = np.empty((1, self.dim))
         for col, (lower, upper) in enumerate(self._bounds):
-            data.T[col] = self.random_state.uniform(lower, upper, size=1)
+            if self.btypes[col] != int:
+                data.T[col] = self.random_state.uniform(lower, upper, size=1)
+            if self.btypes[col] == int:
+                data.T[col] = self.random_state.randint(int(lower), int(upper), size=1)
         return data.ravel()

     def max(self):

diff --git a/bayes_opt/util.py b/bayes_opt/util.py
index 67fba9d9b..58d7ec532 100644
--- a/bayes_opt/util.py
+++ b/bayes_opt/util.py
@@ -4,7 +4,7 @@
 from scipy.optimize import minimize


-def acq_max(ac, gp, y_max, bounds, random_state, n_warmup=100000, n_iter=250):
+def acq_max(ac, gp, y_max, bounds, btypes, random_state, n_warmup=100000, n_iter=250):
     """
     A function to find the maximum of the acquisition function

@@ -26,6 +26,9 @@ def acq_max(ac, gp, y_max, bounds, random_state, n_warmup=100000, n_iter=250):
     :param bounds:
         The variables bounds to limit the search of the acq max.

+    :param btypes:
+        The types of the variables.
+
     :param random_state:
         instance of np.RandomState random number generator

@@ -41,18 +44,34 @@ def acq_max(ac, gp, y_max, bounds, btypes, random_state, n_warmup=100000, n_iter=250):
     """
     # Warm up with random points
-    x_tries = random_state.uniform(bounds[:, 0], bounds[:, 1],
-                                   size=(n_warmup, bounds.shape[0]))
+    x_tries = np.empty((n_warmup, bounds.shape[0]))
+    for col, name in enumerate(bounds):
+        lower, upper = name
+        if btypes[col] != int:
+            x_tries[:, col] = random_state.uniform(lower, upper, size=n_warmup)
+        if btypes[col] == int:
+            x_tries[:, col] = random_state.randint(int(lower), int(upper), size=n_warmup)
+    # x_tries = random_state.uniform(bounds[:, 0], bounds[:, 1],
+    #                                size=(n_warmup, bounds.shape[0]))
     ys = ac(x_tries, gp=gp, y_max=y_max)
     x_max = x_tries[ys.argmax()]
     max_acq = ys.max()

     # Explore the parameter space more thoroughly
-    x_seeds = random_state.uniform(bounds[:, 0], bounds[:, 1],
-                                   size=(n_iter, bounds.shape[0]))
+    x_seeds = np.empty((n_iter, bounds.shape[0]))
+    for col, name in enumerate(bounds):
+        lower, upper = name
+        if btypes[col] != int:
+            x_seeds[:, col] = random_state.uniform(lower, upper, size=n_iter)
+        if btypes[col] == int:
+            x_seeds[:, col] = random_state.randint(int(lower), int(upper), size=n_iter)
+    # x_seeds = random_state.uniform(bounds[:, 0], bounds[:, 1],
+    #                                size=(n_iter, bounds.shape[0]))
     for x_try in x_seeds:
         # Find the minimum of minus the acquisition function
-        res = minimize(lambda x: -ac(x.reshape(1, -1), gp=gp, y_max=y_max),
+        ac_op = lambda x: -ac(x.reshape(1, -1), gp=gp, y_max=y_max)
+        res = minimize(ac_op,
                        x_try.reshape(1, -1),
                        bounds=bounds,
                        method="L-BFGS-B")
@@ -61,10 +80,31 @@ def acq_max(ac, gp, y_max, bounds, btypes, random_state, n_warmup=100000, n_iter=250):
         if not res.success:
             continue

+        # If integer in list of bounds
+        # search minimum between surrounding integers of the detected extremal point
+        if int in btypes:
+            x_inf = res.x.copy()
+            x_sup = res.x.copy()
+            for i, (val, t) in enumerate(zip(res.x, btypes)):
+                x_inf[i] = t(val)
+                x_sup[i] = t(val + 1) if t == int else t(val)
+            # Store it if better than previous minimum(maximum).
+            x_ext = [x_inf, x_sup]
+            if max_acq is None or -res.fun[0] >= max_acq:
+                max_acq = -1*np.minimum(ac_op(x_inf), ac_op(x_sup))
+                x_argmax = np.argmin((ac_op(x_inf), ac_op(x_sup)))
+                x_max = x_ext[x_argmax]
+        # If only float in bounds
+        # store it if better than previous minimum(maximum).
+        else:
+            if max_acq is None or -res.fun[0] >= max_acq:
+                x_max = res.x
+                max_acq = -res.fun[0]
+
         # Store it if better than previous minimum(maximum).
-        if max_acq is None or -res.fun[0] >= max_acq:
-            x_max = res.x
-            max_acq = -res.fun[0]
+        # if max_acq is None or -res.fun[0] >= max_acq:
+        #     x_max = res.x
+        #     max_acq = -res.fun[0]

     # Clip output to make sure it lies within the bounds. Due to floating
     # point technicalities this is not always the case.

diff --git a/examples/basic-tour.ipynb b/examples/basic-tour.ipynb
index a21037284..0ac3b5673 100644
--- a/examples/basic-tour.ipynb
+++ b/examples/basic-tour.ipynb
@@ -1,20 +1,5 @@
 {
  "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Basic tour of the Bayesian Optimization package\n",
-    "\n",
-    "This is a constrained global optimization package built upon bayesian inference and gaussian processes, which attempts to find the maximum value of an unknown function in as few iterations as possible. This technique is particularly suited for optimization of high cost functions, in situations where the balance between exploration and exploitation is important.\n",
-    "\n",
-    "Bayesian optimization works by constructing a posterior distribution of functions (gaussian process) that best describes the function you want to optimize. As the number of observations grows, the posterior distribution improves, and the algorithm becomes more certain of which regions in parameter space are worth exploring and which are not, as seen in the picture below.\n",
-    "\n",
-    "As you iterate over and over, the algorithm balances its needs of exploration and exploitation taking into account what it knows about the target function. At each step a Gaussian Process is fitted to the known samples (points previously explored), and the posterior distribution, combined with an exploration strategy (such as UCB (Upper Confidence Bound), or EI (Expected Improvement)), is used to determine the next point that should be explored (see the gif below).\n",
-    "\n",
-    "This process is designed to minimize the number of steps required to find a combination of parameters that is close to the optimal combination. To do so, this method uses a proxy optimization problem (finding the maximum of the acquisition function) that, albeit still a hard problem, is cheaper (in the computational sense) and common tools can be employed. Therefore Bayesian Optimization is most adequate for situations where sampling the function to be optimized is a very expensive endeavor. See the references for a proper discussion of this method."
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -42,6 +27,21 @@
     "    return -x ** 2 - (y - 1) ** 2 + 1"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Basic tour of the Bayesian Optimization package\n",
+    "\n",
+    "This is a constrained global optimization package built upon bayesian inference and gaussian processes, which attempts to find the maximum value of an unknown function in as few iterations as possible. This technique is particularly suited for optimization of high cost functions, in situations where the balance between exploration and exploitation is important.\n",
+    "\n",
+    "Bayesian optimization works by constructing a posterior distribution of functions (gaussian process) that best describes the function you want to optimize. As the number of observations grows, the posterior distribution improves, and the algorithm becomes more certain of which regions in parameter space are worth exploring and which are not, as seen in the picture below.\n",
+    "\n",
+    "As you iterate over and over, the algorithm balances its needs of exploration and exploitation taking into account what it knows about the target function. At each step a Gaussian Process is fitted to the known samples (points previously explored), and the posterior distribution, combined with an exploration strategy (such as UCB (Upper Confidence Bound), or EI (Expected Improvement)), is used to determine the next point that should be explored (see the gif below).\n",
+    "\n",
+    "This process is designed to minimize the number of steps required to find a combination of parameters that is close to the optimal combination. To do so, this method uses a proxy optimization problem (finding the maximum of the acquisition function) that, albeit still a hard problem, is cheaper (in the computational sense) and common tools can be employed. Therefore Bayesian Optimization is most adequate for situations where sampling the function to be optimized is a very expensive endeavor. See the references for a proper discussion of this method."
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},

From 6ef43a5f8f05ce0b1e70b3f4f2f39f3b7619c88f Mon Sep 17 00:00:00 2001
From: Phypho
Date: Tue, 27 Nov 2018 15:32:45 +0100
Subject: [PATCH 04/10] update set_bounds function and tests

---
 bayes_opt/target_space.py  | 18 ++++++++++++++----
 tests/test_target_space.py | 35 +++++++++++++++++++----------------
 2 files changed, 33 insertions(+), 20 deletions(-)

diff --git a/bayes_opt/target_space.py b/bayes_opt/target_space.py
index 76e9ad926..1db3874cd 100644
--- a/bayes_opt/target_space.py
+++ b/bayes_opt/target_space.py
@@ -145,11 +145,11 @@ def register(self, params, target):

         Notes
         -----
-        runs in ammortized constant time
+        runs in amortized constant time

         Example
         -------
-        >>> pbounds = {'p1': (0, 1), 'p2': (1, 100)}
+        >>> pbounds = {'p1': [float, (0, 1)], 'p2': [int, (1, 100)]}
         >>> space = TargetSpace(lambda p1, p2: p1 + p2, pbounds)
         >>> len(space)
         0
@@ -228,7 +228,7 @@ def random_sample(self):
         return data.ravel()

     def max(self):
-        """Get maximum target value found and corresponding parametes."""
+        """Get maximum target value found and corresponding parameters."""
         try:
             res = {
                 'target': self.target.max(),
@@ -257,7 +257,17 @@ def set_bounds(self, new_bounds):
         ----------
         new_bounds : dict
             A dictionary with the parameter name and its new bounds
+
+        Notes
+        -----
+        If the type of a modified parameter is int, the new bounds are
+        rounded to integers.
+        Example: new_bounds = {"p1": (1.2, 8.7)} with "p1" of type int
+        yields the new bounds (1, 9).
         """
         for row, key in enumerate(self.keys):
             if key in new_bounds:
-                self._bounds[row] = new_bounds[key]
+                if self._btypes[row]==int:
+                    lbound = self._btypes[row](np.round(new_bounds[key][0], 0))
+                    ubound = self._btypes[row](np.round(new_bounds[key][1], 0))
+                    new_bounds[key] = (lbound, ubound)
+                self._bounds[row] = list(new_bounds[key])

diff --git a/tests/test_target_space.py b/tests/test_target_space.py
index 7ca8b827c..8e3e11fc8 100644
--- a/tests/test_target_space.py
+++ b/tests/test_target_space.py
@@ -8,21 +8,22 @@ def target_func(**kwargs):
     return sum(kwargs.values())


-PBOUNDS = {'p1': (0, 1), 'p2': (1, 100)}
+PBOUNDS = {'p1': [float, (0, 10)], 'p2': [int, (1, 100)]}


 def test_keys_and_bounds_in_same_order():
     pbounds = {
-        'p1': (0, 1),
-        'p3': (0, 3),
-        'p2': (0, 2),
-        'p4': (0, 4),
+        'p1': [int, (0, 1)],
+        'p3': [int, (0, 3)],
+        'p2': [float, (0, 2)],
+        'p4': [float, (0, 4)]
     }
     space = TargetSpace(target_func, pbounds)

     assert space.dim == len(pbounds)
     assert space.empty
     assert space.keys == ["p1", "p2", "p3", "p4"]
+    assert list(space.btypes) == [int, float, int, float]
     assert all(space.bounds[:, 0] == np.array([0, 0, 0, 0]))
     assert all(space.bounds[:, 1] == np.array([1, 2, 3, 4]))
@@ -124,10 +125,10 @@ def test_probe():

 def test_random_sample():
     pbounds = {
-        'p1': (0, 1),
-        'p3': (0, 3),
-        'p2': (0, 2),
-        'p4': (0, 4),
+        'p1': [int, (0, 1)],
+        'p3': [int, (0, 3)],
+        'p2': [float, (0, 2)],
+        'p4': [float, (0, 4)]
     }
     space = TargetSpace(target_func, pbounds, random_state=8)

     for _ in range(50):
         random_sample = space.random_sample()
@@ -139,6 +140,7 @@ def test_random_sample():

 def test_max():
+    print(PBOUNDS)
     space = TargetSpace(target_func, PBOUNDS)
     assert space.max() == {}

     space.probe(params={"p1": 1, "p2": 2})
@@ -170,10 +172,10 @@ def test_res():

 def test_set_bounds():
     pbounds = {
-        'p1': (0, 1),
-        'p3': (0, 3),
-        'p2': (0, 2),
-        'p4': (0, 4),
+        'p1': [int, (0, 1)],
+        'p3': [int, (0, 3)],
+        'p2': [float, (0, 2)],
+        'p4': [float, (0, 4)]
     }
     space = TargetSpace(target_func, pbounds)

@@ -183,9 +185,10 @@ def test_set_bounds():
     assert all(space.bounds[:, 1] == np.array([1, 2, 3, 4]))

     # Update bounds accordingly
-    space.set_bounds({"p2": (1, 8)})
-    assert all(space.bounds[:, 0] == np.array([0, 1, 0, 0]))
-    assert all(space.bounds[:, 1] == np.array([1, 8, 3, 4]))
+    space.set_bounds({"p3": (1.1, 8.7)})
+    print(space.bounds)
+    assert all(space.bounds[:, 0] == np.array([0, 0, 1, 0]))
+    assert all(space.bounds[:, 1] == np.array([1, 2, 9, 4]))


 if __name__ == '__main__':

From 113a28ff44ffd2e6b265a775fd2714dafe45539a Mon Sep 17 00:00:00 2001
From: phypho
Date: Wed, 28 Nov 2018 12:07:28 +0100
Subject: [PATCH 05/10] add example and update sklearn_example.py

---
 examples/bo_parameterTyping_example.py | 19 +++++++++++++++++++
 examples/sklearn_example.py            | 12 ++++++------
 2 files changed, 25 insertions(+), 6 deletions(-)
 create mode 100644 examples/bo_parameterTyping_example.py

diff --git a/examples/bo_parameterTyping_example.py b/examples/bo_parameterTyping_example.py
new file mode 100644
index 000000000..b1c9b27a2
--- /dev/null
+++ b/examples/bo_parameterTyping_example.py
@@ -0,0 +1,19 @@
+from bayes_opt import BayesianOptimization
+
+# function to be maximized - the optimizer must find (x=0.5; y=10)
+targetFunction = lambda x, y: -(x-0.5) ** 2 - (y - 10) ** 2 + 1
+
+# define parameter bounds
+bounds = {'y': [int, (5, 15)], 'x': [float, (-3, 3)]}
+bo = BayesianOptimization(targetFunction, bounds)
+
+bo.probe({"x":1.4, "y":6})
+bo.probe({"x":2.4, "y":12})
+bo.probe({"x":-2.4, "y":13})
+
+bo.maximize(init_points=10, n_iter=20, kappa=2)
+
+# print results
+print(f'Estimated position of the maximum: {bo.max}')
+print(f'List of tested positions:\n{bo.res}')
+

diff --git a/examples/sklearn_example.py b/examples/sklearn_example.py
index e4e5d88e0..27b32f8ee 100644
--- a/examples/sklearn_example.py
+++ b/examples/sklearn_example.py
@@ -70,7 +70,7 @@ def svc_crossval(expC, expGamma):

     optimizer = BayesianOptimization(
         f=svc_crossval,
-        pbounds={"expC": (-3, 2), "expGamma": (-4, -1)},
+        pbounds={"expC": [float, (-3, 2)], "expGamma": [float, (-4, -1)]},
         random_state=1234,
         verbose=2
     )
@@ -90,8 +90,8 @@ def rfc_crossval(n_estimators, min_samples_split, max_features):
        accordingly.
""" return rfc_cv( - n_estimators=int(n_estimators), - min_samples_split=int(min_samples_split), + n_estimators=n_estimators, + min_samples_split=min_samples_split, max_features=max(min(max_features, 0.999), 1e-3), data=data, targets=targets, @@ -100,9 +100,9 @@ def rfc_crossval(n_estimators, min_samples_split, max_features): optimizer = BayesianOptimization( f=rfc_crossval, pbounds={ - "n_estimators": (10, 250), - "min_samples_split": (2, 25), - "max_features": (0.1, 0.999), + "n_estimators": [int, (10, 250)], + "min_samples_split": [int, (2, 25)], + "max_features": [float, (0.1, 0.999)] }, random_state=1234, verbose=2 From 57df9e15fd9cb669f970250e6402249ce2dc82dc Mon Sep 17 00:00:00 2001 From: phypho Date: Wed, 28 Nov 2018 18:26:20 +0100 Subject: [PATCH 06/10] update PBOUNDS in test_bayesian_optimization.py --- tests/test_bayesian_optimization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_bayesian_optimization.py b/tests/test_bayesian_optimization.py index a2c31e753..62f0fd480 100644 --- a/tests/test_bayesian_optimization.py +++ b/tests/test_bayesian_optimization.py @@ -9,7 +9,7 @@ def target_func(**kwargs): return sum(kwargs.values()) -PBOUNDS = {'p1': (0, 10), 'p2': (0, 10)} +PBOUNDS = {'p1': [float, (0, 10)], 'p2': [int, (0, 10)]} def test_register(): From 92a9e1cc28112cc3078a7eedd873f32d697b25e3 Mon Sep 17 00:00:00 2001 From: phypho Date: Wed, 28 Nov 2018 18:35:07 +0100 Subject: [PATCH 07/10] update test_util.py --- tests/test_util.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/test_util.py b/tests/test_util.py index 04ecf5deb..589ea1ab2 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -61,6 +61,7 @@ def test_acq_with_ucb(): GP, y_max, bounds=np.array([[0, 1], [0, 1]]), + btypes=[float, float], random_state=ensure_rng(0), n_iter=20 ) @@ -79,6 +80,7 @@ def test_acq_with_ei(): GP, y_max, bounds=np.array([[0, 1], [0, 1]]), + btypes=[float, float], random_state=ensure_rng(0), n_iter=200, ) @@ -97,6 +99,7 @@ def test_acq_with_poi(): GP, y_max, bounds=np.array([[0, 1], [0, 1]]), + btypes=[float, float], random_state=ensure_rng(0), n_iter=200, ) @@ -112,22 +115,22 @@ def f(x, y): optimizer = BayesianOptimization( f=f, - pbounds={"x": (-2, 2), "y": (-2, 2)} + pbounds={"x": [float, (-2, 2)], "y": [float, (-2, 2)]} ) assert len(optimizer.space) == 0 - load_logs(optimizer, "./tests/test_logs.json") + load_logs(optimizer, "./test_logs.json") assert len(optimizer.space) == 5 - load_logs(optimizer, ["./tests/test_logs.json"]) + load_logs(optimizer, ["./test_logs.json"]) assert len(optimizer.space) == 5 other_optimizer = BayesianOptimization( f=lambda x: -x ** 2, - pbounds={"x": (-2, 2)} + pbounds={"x": [float, (-2, 2)]} ) with pytest.raises(ValueError): - load_logs(other_optimizer, ["./tests/test_logs.json"]) + load_logs(other_optimizer, ["./test_logs.json"]) if __name__ == '__main__': From 833f96040c8194a5738ae655dfb323c29da317a2 Mon Sep 17 00:00:00 2001 From: phypho Date: Wed, 28 Nov 2018 18:44:06 +0100 Subject: [PATCH 08/10] fix typos --- bayes_opt/target_space.py | 2 +- examples/bo_parameterTyping_example.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bayes_opt/target_space.py b/bayes_opt/target_space.py index 1db3874cd..6ef0c70ae 100644 --- a/bayes_opt/target_space.py +++ b/bayes_opt/target_space.py @@ -266,7 +266,7 @@ def set_bounds(self, new_bounds): """ for row, key in enumerate(self.keys): if key in new_bounds: - if self._btypes[row]==int: + if self._btypes[row] == int: 
                     lbound = self._btypes[row](np.round(new_bounds[key][0], 0))
                     ubound = self._btypes[row](np.round(new_bounds[key][1], 0))
                     new_bounds[key] = (lbound, ubound)

diff --git a/examples/bo_parameterTyping_example.py b/examples/bo_parameterTyping_example.py
index b1c9b27a2..5c399d4da 100644
--- a/examples/bo_parameterTyping_example.py
+++ b/examples/bo_parameterTyping_example.py
@@ -7,9 +7,9 @@
 bounds = {'y': [int, (5, 15)], 'x': [float, (-3, 3)]}
 bo = BayesianOptimization(targetFunction, bounds)

-bo.probe({"x":1.4, "y":6})
-bo.probe({"x":2.4, "y":12})
-bo.probe({"x":-2.4, "y":13})
+bo.probe({"x": 1.4, "y": 6})
+bo.probe({"x": 2.4, "y": 12})
+bo.probe({"x": -2.4, "y": 13})

From 7f189d3ec659499bd5f69fe7c83c1fdb9b316d5e Mon Sep 17 00:00:00 2001
From: phypho
Date: Thu, 29 Nov 2018 08:31:20 +0100
Subject: [PATCH 09/10] change paradigm of parameter typing: add ptypes
 dictionary instead of including types into pbounds

---
 bayes_opt/bayesian_optimization.py     |  4 +-
 bayes_opt/target_space.py              | 59 +++++++++++------
 bayes_opt/util.py                      | 88 ++++++++++++++++----------
 examples/bo_parameterTyping_example.py |  7 +-
 examples/sklearn_example.py            | 11 +++-
 tests/test_bayesian_optimization.py    | 22 +++----
 tests/test_target_space.py             | 57 ++++++++++------
 tests/test_util.py                     |  5 +-
 8 files changed, 154 insertions(+), 99 deletions(-)

diff --git a/bayes_opt/bayesian_optimization.py b/bayes_opt/bayesian_optimization.py
index b6265877b..4a9305885 100644
--- a/bayes_opt/bayesian_optimization.py
+++ b/bayes_opt/bayesian_optimization.py
@@ -64,13 +64,13 @@ def dispatch(self, event):

 class BayesianOptimization(Observable):
-    def __init__(self, f, pbounds, random_state=None, verbose=2):
+    def __init__(self, f, pbounds, ptypes=None, random_state=None, verbose=2):
         """"""
         self._random_state = ensure_rng(random_state)

         # Data structure containing the function to be optimized, the bounds of
         # its domain, and a record of the evaluations we have done so far
-        self._space = TargetSpace(f, pbounds, random_state)
+        self._space = TargetSpace(f, pbounds, ptypes, random_state)

         # queue
         self._queue = Queue()

diff --git a/bayes_opt/target_space.py b/bayes_opt/target_space.py
index 6ef0c70ae..9e00696d6 100644
--- a/bayes_opt/target_space.py
+++ b/bayes_opt/target_space.py
@@ -16,13 +16,14 @@ class TargetSpace(object):
     -------
     >>> def target_func(p1, p2):
     >>>     return p1 + p2
-    >>> pbounds = {'p1': [float, (0, 1)], 'p2': [int, (1, 100)]}
-    >>> space = TargetSpace(target_func, pbounds, random_state=0)
+    >>> pbounds = {'p1': (0, 1), 'p2': (1, 100)}
+    >>> ptypes = {'p1': float, 'p2': int}
+    >>> space = TargetSpace(target_func, pbounds, ptypes, random_state=0)
     >>> x = space.random_points(1)[0]
     >>> y = space.register_point(x)
     >>> assert self.max_point()['max_val'] == y
     """
-    def __init__(self, target_func, pbounds, random_state=None):
+    def __init__(self, target_func, pbounds, ptypes=None, random_state=None):
         """
         Parameters
         ----------
         target_func : function
             Function to be maximized.

         pbounds : dict
-            Dictionary with parameter names as keys and a list with the parameter type first and a tuple with the minimum
-            and maximum values.
+            Dictionary with parameter names as keys and a tuple with the
+            minimum and maximum values.
+        ptypes : dict
+            Dictionary with parameter names and their type
+
         random_state : int, RandomState, or None
             optionally specify a seed for a random number generator
         """
@@ -44,10 +48,19 @@ def __init__(self, target_func, pbounds, ptypes=None, random_state=None):
         # Get the name of the parameters
         self._keys = sorted(pbounds)
         # Create an array with parameters bounds
-        self._bounds = np.array([list(pbounds[item][1]) for item in self._keys], dtype=float)
-        # Create an array with the parameters type
-        self._btypes = np.array([pbounds[item][0] for item in self._keys], dtype=type)
+        self._bounds = np.array([list(pbounds[item]) for item in self._keys], dtype=float)
+        # Create an array with the parameters type if declared
+        if ptypes is None:
+            self._btypes = None
+        else:
+            ## TODO: add exception if parameter names in pbounds and ptypes do not have the same length and content
+            ## TODO: or store pbounds and ptypes as dictionaries
+            try:
+                assert (len(ptypes) == len(pbounds))
+            except AssertionError:
+                raise AssertionError("ptypes and pbounds do not have the same content. "+\
+                                     "ptypes and pbounds must list exactly the same parameters")
+            self._btypes = np.array([ptypes[item] for item in self._keys], dtype=type)

         # preallocated memory for X and Y points
         self._params = np.empty(shape=(0, self.dim))
@@ -149,7 +163,8 @@ def register(self, params, target):

         Example
         -------
-        >>> pbounds = {'p1': [float, (0, 1)], 'p2': [int, (1, 100)]}
+        >>> pbounds = {'p1': (0, 1), 'p2': (1, 100)}
+        >>> ptypes = {'p1': float, 'p2': int}
         >>> space = TargetSpace(lambda p1, p2: p1 + p2, pbounds)
         >>> len(space)
         0
@@ -210,21 +225,23 @@ def random_sample(self):
         Example
         -------
         >>> target_func = lambda p1, p2: p1 + p2
-        >>> pbounds = {'p1': [float, (0, 1)], 'p2': [int, (1, 100)]}
+        >>> pbounds = {'p1': (0, 1), 'p2': (1, 100)}
+        >>> ptypes = {'p1': float, 'p2': int}
         >>> space = TargetSpace(target_func, pbounds, random_state=0)
         >>> space.random_points(1)
         array([[ 0.54488318, 55]])
         """
         # TODO: support category, and basic scipy.optimize constraints
-        # data = np.empty((1, self.dim))
-        # for col, (lower, upper) in enumerate(self._bounds):
-        #     data.T[col] = self.random_state.uniform(lower, upper, size=1)
         data = np.empty((1, self.dim))
-        for col, (lower, upper) in enumerate(self._bounds):
-            if self.btypes[col] != int:
-                data.T[col] = self.random_state.uniform(lower, upper, size=1)
-            if self.btypes[col] == int:
-                data.T[col] = self.random_state.randint(int(lower), int(upper), size=1)
+        if self.btypes is None:
+            for col, (lower, upper) in enumerate(self._bounds):
+                data.T[col] = self.random_state.uniform(lower, upper, size=1)
+        else:
+            for col, (lower, upper) in enumerate(self._bounds):
+                if self.btypes[col] != int:
+                    data.T[col] = self.random_state.uniform(lower, upper, size=1)
+                if self.btypes[col] == int:
+                    data.T[col] = self.random_state.randint(int(lower), int(upper), size=1)
         return data.ravel()

@@ -266,8 +283,10 @@ def set_bounds(self, new_bounds):
         """
         for row, key in enumerate(self.keys):
             if key in new_bounds:
-                if self._btypes[row] == int:
-                    lbound = self._btypes[row](np.round(new_bounds[key][0], 0))
-                    ubound = self._btypes[row](np.round(new_bounds[key][1], 0))
-                    new_bounds[key] = (lbound, ubound)
+                if self._btypes is not None:
+                    if self._btypes[row] == int:
+                        lbound = self._btypes[row](np.round(new_bounds[key][0], 0))
+                        ubound = self._btypes[row](np.round(new_bounds[key][1], 0))
+                        new_bounds[key] = (lbound, ubound)
                 self._bounds[row] = list(new_bounds[key])

diff --git a/bayes_opt/util.py b/bayes_opt/util.py
index 58d7ec532..8ff48295f 100644
--- a/bayes_opt/util.py
+++ b/bayes_opt/util.py
@@ -3,8 +3,39 @@
 from scipy.stats import norm
 from scipy.optimize import minimize

+def generate_trials(n_events, bounds, btypes, random_state):
+    """A function to generate a set of events under several constraints
+
+    Parameters
+    ----------
+    :param n_events:
+        The number of events to generate
+
+    :param bounds:
+        The variables bounds to limit the search of the acq max.
+
+    :param btypes:
+        The types of the variables.
+
+    :param random_state:
+        Instance of np.RandomState random number generator
+    """
+    x_trials = np.empty((n_events, bounds.shape[0]))
+    if btypes is None:
+        x_trials = random_state.uniform(bounds[:, 0], bounds[:, 1],
+                                        size=(n_events, bounds.shape[0]))
+    else:
+        for col, name in enumerate(bounds):
+            lower, upper = name
+            if btypes[col] != int:
+                x_trials[:, col] = random_state.uniform(lower, upper, size=n_events)
+            if btypes[col] == int:
+                x_trials[:, col] = random_state.randint(int(lower), int(upper), size=n_events)
+    return x_trials
+
+
-def acq_max(ac, gp, y_max, bounds, btypes, random_state, n_warmup=100000, n_iter=250):
+def acq_max(ac, gp, y_max, bounds, random_state, btypes=None, n_warmup=100000, n_iter=250):
     """
     A function to find the maximum of the acquisition function

@@ -42,32 +73,14 @@ def acq_max(ac, gp, y_max, bounds, btypes, random_state, n_warmup=100000, n_iter=250):
     -------
     :return: x_max, The arg max of the acquisition function.
     """
     # Warm up with random points
-    x_tries = np.empty((n_warmup, bounds.shape[0]))
-    for col, name in enumerate(bounds):
-        lower, upper = name
-        if btypes[col] != int:
-            x_tries[:, col] = random_state.uniform(lower, upper, size=n_warmup)
-        if btypes[col] == int:
-            x_tries[:, col] = random_state.randint(int(lower), int(upper), size=n_warmup)
-    # x_tries = random_state.uniform(bounds[:, 0], bounds[:, 1],
-    #                                size=(n_warmup, bounds.shape[0]))
+    x_tries = generate_trials(n_warmup, bounds, btypes, random_state)
     ys = ac(x_tries, gp=gp, y_max=y_max)
     x_max = x_tries[ys.argmax()]
     max_acq = ys.max()

     # Explore the parameter space more thoroughly
-    x_seeds = np.empty((n_iter, bounds.shape[0]))
-    for col, name in enumerate(bounds):
-        lower, upper = name
-        if btypes[col] != int:
-            x_seeds[:, col] = random_state.uniform(lower, upper, size=n_iter)
-        if btypes[col] == int:
-            x_seeds[:, col] = random_state.randint(int(lower), int(upper), size=n_iter)
-    # x_seeds = random_state.uniform(bounds[:, 0], bounds[:, 1],
-    #                                size=(n_iter, bounds.shape[0]))
+    x_seeds = generate_trials(n_iter, bounds, btypes, random_state)
     for x_try in x_seeds:
         # Find the minimum of minus the acquisition function
         ac_op = lambda x: -ac(x.reshape(1, -1), gp=gp, y_max=y_max)

@@ -82,20 +95,25 @@ def acq_max(ac, gp, y_max, bounds, random_state, btypes=None, n_warmup=100000, n_iter=250):
         # If integer in list of bounds
         # search minimum between surrounding integers of the detected extremal point
-        if int in btypes:
-            x_inf = res.x.copy()
-            x_sup = res.x.copy()
-            for i, (val, t) in enumerate(zip(res.x, btypes)):
-                x_inf[i] = t(val)
-                x_sup[i] = t(val + 1) if t == int else t(val)
-            # Store it if better than previous minimum(maximum).
-            x_ext = [x_inf, x_sup]
-            if max_acq is None or -res.fun[0] >= max_acq:
-                max_acq = -1*np.minimum(ac_op(x_inf), ac_op(x_sup))
-                x_argmax = np.argmin((ac_op(x_inf), ac_op(x_sup)))
-                x_max = x_ext[x_argmax]
-        # If only float in bounds
-        # store it if better than previous minimum(maximum).
+        if btypes is not None:
+            if int in btypes:
+                x_inf = res.x.copy()
+                x_sup = res.x.copy()
+                for i, (val, t) in enumerate(zip(res.x, btypes)):
+                    x_inf[i] = t(val)
+                    x_sup[i] = t(val + 1) if t == int else t(val)
+                # Store it if better than previous minimum(maximum).
+                x_ext = [x_inf, x_sup]
+                if max_acq is None or -res.fun[0] >= max_acq:
+                    max_acq = -1*np.minimum(ac_op(x_inf), ac_op(x_sup))
+                    x_argmax = np.argmin((ac_op(x_inf), ac_op(x_sup)))
+                    x_max = x_ext[x_argmax]
+            else:
+                # If only float in bounds
+                # store it if better than previous minimum(maximum).
+                if max_acq is None or -res.fun[0] >= max_acq:
+                    x_max = res.x
+                    max_acq = -res.fun[0]
+        else:
+            if max_acq is None or -res.fun[0] >= max_acq:
+                x_max = res.x
+                max_acq = -res.fun[0]

diff --git a/examples/bo_parameterTyping_example.py b/examples/bo_parameterTyping_example.py
index 5c399d4da..cd94669ea 100644
--- a/examples/bo_parameterTyping_example.py
+++ b/examples/bo_parameterTyping_example.py
@@ -4,14 +4,15 @@
 targetFunction = lambda x, y: -(x-0.5) ** 2 - (y - 10) ** 2 + 1

 # define parameter bounds
-bounds = {'y': [int, (5, 15)], 'x': [float, (-3, 3)]}
-bo = BayesianOptimization(targetFunction, bounds)
+bounds = {'y': (5, 15), 'x': (-3, 3)}
+btypes = {'y': int, 'x': float}
+bo = BayesianOptimization(targetFunction, bounds, ptypes=btypes)

 bo.probe({"x": 1.4, "y": 6})
 bo.probe({"x": 2.4, "y": 12})
 bo.probe({"x": -2.4, "y": 13})

-bo.maximize(init_points=10, n_iter=20, kappa=2)
+bo.maximize(init_points=5, n_iter=5, kappa=2)

 # print results
 print(f'Estimated position of the maximum: {bo.max}')

diff --git a/examples/sklearn_example.py b/examples/sklearn_example.py
index 27b32f8ee..770f3b1c3 100644
--- a/examples/sklearn_example.py
+++ b/examples/sklearn_example.py
@@ -71,6 +71,7 @@ def svc_crossval(expC, expGamma):
     optimizer = BayesianOptimization(
         f=svc_crossval,
-        pbounds={"expC": [float, (-3, 2)], "expGamma": [float, (-4, -1)]},
+        pbounds={"expC": (-3, 2), "expGamma": (-4, -1)},
+        ptypes={"expC": float, "expGamma": float},
         random_state=1234,
         verbose=2
     )
@@ -100,9 +101,13 @@ def rfc_crossval(n_estimators, min_samples_split, max_features):
     optimizer = BayesianOptimization(
         f=rfc_crossval,
         pbounds={
-            "n_estimators": [int, (10, 250)],
-            "min_samples_split": [int, (2, 25)],
-            "max_features": [float, (0.1, 0.999)]
+            "n_estimators": (10, 250),
+            "min_samples_split": (2, 25),
+            "max_features": (0.1, 0.999)
         },
+        ptypes={"n_estimators": int,
+                "min_samples_split": int,
+                "max_features": float
+                },
         random_state=1234,
         verbose=2

diff --git a/tests/test_bayesian_optimization.py b/tests/test_bayesian_optimization.py
index 62f0fd480..1775ebd8b 100644
--- a/tests/test_bayesian_optimization.py
+++ b/tests/test_bayesian_optimization.py
@@ -9,11 +9,11 @@ def target_func(**kwargs):
     return sum(kwargs.values())


-PBOUNDS = {'p1': [float, (0, 10)], 'p2': [int, (0, 10)]}
+PBOUNDS = {'p1': (0, 10), 'p2': (0, 10)}
+PTYPES = {'p1': float, 'p2': int}

 def test_register():
-    optimizer = BayesianOptimization(target_func, PBOUNDS, random_state=1)
+    optimizer = BayesianOptimization(target_func, PBOUNDS, PTYPES, random_state=1)

     assert len(optimizer.space) == 0
     optimizer.register(params={"p1": 1, "p2": 2}, target=3)
@@ -31,7 +31,7 @@ def test_register():

 def test_probe_lazy():
-    optimizer = BayesianOptimization(target_func, PBOUNDS, random_state=1)
+    optimizer = BayesianOptimization(target_func, PBOUNDS, PTYPES, random_state=1)

     optimizer.probe(params={"p1": 1, "p2": 2}, lazy=True)
     assert len(optimizer.space) == 0
@@ -47,7 +47,7 @@ def test_probe_lazy():

 def test_probe_eager():
-    optimizer = BayesianOptimization(target_func, PBOUNDS, random_state=1)
+    optimizer = BayesianOptimization(target_func, PBOUNDS, PTYPES, random_state=1)

     optimizer.probe(params={"p1": 1, "p2": 2}, lazy=False)
     assert len(optimizer.space) == 1
@@ -70,7 +70,7 @@ def test_probe_eager():

 def test_suggest_at_random():
     util = UtilityFunction(kind="ucb", kappa=5, xi=0)
-    optimizer = BayesianOptimization(target_func, PBOUNDS, random_state=1)
+    optimizer = BayesianOptimization(target_func, PBOUNDS, PTYPES, random_state=1)

     for _ in range(50):
         sample = optimizer.space.params_to_array(optimizer.suggest(util))
@@ -81,7 +81,7 @@ def test_suggest_at_random():

 def test_suggest_with_one_observation():
     util = UtilityFunction(kind="ucb", kappa=5, xi=0)
-    optimizer = BayesianOptimization(target_func, PBOUNDS, random_state=1)
+    optimizer = BayesianOptimization(target_func, PBOUNDS, PTYPES, random_state=1)

     optimizer.register(params={"p1": 1, "p2": 2}, target=3)
@@ -98,7 +98,7 @@ def test_suggest_with_one_observation():

 def test_prime_queue_all_empty():
-    optimizer = BayesianOptimization(target_func, PBOUNDS, random_state=1)
+    optimizer = BayesianOptimization(target_func, PBOUNDS, PTYPES, random_state=1)

     assert len(optimizer._queue) == 0
     assert len(optimizer.space) == 0
@@ -108,7 +108,7 @@ def test_prime_queue_all_empty():

 def test_prime_queue_empty_with_init():
-    optimizer = BayesianOptimization(target_func, PBOUNDS, random_state=1)
+    optimizer = BayesianOptimization(target_func, PBOUNDS, PTYPES, random_state=1)

     assert len(optimizer._queue) == 0
     assert len(optimizer.space) == 0
@@ -118,7 +118,7 @@ def test_prime_queue_empty_with_init():

 def test_prime_queue_with_register():
-    optimizer = BayesianOptimization(target_func, PBOUNDS, random_state=1)
+    optimizer = BayesianOptimization(target_func, PBOUNDS, PTYPES, random_state=1)

     assert len(optimizer._queue) == 0
     assert len(optimizer.space) == 0
@@ -129,7 +129,7 @@ def test_prime_queue_with_register():

 def test_prime_queue_with_register_and_init():
-    optimizer = BayesianOptimization(target_func, PBOUNDS, random_state=1)
+    optimizer = BayesianOptimization(target_func, PBOUNDS, PTYPES, random_state=1)

     assert len(optimizer._queue) == 0
     assert len(optimizer.space) == 0

diff --git a/tests/test_target_space.py b/tests/test_target_space.py
index 8e3e11fc8..8d5e99e9c 100644
--- a/tests/test_target_space.py
+++ b/tests/test_target_space.py
@@ -8,17 +8,19 @@ def target_func(**kwargs):
     return sum(kwargs.values())


-PBOUNDS = {'p1': [float, (0, 10)], 'p2': [int, (1, 100)]}
+PBOUNDS = {'p1': (0, 10), 'p2': (1, 100)}
+PTYPES = {'p1': float, 'p2': int}


 def test_keys_and_bounds_in_same_order():
     pbounds = {
-        'p1': [int, (0, 1)],
-        'p3': [int, (0, 3)],
-        'p2': [float, (0, 2)],
-        'p4': [float, (0, 4)]
+        'p1': (0, 1),
+        'p3': (0, 3),
+        'p2': (0, 2),
+        'p4': (0, 4)
     }
-    space = TargetSpace(target_func, pbounds)
+    ptypes = {'p1': int, 'p2': float, 'p3': int, 'p4': float}
+    space = TargetSpace(target_func, pbounds, ptypes)

     assert space.dim == len(pbounds)
     assert space.empty
@@ -29,7 +31,7 @@ def test_keys_and_bounds_in_same_order():


 def test_params_to_array():
-    space = TargetSpace(target_func, PBOUNDS)
+    space = TargetSpace(target_func, PBOUNDS, PTYPES)

     assert all(space.params_to_array({"p1": 2, "p2": 3}) == np.array([2, 3]))
     assert all(space.params_to_array({"p2": 2, "p1": 9}) == np.array([9, 2]))
@@ -42,7 +44,7 @@ def test_params_to_array():


 def test_array_to_params():
-    space = TargetSpace(target_func, PBOUNDS)
+    space = TargetSpace(target_func, PBOUNDS, PTYPES)

     assert space.array_to_params(np.array([2, 3])) == {"p1": 2, "p2": 3}
     with pytest.raises(ValueError):
@@ -52,7 +54,7 @@ def test_array_to_params():


 def test_as_array():
-    space = TargetSpace(target_func, PBOUNDS)
+    space = TargetSpace(target_func, PBOUNDS, PTYPES)

     x = space._as_array([0, 1])
     assert x.shape == (2,)
@@ -73,7 +75,7 @@ def test_as_array():


 def test_register():
-    space = TargetSpace(target_func, PBOUNDS)
+    space = TargetSpace(target_func, PBOUNDS, PTYPES)
     assert len(space) == 0

     # registering with dict
@@ -95,7 +97,7 @@ def test_register():


 def test_probe():
-    space = TargetSpace(target_func, PBOUNDS)
+    space = TargetSpace(target_func, PBOUNDS, PTYPES)
     assert len(space) == 0

     # probing with dict
@@ -125,12 +127,13 @@ def test_probe():

 def test_random_sample():
     pbounds = {
-        'p1': [int, (0, 1)],
-        'p3': [int, (0, 3)],
-        'p2': [float, (0, 2)],
-        'p4': [float, (0, 4)]
+        'p1': (0, 1),
+        'p3': (0, 3),
+        'p2': (0, 2),
+        'p4': (0, 4)
     }
-    space = TargetSpace(target_func, pbounds, random_state=8)
+    ptypes = {'p1': int, 'p2': float, 'p3': int, 'p4': float}
+    space = TargetSpace(target_func, pbounds, ptypes, random_state=8)

     for _ in range(50):
         random_sample = space.random_sample()
@@ -141,7 +144,7 @@ def test_random_sample():

 def test_max():
     print(PBOUNDS)
-    space = TargetSpace(target_func, PBOUNDS)
+    space = TargetSpace(target_func, PBOUNDS, PTYPES)
     assert space.max() == {}

     space.probe(params={"p1": 1, "p2": 2})
@@ -152,7 +155,7 @@ def test_max():

 def test_res():
-    space = TargetSpace(target_func, PBOUNDS)
+    space = TargetSpace(target_func, PBOUNDS, PTYPES)
     assert space.res() == []

     space.probe(params={"p1": 1, "p2": 2})
@@ -172,12 +175,13 @@ def test_res():

 def test_set_bounds():
     pbounds = {
-        'p1': [int, (0, 1)],
-        'p3': [int, (0, 3)],
-        'p2': [float, (0, 2)],
-        'p4': [float, (0, 4)]
+        'p1': (0, 1),
+        'p3': (0, 3),
+        'p2': (0, 2),
+        'p4': (0, 4)
     }
-    space = TargetSpace(target_func, pbounds)
+    ptypes = {'p1': int, 'p2': float, 'p3': int, 'p4': float}
+    space = TargetSpace(target_func, pbounds, ptypes)

     # Ignore unknown keys
     space.set_bounds({"other": (7, 8)})
@@ -190,6 +194,13 @@ def test_set_bounds():
     assert all(space.bounds[:, 0] == np.array([0, 0, 1, 0]))
     assert all(space.bounds[:, 1] == np.array([1, 2, 9, 4]))

+    ptypes = None
+    space = TargetSpace(target_func, pbounds, ptypes)
+    space.set_bounds({"p3": (1.1, 8.7)})
+    print(space.bounds)
+    assert all(space.bounds[:, 0] == np.array([0, 0, 1.1, 0]))
+    assert all(space.bounds[:, 1] == np.array([1, 2, 8.7, 4]))
+

 if __name__ == '__main__':

diff --git a/tests/test_util.py b/tests/test_util.py
index 589ea1ab2..747989501 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -115,7 +115,8 @@ def f(x, y):

     optimizer = BayesianOptimization(
         f=f,
-        pbounds={"x": [float, (-2, 2)], "y": [float, (-2, 2)]}
+        pbounds={"x": (-2, 2), "y": (-2, 2)},
+        ptypes={'x': float, 'y': float}
     )

     assert len(optimizer.space) == 0
@@ -127,7 +128,7 @@ def f(x, y):

     other_optimizer = BayesianOptimization(
         f=lambda x: -x ** 2,
-        pbounds={"x": [float, (-2, 2)]}
+        pbounds={"x": (-2, 2)}
     )
     with pytest.raises(ValueError):
         load_logs(other_optimizer, ["./test_logs.json"])

From 7c026d7ca4656f53e7d7745de5498dafad5ab0ce Mon Sep 17 00:00:00 2001
From: phypho
Date: Thu, 29 Nov 2018 11:28:33 +0100
Subject: [PATCH 10/10] update test_util.py

---
 tests/test_util.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_util.py b/tests/test_util.py
index 747989501..e1e8b7098 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -120,10 +120,10 @@ def f(x, y):
     )

     assert len(optimizer.space) == 0
-    load_logs(optimizer, "./test_logs.json")
+    load_logs(optimizer, "./tests/test_logs.json")
     assert len(optimizer.space) == 5

-    load_logs(optimizer, ["./test_logs.json"])
+    load_logs(optimizer, ["./tests/test_logs.json"])
     assert len(optimizer.space) == 5

     other_optimizer = BayesianOptimization(
         f=lambda x: -x ** 2,
         pbounds={"x": (-2, 2)}
     )
     with pytest.raises(ValueError):
-        load_logs(other_optimizer, ["./test_logs.json"])
+        load_logs(other_optimizer, ["./tests/test_logs.json"])
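
After PATCH 10/10, the branch settles on the following interface: pbounds keeps its usual (min, max) tuples, and the optional ptypes dict marks which parameters must be treated as integers. A minimal usage sketch under the assumption that this branch is installed (the ptypes keyword does not exist in the released bayes_opt package, and the objective function here is a toy example, not part of the PR):

    from bayes_opt import BayesianOptimization

    # Toy objective with one float and one int parameter; its maximum is at (x=0.5, y=10).
    def target(x, y):
        return -(x - 0.5) ** 2 - (y - 10) ** 2 + 1

    optimizer = BayesianOptimization(
        f=target,
        pbounds={"x": (-3, 3), "y": (5, 15)},  # plain (min, max) tuples, as before
        ptypes={"x": float, "y": int},         # optional: "y" is sampled as an integer
        random_state=1,
    )

    # Warm-up points draw "y" via randint inside random_sample/generate_trials;
    # L-BFGS-B extrema are compared against their neighbouring integers in acq_max.
    optimizer.maximize(init_points=5, n_iter=10)
    print(optimizer.max)

Omitting ptypes (btypes is None internally) falls back to the all-float behaviour of the unpatched package, which is what keeps the change backward compatible.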