fix!: replace GPy with scikit-learn in GaussianProcessSampler
BREAKING CHANGE: this commit reworks the implementation of GaussianProcessSampler: instead of relying on GPy, it now uses the Gaussian process implementation provided by the scikit-learn package.

The change alters the behaviour of GaussianProcessSampler even when the same random seed is provided, as reflected by the updated expected sampled parameters and losses in tests/test_calibrator.py.

Another notable difference is in the keyword arguments of the GaussianProcessSampler constructor. In particular:
- the keyword argument 'max_iters' has been removed;
- the keyword argument 'jitter' has been added (see the migration sketch below).
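
For users upgrading existing code, the constructor change amounts to dropping 'max_iters' and optionally passing 'jitter'. A minimal before/after sketch (the batch_size and random_state values are placeholders, not taken from the library):

```python
from black_it.samplers.gaussian_process import GaussianProcessSampler

# Before this commit (GPy backend): 'max_iters' capped the GP hyperparameter optimization.
# sampler = GaussianProcessSampler(batch_size=8, random_state=42, max_iters=1000)

# After this commit (scikit-learn backend): 'max_iters' is gone; 'jitter' tunes how
# explorative the "expected_improvement" acquisition is (larger values explore more).
sampler = GaussianProcessSampler(
    batch_size=8,
    random_state=42,
    optimize_restarts=5,
    acquisition="expected_improvement",
    jitter=0.1,
)
```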
AldoGl committed Sep 19, 2023
1 parent c5a5b88 commit 1c4a3b6
Showing 8 changed files with 98 additions and 271 deletions.
90 changes: 34 additions & 56 deletions black_it/samplers/gaussian_process.py
@@ -15,16 +15,15 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.

 """This module contains the implementation of the Gaussian process-based sampling."""
-import random
 import warnings
 from enum import Enum
-from typing import Optional, Tuple, cast
+from typing import Optional, Tuple, Union, cast

-import GPy
 import numpy as np
-from GPy.models import GPRegression
 from numpy.typing import NDArray
 from scipy.special import erfc  # pylint: disable=no-name-in-module
+from sklearn.gaussian_process import GaussianProcessRegressor, kernels

 from black_it.samplers.surrogate import MLSurrogateSampler

@@ -46,7 +45,7 @@ class GaussianProcessSampler(MLSurrogateSampler):
     In particular, the sampling is based on a Gaussian Process interpolation of the loss function.

-    Note: this class is a wrapper of the GPRegression model of the GPy package.
+    Note: this class is a wrapper of the GaussianProcessRegressor model of the scikit-learn package.
     """

     def __init__(  # pylint: disable=too-many-arguments
@@ -55,9 +54,9 @@
         random_state: Optional[int] = None,
         max_deduplication_passes: int = 5,
         candidate_pool_size: Optional[int] = None,
-        max_iters: int = 1000,
         optimize_restarts: int = 5,
         acquisition: str = "expected_improvement",
+        jitter: float = 0.1,
     ):
         """
         Initialize the sampler.
@@ -67,19 +66,20 @@
             random_state: the random state of the sampler, fixing this number the sampler behaves deterministically
             max_deduplication_passes: the maximum number of deduplication passes that are made
             candidate_pool_size: number of randomly sampled points on which the random forest predictions are evaluated
-            max_iters: maximum number of iteration in the optimization of the GP hyperparameters
             optimize_restarts: number of independent random trials of the optimization of the GP hyperparameters
             acquisition: type of acquisition function, it can be 'expected_improvement' of simply 'mean'
+            jitter: positive value to make the "expected_improvement" acquisition more explorative.
         """
         self._validate_acquisition(acquisition)

         super().__init__(
             batch_size, random_state, max_deduplication_passes, candidate_pool_size
         )
-        self.max_iters = max_iters
         self.optimize_restarts = optimize_restarts
         self.acquisition = acquisition
-        self._gpmodel: Optional[GPRegression] = None
+        self.jitter = jitter
+        self._gpmodel: Optional[GaussianProcessRegressor] = None
+        self._fmin: Optional[Union[np.double, float]] = None

     @staticmethod
     def _validate_acquisition(acquisition: str) -> None:
@@ -111,49 +111,34 @@ def fit(self, X: NDArray[np.float64], y: NDArray[np.float64]) -> None:
                 RuntimeWarning,
             )

-        # initialize GP class from GPy with a Matern kernel by default
-        dims = X.shape[1]
-        kern = GPy.kern.Matern52(dims, variance=1.0, ARD=False)
+        # initialize GP class from scikit-learn with a Matern kernel
+        kernel = kernels.Matern(length_scale=1, length_scale_bounds=(1e-5, 1e5), nu=2.5)

         noise_var = y.var() * 0.01

-        self._gpmodel = GPRegression(
-            X, y, kernel=kern, noise_var=noise_var, mean_function=None
+        self._gpmodel = GaussianProcessRegressor(
+            kernel=kernel,
+            alpha=noise_var,
+            n_restarts_optimizer=self.optimize_restarts,
+            optimizer="fmin_l_bfgs_b",
+            random_state=self._get_random_seed(),
         )

-        # Make sure we do not get ridiculously small residual noise variance
-        self._gpmodel.Gaussian_noise.constrain_bounded(
-            1e-9, 1e6, warning=False
-        )  # constrain_positive(warning=False)
-
-        # we need to set the seed globally for GPy optimisations
-        # to give reproducible results
-        np.random.seed(self._get_random_seed())
-        random.seed(self._get_random_seed())
-        if self.max_iters > 0:
-            # --- update the model maximizing the marginal likelihood.
-            if self.optimize_restarts == 1:
-                self._gpmodel.optimize(
-                    optimizer="bfgs",
-                    max_iters=self.max_iters,
-                    messages=False,
-                    ipython_notebook=False,
-                )
-            else:
-                self._gpmodel.optimize_restarts(
-                    num_restarts=self.optimize_restarts,
-                    optimizer="bfgs",
-                    max_iters=self.max_iters,
-                    verbose=False,
-                )
+        self._gpmodel.fit(X, y)
+
+        # store minimum
+        if self.acquisition == "expected_improvement":
+            m, _ = self._predict_mean_std(X)
+            self._fmin = np.min(m)

     def predict(self, X: NDArray[np.float64]) -> NDArray[np.float64]:
         """Predict using a gaussian process surrogate model."""
         # predict mean or expected improvement on the full sample set
         if self.acquisition == _AcquisitionTypes.EI.value:
             # minus sign needed for subsequent sorting
-            candidates_score = -self._predict_EI(X)[:, 0]
+            candidates_score = -self._predict_EI(X, self.jitter)
         else:  # acquisition is "mean"
-            candidates_score = self._predict_mean_std(X)[0][:, 0]
+            candidates_score = self._predict_mean_std(X)[0]

         return candidates_score

@@ -169,20 +154,13 @@ def _predict_mean_std(
         Returns:
             The pair (mean, std).
         """
-        gpmodel = cast(GPRegression, self._gpmodel)
+        gpmodel = cast(GaussianProcessRegressor, self._gpmodel)
         X = X[None, :] if X.ndim == 1 else X
-        m, v = gpmodel.predict(X, full_cov=False, include_likelihood=True)
-        v = np.clip(v, 1e-10, np.inf)
-        return m, np.sqrt(v)
-
-    def _get_fmin(self) -> float:
-        """Return the location where the posterior mean is takes its minimal value."""
-        gpmodel = cast(GPRegression, self._gpmodel)
-        return gpmodel.predict(gpmodel.X)[0].min()
-
-    def _predict_EI(
-        self, X: NDArray[np.float64], jitter: float = 0.1
-    ) -> NDArray[np.float64]:
+        m, s = gpmodel.predict(X, return_std=True, return_cov=False)
+        s = np.clip(s, 1e-5, np.inf)
+        return m, s
+
+    def _predict_EI(self, X: NDArray[np.float64], jitter: float) -> NDArray[np.float64]:
         """
         Compute the Expected Improvement per unit of cost.
@@ -195,7 +173,7 @@
         """
         m, s = self._predict_mean_std(X)

-        fmin = self._get_fmin()
+        fmin = cast(float, self._fmin)

         phi, Phi, u = self.get_quantiles(jitter, fmin, m, s)

@@ -223,7 +201,7 @@ def get_quantiles(
             the quantiles.
         """
         # remove values of variance that are too small
-        s[s < 1e-10] = 1e-10
+        s[s < 1e-5] = 1e-5

         u: NDArray[np.float64] = (fmin - m - acquisition_par) / s
         phi: NDArray[np.float64] = np.exp(-0.5 * u**2) / np.sqrt(2 * np.pi)
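
For context, the new implementation boils down to the standard scikit-learn Gaussian-process API plus a hand-rolled expected-improvement score. The standalone sketch below mirrors the diff above on toy data; the data, the variable names, and the final `ei` combination line are illustrative assumptions, since the diff only shows `get_quantiles` returning `phi`, `Phi` and `u`:

```python
import numpy as np
from scipy.special import erfc
from sklearn.gaussian_process import GaussianProcessRegressor, kernels

rng = np.random.default_rng(0)
X = rng.uniform(0, 1, size=(20, 2))                          # toy parameter samples
y = np.sin(X.sum(axis=1)) + 0.01 * rng.standard_normal(20)   # toy losses

# Matern(nu=2.5) kernel and a small noise level, as in the new fit() above
kernel = kernels.Matern(length_scale=1, length_scale_bounds=(1e-5, 1e5), nu=2.5)
gp = GaussianProcessRegressor(
    kernel=kernel,
    alpha=y.var() * 0.01,        # plays the role of GPy's noise_var
    n_restarts_optimizer=5,
    random_state=0,
)
gp.fit(X, y)

m, s = gp.predict(X, return_std=True)
s = np.clip(s, 1e-5, np.inf)

# expected improvement (minimization convention) with jitter, as in get_quantiles
jitter = 0.1
fmin = m.min()
u = (fmin - m - jitter) / s
phi = np.exp(-0.5 * u**2) / np.sqrt(2 * np.pi)   # standard normal pdf at u
Phi = 0.5 * erfc(-u / np.sqrt(2))                # standard normal cdf at u
ei = s * (u * Phi + phi)                         # standard EI formula; assumed combination
```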
81 changes: 30 additions & 51 deletions examples/overview_of_the_different_samplers.ipynb

Large diffs are not rendered by default.

101 changes: 3 additions & 98 deletions poetry.lock

Some generated files are not rendered by default.

1 change: 0 additions & 1 deletion pyproject.toml
@@ -39,7 +39,6 @@ classifiers = [

 [tool.poetry.dependencies]
 python = ">=3.8,<3.11"
-GPy = {git = "https://github.com/SheffieldML/GPy.git", rev = "f63ed48"}
 ipywidgets = ">=8.0.0,<8.0.5"
 matplotlib = "^3.7.1"
 numpy = ">=1.23.3,<1.24.0"
1 change: 1 addition & 0 deletions scripts/whitelists/package_whitelist.py
@@ -37,6 +37,7 @@
 batch_id  # unused variable (black_it/schedulers/base.py:77)
 batch_id  # unused variable (black_it/schedulers/round_robin.py:47)
 MABEpsilonGreedy  # unused class (black_it/schedulers/rl/agents/epsilon_greedy.py:26)
+seed  # unused variable (black_it/schedulers/rl/envs/base.py:59)
 _.render  # unused method (black_it/schedulers/rl/envs/base.py:80)
 MABCalibrationEnv  # unused class (black_it/schedulers/rl/envs/mab.py:24)
 RLScheduler  # unused class (black_it/schedulers/rl/rl_scheduler.py:32)
5 changes: 4 additions & 1 deletion setup.cfg
@@ -45,7 +45,7 @@ python_version = 3.8
 strict_optional = True
 plugins = numpy.typing.mypy_plugin
 exclude = examples/models.*
-disallow_untyped_defs=true
+disallow_untyped_defs = true

 [darglint]
 docstring_style=google
@@ -94,6 +94,9 @@ ignore_missing_imports = True
 [mypy-sklearn.ensemble]
 ignore_missing_imports = True

+[mypy-sklearn.gaussian_process]
+ignore_missing_imports = True
+
 [mypy-seaborn.*]
 ignore_missing_imports = True
