Source code for btb.tuning.metamodels.gaussian_process
# -*- coding: utf-8 -*-
import numpy
import scipy
from copulas import EPSILON
from copulas.univariate import Univariate
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
from btb.tuning.metamodels.base import BaseMetaModel
class GaussianProcessMetaModel(BaseMetaModel):
    """Meta-model backed by a Gaussian process regressor.

    The underlying model is ``sklearn.gaussian_process.GaussianProcessRegressor``,
    configured with an ``RBF`` kernel.

    Attributes:
        _MODEL_CLASS (type):
            Class used as the underlying model. In this case
            ``sklearn.gaussian_process.GaussianProcessRegressor``.
        _MODEL_KWARGS_DEFAULT (dict):
            Default keyword arguments declared for the ``GaussianProcessRegressor``
            instantiation.
    """

    _MODEL_CLASS = GaussianProcessRegressor
    _MODEL_KWARGS_DEFAULT = {'normalize_y': True}

    def __init_metamodel__(self, length_scale=1):
        """Register an ``RBF`` kernel in the model keyword arguments.

        Args:
            length_scale:
                Value passed as ``length_scale`` to the ``RBF`` kernel.
                Defaults to ``1``.
        """
        # Make sure there is a dictionary to write the kernel into.
        if self._model_kwargs is None:
            self._model_kwargs = {}

        kernel = RBF(length_scale=length_scale)
        self._model_kwargs['kernel'] = kernel

    def _predict(self, candidates):
        """Predict the given candidates, including the standard deviation.

        Args:
            candidates:
                Inputs handed to the ``predict`` method of the model instance.

        Returns:
            numpy.ndarray:
                The mean and the standard deviation returned by the model,
                stacked as two columns in that order.
        """
        # ``return_std=True`` makes the regressor return a ``(mean, std)`` pair.
        mean, std = self._model_instance.predict(candidates, return_std=True)
        return numpy.column_stack((mean, std))
class GaussianCopulaProcessMetaModel(GaussianProcessMetaModel):
    """GaussianCopulaProcessMetaModel class.

    This class represents a meta-model using an underlying ``GaussianProcessRegressor``
    from ``sklearn.gaussian_process``, applying ``copulas.univariate.Univariate``
    transformations to the input data and reverting them for the predictions.

    During the ``fit`` process, a univariate distribution is fitted to each column of
    the trials (one per hyperparameter) and another one to the scores. Every value is
    then mapped through the cumulative distribution of its fitted distribution and
    through the inverse of the standard normal cumulative distribution
    (``scipy.stats.norm.ppf``). The ``GaussianProcessRegressor`` is trained on the
    transformed trials and scores.

    When predicting, the candidates are transformed in the same way as the trials. The
    output of the model is then mapped back by applying the standard normal cumulative
    distribution (``scipy.stats.norm.cdf``) followed by the inverse cumulative
    distribution (``ppf``) of the distribution that was fitted on the scores.

    Attributes:
        _MODEL_KWARGS_DEFAULT (dict):
            Inherited from ``GaussianProcessMetaModel``. Default keyword arguments
            declared for the ``GaussianProcessRegressor`` instantiation.
        _MODEL_CLASS (type):
            Inherited from ``GaussianProcessMetaModel``. In this case
            ``sklearn.gaussian_process.GaussianProcessRegressor``.
        _distributions (list):
            One fitted ``Univariate`` per column of the trials. Set by ``_fit``.
        _score_distribution (Univariate):
            ``Univariate`` fitted on the scores. Set by ``_fit``.
    """

    @staticmethod
    def _fit_distribution(values):
        """Fit a new ``Univariate`` distribution to ``values`` and return it."""
        distribution = Univariate()
        distribution.fit(values)
        return distribution

    @staticmethod
    def _to_normal_space(distribution, values):
        """Map ``values`` through ``distribution.cdf`` and the standard normal ``ppf``."""
        # Keep the probabilities strictly inside (0, 1): the normal ``ppf`` maps
        # exactly 0 and 1 to -inf and +inf.
        probabilities = distribution.cdf(values).clip(0 + EPSILON, 1 - EPSILON)
        return scipy.stats.norm.ppf(probabilities)

    def _transform(self, trials):
        """Transform every column of ``trials`` with its fitted distribution.

        Args:
            trials (numpy.ndarray):
                Two dimensional array whose columns are paired, in order, with the
                distributions stored in ``self._distributions``.

        Returns:
            numpy.ndarray:
                The transformed columns stacked side by side.
        """
        return numpy.column_stack([
            self._to_normal_space(distribution, column)
            for column, distribution in zip(trials.T, self._distributions)
        ])

    def _fit(self, trials, scores):
        """Fit the distributions, then fit the parent model on the transformed data.

        Args:
            trials (numpy.ndarray):
                Two dimensional array; one distribution is fitted per column.
            scores (numpy.ndarray):
                Scores associated with the trials; one distribution is fitted on them.
        """
        self._distributions = [self._fit_distribution(column) for column in trials.T]
        self._score_distribution = self._fit_distribution(scores)

        trans_trials = self._transform(trials)
        trans_scores = self._to_normal_space(self._score_distribution, scores)

        super()._fit(trans_trials, trans_scores)

    def _predict(self, candidates):
        """Predict the candidates and map the output back through the score distribution.

        Args:
            candidates (numpy.ndarray):
                Two dimensional array with the same column layout as the trials
                used in ``_fit``.

        Returns:
            The output of the parent ``_predict`` after applying the standard normal
            ``cdf`` and the ``ppf`` of the distribution fitted on the scores.
        """
        trans_candidates = self._transform(candidates)
        predicted = super()._predict(trans_candidates)

        # NOTE(review): every column returned by the parent ``_predict`` (which asks
        # the regressor for ``return_std=True``) goes through the same inverse
        # mapping, and no clipping is applied here as it is in the forward
        # transformation -- confirm both are intended.
        return self._score_distribution.ppf(scipy.stats.norm.cdf(predicted))