Source code for btb.tuning.tuners.base

# -*- coding: utf-8 -*-

"""Package where the BaseTuner class and BaseMetaModelTuner are defined."""

import logging
from abc import abstractmethod

import numpy as np

from btb.tuning.acquisition.base import BaseAcquisition
from btb.tuning.metamodels.base import BaseMetaModel

LOGGER = logging.getLogger(__name__)


class StopTuning(Exception):
    pass
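
# Illustrative usage sketch (not part of the original module): with
# ``allow_duplicates=False``, ``BaseTuner.propose`` raises ``StopTuning`` once every
# combination in the search space has been recorded, so an exhaustive sweep can
# simply catch it to know when to stop. ``score_fn`` stands for a hypothetical
# user-provided scoring callable.
def _example_exhaustive_search(tuner, score_fn):
    results = []
    while True:
        try:
            # ``allow_duplicates`` defaults to ``False``, so this raises
            # ``StopTuning`` when the search space is exhausted.
            proposal = tuner.propose()
        except StopTuning:
            break

        score = score_fn(proposal)
        tuner.record(proposal, score)
        results.append((proposal, score))

    return results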
class BaseTuner:
    """BaseTuner class.

    BaseTuner class is the abstract representation of a tuner that is not based on a model.

    Attributes:
        tunable (btb.tuning.tunable.Tunable):
            Instance of a tunable class containing hyperparameters to be tuned.
        trials (numpy.ndarray):
            A ``numpy.ndarray`` with shape ``(n, self.tunable.dimensions)`` where ``n`` is the
            number of trials recorded.
        raw_scores (numpy.ndarray):
            A ``numpy.ndarray`` with shape ``(n, 1)`` where ``n`` is the number of scores
            recorded.
        scores (numpy.ndarray):
            A ``numpy.ndarray`` with shape ``(n, 1)`` where ``n`` is the number of normalized
            scores recorded.

    Args:
        tunable (btb.tuning.tunable.Tunable):
            Instance of a tunable class containing hyperparameters to be tuned.
        maximize (bool):
            If ``True``, scores are interpreted as bigger being better; if ``False``, smaller
            is better. Defaults to ``True``.
    """

    def __init__(self, tunable, maximize=True):
        self.tunable = tunable
        self.trials = np.empty((0, self.tunable.dimensions), dtype=float)
        self._trials_set = set()
        self.raw_scores = np.empty((0, 1), dtype=float)
        self.scores = np.empty((0, 1), dtype=float)
        self.maximize = maximize

        LOGGER.debug(
            'Creating %s instance with %s hyperparameters and cardinality %s.',
            self.__class__.__name__,
            len(self.tunable.hyperparams),
            self.tunable.cardinality,
        )

    def _check_proposals(self, num_proposals):
        """Validate ``num_proposals`` against ``self.tunable.cardinality`` and ``self.trials``.

        Raises:
            StopTuning:
                If the amount of requested proposals is bigger than the amount of possible
                combinations and ``allow_duplicates`` is ``False``.
            StopTuning:
                If the amount of unique recorded trials is the same as the amount of
                combinations available for ``self.tunable``.
            StopTuning:
                If the amount of unique recorded trials plus the requested proposals would
                exceed the amount of combinations available for ``self.tunable``.
        """
        if num_proposals > self.tunable.cardinality:
            raise StopTuning(
                'The number of proposals requested is bigger than the number of combinations '
                'of the ``tunable``: {}. Use ``allow_duplicates=True`` if you would like to '
                'generate that amount of combinations.'.format(self.tunable.cardinality)
            )

        num_tried = len(self._trials_set)
        if num_tried == self.tunable.cardinality:
            raise StopTuning(
                'All of the possible combinations were recorded. Use '
                '``allow_duplicates=True`` to keep generating combinations.'
            )

        if num_tried + num_proposals > self.tunable.cardinality:
            raise StopTuning(
                'The amount of new proposed combinations would exceed the amount of possible '
                'combinations. Either use ``num_proposals={}`` to generate the remaining '
                'combinations or ``allow_duplicates=True`` to keep generating '
                'more.'.format(self.tunable.cardinality - num_tried)
            )

    def _sample(self, num_proposals, allow_duplicates):
        """Generate a ``numpy.ndarray`` of valid proposals.

        Sample the tunable repeatedly until ``num_proposals`` combinations different from
        the ones already recorded have been collected.

        Args:
            num_proposals (int):
                Amount of proposals to generate.
            allow_duplicates (bool):
                If ``False``, only propose trials that have not been recorded. Otherwise,
                the generated trials may have been recorded before.

        Returns:
            numpy.ndarray:
                A ``numpy.ndarray`` with shape ``(num_proposals, self.tunable.dimensions)``.
        """
        if allow_duplicates:
            return self.tunable.sample(num_proposals)

        valid_proposals = set()
        while len(valid_proposals) < num_proposals:
            proposals = self.tunable.sample(num_proposals)
            proposals = set(map(tuple, proposals))
            valid_proposals.update(proposals - self._trials_set)

        return np.asarray(list(valid_proposals))[:num_proposals]

    @abstractmethod
    def _propose(self, num_proposals, allow_duplicates):
        """Generate ``num_proposals`` number of candidates.

        Args:
            num_proposals (int):
                Number of candidates to create.
            allow_duplicates (bool):
                If ``False``, only propose trials that have not been recorded. Otherwise,
                the generated trials can be repeated.

        Returns:
            numpy.ndarray:
                A ``numpy.ndarray`` with shape
                ``(num_proposals, len(self.tunable.hyperparams))``.
        """
        pass
    def propose(self, n=1, allow_duplicates=False):
        """Propose one or more new hyperparameter configurations.

        Check that the amount of proposals requested is achievable when
        ``allow_duplicates`` is ``False``, and raise an exception in case there is any
        mismatch between ``n``, the unique ``self.trials`` and
        ``self.tunable.cardinality``. Call the implemented ``_propose`` method and
        convert the returned data into hyperparameter space values.

        Args:
            n (int):
                Number of candidates to create. Defaults to 1.
            allow_duplicates (bool):
                If ``False``, only propose trials that have not been recorded. Otherwise,
                the generated trials can be repeated. Defaults to ``False``.

        Returns:
            dict or list:
                If ``n`` is 1, a ``dict`` will be returned containing the hyperparameter
                names and values. Otherwise, if ``n`` is bigger than 1, a list of such
                dicts is returned.

        Raises:
            StopTuning:
                If the amount of requested proposals is bigger than the amount of possible
                combinations and ``allow_duplicates`` is ``False``.
            StopTuning:
                If the amount of unique recorded trials is the same as the amount of
                combinations available for ``self.tunable``.
            StopTuning:
                If the amount of unique recorded trials plus the requested proposals would
                exceed the amount of combinations available for ``self.tunable``.

        Example:
            The example below shows a simple usage case where a ``UniformTuner`` is
            instantiated with a ``tunable`` object and its ``propose`` method is called
            three times: first with a single proposal, then with two proposals forced to
            be different, and finally with two proposals whose values can be repeated.

            >>> from btb.tuning.tunable import Tunable
            >>> from btb.tuning.hyperparams import BooleanHyperParam
            >>> from btb.tuning.hyperparams import CategoricalHyperParam
            >>> from btb.tuning.tuners import UniformTuner
            >>> bhp = BooleanHyperParam()
            >>> chp = CategoricalHyperParam(['cat', 'dog'])
            >>> tunable = Tunable({'bhp': bhp, 'chp': chp})
            >>> tuner = UniformTuner(tunable)
            >>> tuner.propose(1)
            {'bhp': True, 'chp': 'dog'}
            >>> tuner.propose(2)
            [{'bhp': True, 'chp': 'cat'}, {'bhp': True, 'chp': 'dog'}]
            >>> tuner.propose(2, allow_duplicates=True)
            [{'bhp': False, 'chp': 'dog'}, {'bhp': False, 'chp': 'dog'}]
        """
        if not allow_duplicates:
            self._check_proposals(n)

        proposed = self._propose(n, allow_duplicates)
        hyperparameters = self.tunable.inverse_transform(proposed)
        hyperparameters = hyperparameters.to_dict(orient='records')

        if n == 1:
            hyperparameters = hyperparameters[0]

        return hyperparameters
    def record(self, trials, scores):
        """Record one or more ``trials`` with their associated ``scores``.

        Trials are recorded with their associated scores. The amount of trials must be
        equal to the amount of scores received and vice versa.

        Args:
            trials (pandas.DataFrame, pandas.Series, dict, list(dict), 2D array-like):
                Values of shape ``(n, len(self.tunable.hyperparams))`` or dict with keys
                that are ``self.tunable.names``.
            scores (single value or array-like):
                A single value or array-like of values representing the score achieved
                with the trials.

        Raises:
            ValueError:
                If ``len(trials)`` is not equal to ``len(scores)``.

        Example:
            The example below shows a simple usage case where a ``UniformTuner`` is
            instantiated with a ``tunable`` object and its ``record`` method is called
            twice with valid trials and scores.

            >>> from btb.tuning.tunable import Tunable
            >>> from btb.tuning.hyperparams import BooleanHyperParam
            >>> from btb.tuning.hyperparams import CategoricalHyperParam
            >>> from btb.tuning.tuners import UniformTuner
            >>> bhp = BooleanHyperParam()
            >>> chp = CategoricalHyperParam(['cat', 'dog'])
            >>> tunable = Tunable({'bhp': bhp, 'chp': chp})
            >>> tuner = UniformTuner(tunable)
            >>> tuner.record({'bhp': True, 'chp': 'cat'}, 0.8)
            >>> trials = [{'bhp': False, 'chp': 'cat'}, {'bhp': True, 'chp': 'dog'}]
            >>> scores = [0.8, 0.1]
            >>> tuner.record(trials, scores)
        """
        trials = self.tunable.transform(trials)
        scores = scores if isinstance(scores, (list, np.ndarray)) else [scores]

        if len(trials) != len(scores):
            raise ValueError('The amount of trials must be equal to the amount of scores.')

        self.trials = np.append(self.trials, trials, axis=0)
        self._trials_set.update(map(tuple, trials))
        self.raw_scores = np.append(self.raw_scores, scores)
        self.scores = self.raw_scores if self.maximize else -self.raw_scores
    def __str__(self):
        return (
            "{}\n"
            "    hyperparameters: {}\n"
            "    dimensions: {}\n"
            "    cardinality: {}"
        ).format(
            self.__class__.__name__,
            len(self.tunable.hyperparams),
            self.tunable.dimensions,
            self.tunable.cardinality
        )
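
# Illustrative sketch (not part of the original module): the smallest concrete
# subclass of ``BaseTuner`` only needs to implement ``_propose``. Delegating
# straight to ``_sample`` yields uniformly random search, which is roughly what
# ``btb.tuning.tuners.UniformTuner`` does.
class _ExampleRandomTuner(BaseTuner):

    def _propose(self, num_proposals, allow_duplicates):
        # ``_sample`` already filters out recorded trials when
        # ``allow_duplicates`` is ``False``.
        return self._sample(num_proposals, allow_duplicates)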
class BaseMetaModelTuner(BaseTuner, BaseMetaModel, BaseAcquisition):
    """BaseMetaModelTuner class.

    BaseMetaModelTuner class is the abstract representation of a tuner that is based on a
    model and an ``Acquisition``. This model will try to ``predict`` the score that will be
    obtained with the proposed parameters by being trained over the ``self.trials`` and
    ``self.raw_scores`` recorded by the user.

    Attributes:
        tunable (btb.tuning.tunable.Tunable):
            Instance of a tunable class containing hyperparameters to be tuned.
        trials (numpy.ndarray):
            A ``numpy.ndarray`` with shape ``(n, self.tunable.dimensions)`` where ``n`` is the
            number of trials recorded.
        scores (numpy.ndarray):
            A ``numpy.ndarray`` with shape ``(n, 1)`` where ``n`` is the number of scores
            recorded.

    Args:
        tunable (btb.tuning.tunable.Tunable):
            Instance of a tunable class containing hyperparameters to be tuned.
        num_candidates (int):
            Number of candidate samples to generate for each proposal, from which the best
            one is selected. Defaults to 1000.
        maximize (bool):
            If ``True``, the model will interpret bigger scores as better; if ``False``,
            smaller is better. Defaults to ``True``.
        min_trials (int):
            Number of recorded ``trials`` needed before fitting the model. Defaults to 5.
    """

    _metamodel_kwargs = None
    _acquisition_kwargs = None

    def __init__(self, tunable, maximize=True, num_candidates=1000, min_trials=5):
        self.num_candidates = num_candidates
        self.min_trials = min_trials
        super().__init__(tunable, maximize)
        self.__init_metamodel__(**(self._metamodel_kwargs or dict()))
        self.__init_acquisition__(**(self._acquisition_kwargs or dict()))

    def _propose(self, num_proposals, allow_duplicates):
        if self.min_trials > len(self._trials_set):
            LOGGER.debug(
                'Not enough samples recorded to generate predictions, '
                'generating random proposal.'
            )
            return self._sample(num_proposals, allow_duplicates)

        num_samples = num_proposals * self.num_candidates
        if not allow_duplicates:
            remaining = self.tunable.cardinality - len(self._trials_set)
            num_samples = min(remaining, num_samples)

        proposals = self._sample(num_samples, allow_duplicates)
        predicted = self._predict(proposals)
        index = self._acquire(predicted, num_proposals)

        return proposals[index]
    def record(self, trials, scores):
        """Record one or more ``trials`` with their associated ``scores`` and re-fit the model.

        Trials are recorded with their associated scores. The amount of trials must be equal
        to the amount of scores received and vice versa. Once recorded, the model is fitted
        with ``self.trials`` and ``self.scores``, which contain any previous records and the
        ones that were just recorded.

        Args:
            trials (pandas.DataFrame, pandas.Series, dict, list(dict), 2D array-like):
                Values of shape ``(n, len(self.tunable.hyperparams))`` or dict with keys
                that are ``self.tunable.names``.
            scores (single value or array-like):
                A single value or array-like of values representing the score achieved with
                the trials.

        Raises:
            ValueError:
                If ``len(trials)`` is not equal to ``len(scores)``.

        Example:
            The example below shows a simple usage case where a ``GPTuner`` is instantiated
            with a ``tunable`` object and its ``record`` method is called twice with valid
            trials and scores.

            >>> from btb.tuning.tunable import Tunable
            >>> from btb.tuning.hyperparams import BooleanHyperParam
            >>> from btb.tuning.hyperparams import CategoricalHyperParam
            >>> from btb.tuning.tuners import GPTuner
            >>> bhp = BooleanHyperParam()
            >>> chp = CategoricalHyperParam(['cat', 'dog'])
            >>> tunable = Tunable({'bhp': bhp, 'chp': chp})
            >>> tuner = GPTuner(tunable)
            >>> tuner.record({'bhp': True, 'chp': 'cat'}, 0.8)
            >>> trials = [{'bhp': False, 'chp': 'cat'}, {'bhp': True, 'chp': 'dog'}]
            >>> scores = [0.8, 0.1]
            >>> tuner.record(trials, scores)
        """
        super().record(trials, scores)

        if len(self.trials) >= self.min_trials:
            LOGGER.debug('Fitting the model with %s samples.', len(self.trials))
            self._fit(self.trials, self.scores)
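
# Illustrative sketch (not part of the original module): a concrete metamodel tuner
# is assembled by mixing ``BaseMetaModelTuner`` with a metamodel and an acquisition
# class. The toy mixins below are invented for this example and assume the only
# hooks required are the ones ``BaseMetaModelTuner`` calls: ``__init_metamodel__``,
# ``_fit`` and ``_predict`` on the metamodel side, and ``__init_acquisition__`` and
# ``_acquire`` on the acquisition side. The tuners shipped with btb compose a
# Gaussian Process metamodel with acquisition functions in the same way.
class _NearestScoreMetaModel(BaseMetaModel):
    """Toy metamodel: predict each candidate's score as the score of the
    nearest recorded trial."""

    def __init_metamodel__(self):
        self._model_X = None
        self._model_y = None

    def _fit(self, trials, scores):
        self._model_X = np.asarray(trials, dtype=float)
        self._model_y = np.asarray(scores, dtype=float)

    def _predict(self, candidates):
        candidates = np.asarray(candidates, dtype=float)
        # Pairwise distances between candidates (m, d) and recorded trials (n, d).
        distances = np.linalg.norm(
            candidates[:, None, :] - self._model_X[None, :, :], axis=2)
        return self._model_y[distances.argmin(axis=1)]


class _GreedyAcquisition(BaseAcquisition):
    """Toy acquisition: keep the candidates with the highest predicted scores."""

    def __init_acquisition__(self):
        pass

    def _acquire(self, predictions, num_proposals):
        return np.argsort(predictions)[-num_proposals:]


class _ExampleMetaModelTuner(_NearestScoreMetaModel, _GreedyAcquisition, BaseMetaModelTuner):
    """Toy tuner: samples randomly until ``min_trials`` records exist, then lets
    ``BaseMetaModelTuner._propose`` drive sample -> predict -> acquire."""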