Source code for btb.session

# -*- coding: utf-8 -*-

import itertools
import json
import logging
from collections import Counter, defaultdict
from hashlib import md5

import numpy as np
from tqdm.auto import trange

from btb.selection.ucb1 import UCB1
from btb.tuning.tunable import Tunable
from btb.tuning.tuners.base import StopTuning
from btb.tuning.tuners.gaussian_process import GPTuner

LOGGER = logging.getLogger(__name__)


class BTBSession:
    """BTBSession class.

    A ``BTBSession`` represents the process of selecting and tuning several tunables
    until the best possible configuration for a specific ``scorer`` is found.

    For this, a loop is run in which for each iteration a combination of a ``Selector`` and
    ``Tuner`` is used to decide which tunable to score next and with which hyperparameters.

    While running, the ``BTBSession`` handles the errors discarding, if configured to do so,
    the tunables that have reached as many errors as the user specified.

    Attributes:
        best_proposal (dict):
            Best configuration found with the name of the tunable and the hyperparameters
            and crossvalidated score obtained for it.
        best_score (float):
            Best score obtained for this session so far.
        proposals (dict):
            Dictionary containing all the proposals generated by the ``BTBSession``.
        iterations (int):
            Amount of iterations run.
        errors (Counter):
            A Counter of the errors that each Tunable had during the session.

    Args:
        tunables (dict):
            Python dictionary that has as keys the name of the tunable and as value a
            dictionary with the tunable hyperparameters or a
            ``btb.tuning.tunable.Tunable`` instance. The session works on a shallow
            copy, so the given dictionary itself is never modified.
        scorer (callable object / function):
            A callable object or function with signature ``scorer(tunable_name, config)``
            which should return only a single value.
        tuner_class (btb.tuning.tuner.BaseTuner):
            A tuner based on BTB ``BaseTuner`` class. This tuner will manage the new
            proposals. Defaults to ``btb.tuning.tuners.gaussian_process.GPTuner``
        selector_class (btb.selection.selector.Selector):
            A selector based on BTB ``Selector`` class. This will determine which one of
            the tunables is performing better, and which one to test next. Defaults to
            ``btb.selection.selectors.ucb1.UCB1``
        maximize (bool):
            If ``True`` the scores are interpreted as bigger is better, if ``False`` then
            smaller is better, this should depend on the problem type (maximization or
            minimization). Defaults to ``True``.
        max_errors (int):
            Amount of errors allowed for a tunable to not generate a score. Once this
            amount of errors is reached, the tunable will be removed from the list.
            Defaults to 1.
        verbose (bool):
            If ``True`` a progress bar will be displayed for the ``run`` process.
    """

    _tunables = None
    _scorer = None
    _tuner_class = None
    _selector = None
    _maximize = None
    _max_errors = None
    _best_normalized = None
    _tunable_names = None
    _normalized_scores = None
    _tuners = None
    _range = None

    best_proposal = None
    best_score = None
    proposals = None
    iterations = None
    errors = None

    def _normalize(self, score):
        """Return ``score`` oriented so that bigger is always better.

        The score is negated when the session minimizes. ``None`` is returned
        unchanged (as ``None``).
        """
        if score is not None:
            return score if self._maximize else -score

        return None

    def __init__(self, tunables, scorer, tuner_class=GPTuner, selector_class=UCB1,
                 maximize=True, max_errors=1, verbose=False):

        # Work on a shallow copy: ``_remove_tunable`` pops entries from this dict and
        # the dictionary owned by the caller must not be emptied as a side effect.
        self._tunables = dict(tunables)
        self._scorer = scorer
        self._tuner_class = tuner_class
        self._tunable_names = list(self._tunables.keys())
        self._selector = selector_class(self._tunable_names)
        self._maximize = maximize
        self._max_errors = max_errors

        self.best_proposal = None
        self.proposals = dict()
        self.iterations = 0
        self.errors = Counter()
        self.best_score = None

        self._best_normalized = -np.inf
        self._normalized_scores = defaultdict(list)
        self._tuners = dict()
        self._range = trange if verbose else range

    def _make_dumpable(self, to_dump):
        """Return a copy of ``to_dump`` that ``json.dumps`` can serialize.

        Keys are converted to ``str``, numpy scalars and arrays are converted to their
        plain Python equivalents and the string ``'None'`` is replaced by ``None``.
        """
        dumpable = {}
        for key, value in to_dump.items():
            if not isinstance(key, str):
                key = str(key)

            if isinstance(value, np.integer):
                value = int(value)
            elif isinstance(value, np.floating):
                value = float(value)
            elif isinstance(value, np.ndarray):
                value = value.tolist()
            elif isinstance(value, np.bool_):
                value = bool(value)
            elif value == 'None':
                value = None

            dumpable[key] = value

        return dumpable

    def _make_id(self, name, config):
        """Build the proposal id: the md5 hex digest of the tunable name and config.

        The proposal is dumped to JSON with sorted keys, so the id does not depend
        on the order of the keys inside ``config``.
        """
        dumpable_config = self._make_dumpable(config)
        proposal = {
            'name': name,
            'config': dumpable_config,
        }
        hashable = json.dumps(proposal, sort_keys=True).encode()

        return md5(hashable).hexdigest()

    def _remove_tunable(self, tunable_name):
        """Remove a tunable from the candidates list.

        This is necessary when:
            - Duplicates are not allowed and the tunable has exhausted all its
              configurations.
            - The tunable has reached ``max_errors`` errors.

        When this happens, the tunable is removed from the tunables dict and its
        scores are removed from the normalized_scores dict used by the selectors.
        """
        self._normalized_scores.pop(tunable_name, None)
        self._tunables.pop(tunable_name, None)

    def _get_next_tunable_name(self):
        """Return the name of the tunable that should generate the next proposal."""
        if self._normalized_scores:
            tunable_name = self._selector.select(self._normalized_scores)
        else:
            # if _normalized_scores is still empty the selector crashes
            # this happens when max_errors > 1, all tunables have tuners
            # and all previous trials have crashed.
            tunable_name = np.random.choice(list(self._tunables.keys()))

        return tunable_name

    def propose(self):
        """Propose a new configuration to score.

        Every time ``propose`` is called, a new tunable will be selected and
        a new hyperparameter proposal will be generated for it.

        At the beginning, the default hyperparameters of each one of the
        tunables will be returned sequentially in the same order as they were
        passed to the ``BTBSession``.

        After that, once each tunable has been scored at least once, the tunable
        used to generate the new proposals will be selected optimally each time
        by the selector.

        If a tunable runs out of proposals, it will be discarded from the list
        and will not be proposed again.

        Finally, when all the tunables have run out of proposals, a
        ``StopTuning`` exception will be raised.

        Returns:
            tuple (str, dict):
                * Name of the tunable to try next.
                * Hyperparameters proposal.

        Raises:
            StopTuning:
                If the ``BTBSession`` has run out of proposals to generate.
            TypeError:
                If a tunable is neither a ``dict`` nor a ``Tunable`` instance.
        """
        if not self._tunables:
            raise StopTuning('There are no tunables left to try.')

        if len(self._tuners) < len(self._tunable_names):
            # Initial phase: one entry is added to ``_tuners`` per call, so its size
            # is the index of the next tunable that has not been proposed yet.
            tunable_name = self._tunable_names[len(self._tuners)]
            tunable = self._tunables[tunable_name]

            if isinstance(tunable, dict):
                LOGGER.info('Creating Tunable instance from dict.')
                tunable = Tunable.from_dict(tunable)

            if not isinstance(tunable, Tunable):
                raise TypeError('Tunable can only be an instance of btb.tuning.Tunable or dict')

            LOGGER.info('Obtaining default configuration for %s', tunable_name)
            config = tunable.get_defaults()

            if tunable.cardinality == 1:
                LOGGER.warning('Skipping tuner creation for Tunable %s with cardinality 1',
                               tunable_name)
                tuner = None
            else:
                tuner = self._tuner_class(tunable)

            self._tuners[tunable_name] = tuner

        else:
            tunable_name = self._get_next_tunable_name()
            tuner = self._tuners[tunable_name]
            try:
                if tuner is None:
                    # Exceptions do not interpolate logging-style arguments, so the
                    # message has to be formatted explicitly.
                    raise StopTuning(
                        'Tunable %s has no tunable hyperparameters' % tunable_name
                    )

                LOGGER.info('Generating new proposal configuration for %s', tunable_name)
                config = tuner.propose(1)

            except StopTuning:
                LOGGER.info('%s has no more configs to propose.', tunable_name)
                self._remove_tunable(tunable_name)
                # Retry with the remaining tunables; this raises StopTuning
                # once there are none left.
                tunable_name, config = self.propose()

        proposal_id = self._make_id(tunable_name, config)
        self.proposals[proposal_id] = {
            'id': proposal_id,
            'name': tunable_name,
            'config': config
        }

        return tunable_name, config

    def handle_error(self, tunable_name):
        """Handle errors when ``score`` is ``None``.

        The error count of the given ``tunable_name`` is increased by one and, if it
        has reached ``self._max_errors``, the tunable is removed from the selector's
        choices.

        Args:
            tunable_name (str):
                The name of the tunable to which this configuration belongs.
        """
        self.errors[tunable_name] += 1
        errors = self.errors[tunable_name]

        if errors >= self._max_errors:
            LOGGER.warning('Too many errors: %s. Removing tunable %s', errors, tunable_name)
            self._remove_tunable(tunable_name)

    def record(self, tunable_name, config, score):
        """Record the configuration and the obtained score to the tuner.

        If the score is the best one so far, the ``best_proposal`` and ``best_score``
        are updated. If the score is ``None`` the trial is counted as an error
        through ``handle_error``.

        Args:
            tunable_name (str):
                The name of the tunable to which this configuration belongs.
            config (dict):
                Hyperparameter proposal, as given by the tunable.
            score (float):
                Obtained score with the given configuration.

        Raises:
            KeyError:
                If the given ``tunable_name`` and ``config`` do not match any of the
                proposals stored in ``self.proposals``.
        """
        proposal_id = self._make_id(tunable_name, config)
        proposal = self.proposals[proposal_id]
        proposal['score'] = score

        if score is None:
            self.handle_error(tunable_name)
        else:
            normalized = self._normalize(score)
            self._normalized_scores[tunable_name].append(normalized)

            if normalized > self._best_normalized:
                LOGGER.info('New optimal found: %s - %s', tunable_name, score)
                self.best_proposal = proposal
                self.best_score = score
                self._best_normalized = normalized

            # A failure while updating the tuner is logged instead of raised so that
            # the score already stored in the session is kept.
            try:
                tuner = self._tuners[tunable_name]
                if tuner is None:
                    LOGGER.warning('Skipping record for Tunable %s with cardinality 1',
                                   tunable_name)
                else:
                    tuner.record(config, normalized)

            except Exception:
                LOGGER.exception('Could not record configuration and score for tuner %s.',
                                 tunable_name)

    def run(self, iterations=None):
        """Run the selection and tuning loop for the given number of iterations.

        At each iteration, the ``BTBSession`` will generate a new proposal calling
        ``self.propose``, score it using the ``self._scorer``, and finally record the
        obtained score back to the tuner calling ``self.record``.

        If no iterations are given, run infinitely until interrupted or until all
        the tuner proposals are exhausted.

        Exceptions raised by the scorer are captured, logged and recorded as a
        ``None`` score.

        Args:
            iterations (int):
                Number of iterations to run. If ``None``, run with no limit.

        Returns:
            best_proposal (dict):
                Best configuration found with the name of the tunable and the
                hyperparameters and crossvalidated score obtained for it.

        Raises:
            StopTuning:
                Propagated from ``self.propose`` when there are no tunables left to
                try. The best configuration found so far is still available as
                ``self.best_proposal``.
        """
        if iterations is None:
            iterator = itertools.count()
        else:
            iterator = self._range(iterations)

        for _ in iterator:
            self.iterations += 1
            tunable_name, config = self.propose()

            try:
                LOGGER.debug('Scoring proposal %s - %s: %s',
                             self.iterations, tunable_name, config)
                score = self._scorer(tunable_name, config)

            except Exception:
                params = '\n'.join('{}: {}'.format(k, v) for k, v in config.items())
                LOGGER.exception(
                    'Proposal %s - %s crashed with the following configuration: %s',
                    self.iterations, tunable_name, params
                )
                score = None

            self.record(tunable_name, config, score)

        return self.best_proposal