Source code for btb.selection.pure

import logging

from btb.selection.selector import Selector

# the minimum number of scores that each choice must have in order to use best-K
# optimizations. If not all choices meet this threshold, the choice with the
# fewest scores is selected instead.
K_MIN = 3

logger = logging.getLogger('btb')


class PureBestKVelocity(Selector):
    """Pure Best K Velocity Selector

    Simply returns the choice with the best best-K velocity.
    """

    def __init__(self, choices, k=K_MIN):
        super(PureBestKVelocity, self).__init__(choices)
        self.k = k
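    # Hypothetical usage note: PureBestKVelocity(['rf', 'svm'], k=5) would
    # measure velocity over each choice's 6 best scores instead of the
    # default 4 (k = K_MIN = 3 gives k + 1 = 4 top scores).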
    def compute_rewards(self, scores):
        """
        Compute the "velocity" of the k + 1 best scores, i.e. the pairwise
        differences between consecutive scores in descending order. Return a
        list of those k velocities, padded out with zeros so that the length
        matches the length of `scores`.
        """
        # get the k + 1 best scores in descending order
        best_scores = sorted(scores, reverse=True)[:self.k + 1]
        velocities = [best_scores[i] - best_scores[i + 1]
                      for i in range(len(best_scores) - 1)]

        # pad the list out with zeros to maintain its original length
        zeros = (len(scores) - self.k) * [0]
        return velocities + zeros
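    # Worked example with hypothetical numbers: for the default k=3 and
    # scores = [0.72, 0.75, 0.79, 0.81], the k + 1 best scores in descending
    # order are [0.81, 0.79, 0.75, 0.72], giving velocities [0.02, 0.04, 0.03]
    # (up to floating-point rounding); one zero pads the result, so
    # compute_rewards returns [0.02, 0.04, 0.03, 0].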
    def select(self, choice_scores):
        """
        Select the choice with the highest best-K velocity. If any choices
        don't have K_MIN scores yet, return the one with the fewest.
        """
        # if we don't have enough scores to do K-selection, fall back to
        # rewarding the choice with the fewest scores
        min_num_scores = min(len(s) for s in choice_scores.values())
        if min_num_scores >= K_MIN:
            logger.info('PureBestKVelocity: using Pure Best K velocity selection')
            reward_func = self.compute_rewards
        else:
            logger.warning(
                '{klass}: Not enough scores to do K-selection; '
                'returning choice with fewest scores'
                .format(klass=type(self).__name__))

            # reward the choices that have the fewest scores so far
            def reward_func(scores):
                return [1] if len(scores) == min_num_scores else [0]

        choice_rewards = {}
        for choice, scores in choice_scores.items():
            if choice not in self.choices:
                continue
            choice_rewards[choice] = reward_func(scores)

        # the default bandit returns the choice with the highest mean reward
        return self.bandit(choice_rewards)
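
A short usage sketch follows (hypothetical choice names and scores; it assumes
the default Selector.bandit picks the choice with the highest mean reward, as
the final comment above notes):

    from btb.selection.pure import PureBestKVelocity

    selector = PureBestKVelocity(['rf', 'svm'])

    # every choice has at least K_MIN scores, so velocity rewards are used:
    # 'rf'  -> [0.02, 0.04, 0.03, 0] (mean 0.0225)
    # 'svm' -> [0.01, 0.02, 0.28, 0] (mean 0.0775)
    selector.select({
        'rf': [0.72, 0.75, 0.79, 0.81],
        'svm': [0.60, 0.88, 0.90, 0.91],
    })  # returns 'svm'

    # 'rf' has fewer than K_MIN scores, so the fallback branch rewards the
    # choice with the fewest scores
    selector.select({
        'rf': [0.72],
        'svm': [0.60, 0.88, 0.90],
    })  # returns 'rf'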