Source code for btb.selection.selector

import numpy as np


[docs]class Selector(object):
    """Base selector

    Args:
        choices (list): a list of discrete choices from which the selector must choose at every
            call to ``select``.
    """

    def __init__(self, choices):
        self.choices = choices

[docs]    def compute_rewards(self, scores):
        """Compute rewards from choice's scores

        Convert a list of scores associated with one choice into a list of rewards. Normally, the
        length of the list will be preserved, even if some of the scores are dropped.
        """
        return list(scores)

[docs]    def bandit(self, choice_rewards):
        """Return the choice to take next using multi-armed bandit

        Multi-armed bandit method. Accepts a mapping of choices to rewards which indicate their
        historical performance, and returns the choice that we should make next in order to
        maximize expected reward in the long term.

        The default implementation is to return the arm with the highest average score.

        Args:
            choice_rewards (Dict[object, List[float]]): maps choice IDs to lists of rewards.

        Returns:
            str: the name of the choice to take next.
        """
        return max(choice_rewards, key=lambda a: np.mean(choice_rewards[a]))

[docs]    def select(self, choice_scores):
        """Select the next best choice to make

        Args:
            choice_scores (Dict[object, List[float]]): Mapping of choice to list of scores for each
                possible choice. The caller is responsible for making sure each choice that is
                possible at this juncture is represented in the dict, even those with no scores.
                Score lists should be in ascending chronological order, that is, the score from the
                earliest trial should be listed first.

                For example::

                    {
                        1: [0.56, 0.61, 0.33, 0.67],
                        2: [0.25, 0.58],
                        3: [0.60, 0.65, 0.68],
                    }
        """
        choice_rewards = {}
        for choice, scores in choice_scores.items():
            if choice not in self.choices:
                continue

            choice_rewards[choice] = self.compute_rewards(scores)

        return self.bandit(choice_rewards)