# Source code for delira.data_loading.sampler.weighted_sampler

import numpy as np
from numpy.random import choice

from delira.data_loading.sampler.abstract_sampler import AbstractSampler
from delira.data_loading.dataset import AbstractDataset


class WeightedRandomSampler(AbstractSampler):
    """
    Implements Weighted Random Sampling

    Data indices are drawn with ``numpy.random.choice``; each index can be
    given its own sampling probability via ``weights``.

    """

    def __init__(self, indices, weights=None):
        """

        Parameters
        ----------
        indices : list
            list of classes each sample belongs to. List index corresponds to
            data index and the value at a certain index indicates the
            corresponding class
        weights : Any or None
            sampling weights, one entry per sample; passed unchanged to
            ``numpy.random.choice`` as parameter ``p``. ``None`` leaves the
            choice of distribution to ``numpy.random.choice``

        """
        super().__init__(indices)

        # Only the number of samples is kept here: the sampler draws from
        # the positions 0 .. len(indices) - 1, not from the class labels.
        self._indices = list(range(len(indices)))
        self._weights = weights
        # NOTE(review): not read anywhere in this class; presumably consumed
        # by AbstractSampler -- confirm before removing.
        self._global_index = 0

    @classmethod
    def from_dataset(cls, dataset: AbstractDataset, **kwargs):
        """

        Classmethod to initialize the sampler from a given dataset

        Parameters
        ----------
        dataset : AbstractDataset
            the given dataset; every item must provide a ``'label'`` entry
        **kwargs :
            additional keyword arguments forwarded to the constructor
            (e.g. ``weights``)

        Returns
        -------
        AbstractSampler
            The initialized sampler (an instance of ``cls``)

        """
        labels = [d['label'] for d in dataset]
        return cls(labels, **kwargs)

    def _get_indices(self, n_indices):
        """
        Actual Sampling

        Parameters
        ----------
        n_indices : int
            number of indices to return

        Returns
        -------
        numpy.ndarray
            array of sampled indices

        Raises
        ------
        StopIteration
            If maximal number of samples is reached (presumably raised by
            ``_check_batchsize`` of the base class -- confirm there)
        ValueError
            if the weights don't match the population (raised by
            ``numpy.random.choice``)

        """
        # The base class may adjust the requested batch size.
        n_indices = self._check_batchsize(n_indices)

        samples = choice(self._indices, size=n_indices, p=self._weights)

        return samples

    def __len__(self):
        """
        Number of samples the sampler draws from

        Returns
        -------
        int
            length of the index list given at construction

        """
        return len(self._indices)


class WeightedPrevalenceRandomSampler(WeightedRandomSampler):
    """
    Weighted random sampler whose weights balance the class prevalence

    """

    def __init__(self, indices):
        """
        Implements random Per-Class Sampling and ensures uniform sampling
        of all classes

        Every class receives the same total probability
        (``1 / number_of_classes``), which is split evenly between the
        samples belonging to that class.

        Parameters
        ----------
        indices : array-like
            list of classes each sample belongs to. List index corresponds to
            data index and the value at a certain index indicates the
            corresponding class

        Raises
        ------
        ZeroDivisionError
            if ``indices`` is empty (there is no class to sample from)
        """
        # ``inverse`` maps every sample to the position of its class in the
        # sorted list of unique classes; ``classes_count`` holds the number
        # of samples of each of these classes.
        _, inverse, classes_count = np.unique(
            np.asarray(indices), return_inverse=True, return_counts=True)

        # probability mass assigned to each class as a whole
        target_prob = 1 / classes_count.shape[0]

        # Per-sample weight, computed in one step from the class sizes.
        # Unlike overwriting a float copy of the labels class by class, this
        # can not confuse an already assigned weight with a class label and
        # does not require the labels to be numeric.
        weights = target_prob / classes_count[inverse]

        super().__init__(indices, weights=weights)