Source code for delira.training.predictor

import logging
import copy

import numpy as np
from tqdm import tqdm

from ..data_loading import BaseDataManager
from .train_utils import convert_batch_to_numpy_identity
from ..utils.config import LookupConfig

logger = logging.getLogger(__name__)


class Predictor(object):
    """
    Defines an API for Predictions from a Network

    See Also
    --------
    :class:`PyTorchNetworkTrainer`

    """

    # static variable to prevent certain attributes from being overwritten
    __KEYS_TO_GUARD = []

    def __init__(
            self,
            model,
            key_mapping: dict,
            convert_batch_to_npy_fn=convert_batch_to_numpy_identity,
            prepare_batch_fn=lambda x: x,
            **kwargs):
        """

        Parameters
        ----------
        model : :class:`AbstractNetwork`
            the model to predict from
        key_mapping : dict
            a dictionary containing the mapping from the ``data_dict`` to
            the actual model's inputs. E.g. if a model accepts one input
            named 'x' and the data_dict contains one entry named 'data',
            this argument would have to be ``{'x': 'data'}``
        convert_batch_to_npy_fn : type, optional
            a callable function to convert tensors in positional and
            keyword arguments to numpy; default: identity function
        prepare_batch_fn : type, optional
            function converting a batch-tensor to the framework specific
            tensor-type and pushing it to the correct device; default:
            identity function
        **kwargs :
            additional keyword arguments

        """

        self._setup(model, key_mapping, convert_batch_to_npy_fn,
                    prepare_batch_fn, **kwargs)

        self._tqdm_desc = "Test"
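
    # Hedged usage sketch (not part of the original source): assuming a
    # model that accepts a single input named 'x' while the batch dicts
    # store it under 'data', ``key_mapping`` wires the two together.
    # ``my_model`` is a placeholder for any :class:`AbstractNetwork`:
    #
    #     predictor = Predictor(model=my_model, key_mapping={"x": "data"})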

    def _setup(self, network, key_mapping,
               convert_batch_args_kwargs_to_npy_fn, prepare_batch_fn,
               **kwargs):
        """

        Parameters
        ----------
        network : :class:`AbstractNetwork`
            the network to predict from
        key_mapping : dict
            a dictionary containing the mapping from the ``data_dict`` to
            the actual model's inputs. E.g. if a model accepts one input
            named 'x' and the data_dict contains one entry named 'data',
            this argument would have to be ``{'x': 'data'}``
        convert_batch_args_kwargs_to_npy_fn : type
            a callable function to convert tensors in positional and
            keyword arguments to numpy
        prepare_batch_fn : type
            function converting a batch-tensor to the framework specific
            tensor-type and pushing it to the correct device; default:
            identity function

        """
        self.module = network
        self.key_mapping = key_mapping

        self._convert_to_npy_fn = convert_batch_args_kwargs_to_npy_fn
        self._prepare_batch = prepare_batch_fn

    def __call__(self, data: dict, **kwargs):
        """
        Method to call the class.
        Returns the predictions corresponding to the given data
        obtained by the model

        Parameters
        ----------
        data : dict
            batch dictionary

        Returns
        -------
        dict
            predicted data

        """
        return self.predict(data, **kwargs)

    def predict(self, data: dict, **kwargs):
        """
        Predict a single batch.
        Returns the predictions corresponding to the given data
        obtained by the model

        Parameters
        ----------
        data : dict
            batch dictionary
        **kwargs :
            keyword arguments (directly passed to ``prepare_batch``)

        Returns
        -------
        dict
            predicted data

        """
        data = self._prepare_batch(data, **kwargs)

        mapped_data = {
            k: data[v] for k, v in self.key_mapping.items()}

        pred = self.module(
            **mapped_data
        )

        # converts positional arguments and keyword arguments,
        # but returns only keyword arguments, since positional
        # arguments are not given.
        return self._convert_to_npy_fn(
            **pred
        )[1]
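
    # Hedged sketch of a single-batch prediction (not part of the original
    # source; ``predictor`` as constructed above):
    #
    #     preds = predictor.predict({"data": np.zeros((4, 3, 32, 32))})
    #     # -> dict of numpy arrays, e.g. {"pred": ...}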

    def predict_data_mgr(self, datamgr, batchsize=None, metrics=None,
                         metric_keys=None, verbose=False, **kwargs):
        """
        Defines a routine to predict data obtained from a batchgenerator
        without explicitly caching anything

        Parameters
        ----------
        datamgr : :class:`BaseDataManager`
            Manager producing a generator holding the batches
        batchsize : int
            Artificial batchsize (sampling will be done with batchsize 1
            and sampled data will be stacked to match the artificial
            batchsize) (default: None)
        metrics : dict
            the metrics to calculate
        metric_keys : dict
            the ``batch_dict`` items to use for metric calculation
        verbose : bool
            whether to show a progress-bar or not, default: False
        kwargs :
            keyword arguments passed to :func:`prepare_batch_fn`

        Yields
        ------
        dict
            a dictionary containing all predictions of the current batch
        dict
            a dictionary containing all metrics of the current batch

        """
        if metrics is None:
            metrics = {}

        orig_num_aug_processes = datamgr.n_process_augmentation
        orig_batch_size = datamgr.batch_size

        if batchsize is None:
            batchsize = orig_batch_size

        datamgr.batch_size = 1
        datamgr.n_process_augmentation = 1

        batchgen = datamgr.get_batchgen()
        n_batches = batchgen.num_batches

        if verbose:
            iterable = tqdm(enumerate(batchgen), unit=' sample',
                            total=n_batches, desc=self._tqdm_desc)
        else:
            iterable = enumerate(batchgen)

        batch_list = []

        for i, batch in iterable:

            # shrink the artificial batchsize if the remaining samples
            # would not fill a whole batch
            if not batch_list and (n_batches - i) < batchsize:
                batchsize = n_batches - i
                logger.debug("Set batchsize down to %d to avoid cutting "
                             "off the last batches" % batchsize)

            batch_list.append(batch)

            # if queue is full, process queue:
            if batchsize is None or len(batch_list) >= batchsize:

                # stack the sampled single-sample batches to one
                # artificial batch
                batch_dict = {}
                for _batch in batch_list:
                    for key, val in _batch.items():
                        if key in batch_dict.keys():
                            batch_dict[key].append(val)
                        else:
                            batch_dict[key] = [val]

                for key, val_list in batch_dict.items():
                    batch_dict[key] = np.concatenate(val_list)

                preds = self.predict(copy.copy(batch_dict), **kwargs)

                # convert batch_dict back to numpy (self.predict may
                # convert it to a backend-specific tensor type) -
                # no-op if already numpy
                batch_dict = self._convert_to_npy_fn(**batch_dict)[1]

                preds_batch = LookupConfig()
                preds_batch.update(batch_dict)
                preds_batch.update(preds)

                # calculate metrics for the predicted batch
                _metric_vals = self.calc_metrics(preds_batch,
                                                 metrics=metrics,
                                                 metric_keys=metric_keys)

                yield preds, _metric_vals

                batch_list = []

        batchgen._finish()
        datamgr.batch_size = orig_batch_size
        datamgr.n_process_augmentation = orig_num_aug_processes

        return
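
    # Hedged sketch (not part of the original source): consuming the
    # generator batch-wise with a hypothetical mean-squared-error metric;
    # ``dmgr`` stands for any :class:`BaseDataManager`:
    #
    #     metrics = {"mse": lambda pred, label: ((pred - label) ** 2).mean()}
    #     for preds, metric_vals in predictor.predict_data_mgr(
    #             dmgr, batchsize=16, metrics=metrics,
    #             metric_keys={"mse": ("pred", "label")}):
    #         ...  # process one (re-stacked) batch at a time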

    def predict_data_mgr_cache_metrics_only(self, datamgr, batchsize=None,
                                            metrics=None, metric_keys=None,
                                            verbose=False, **kwargs):
        """
        Defines a routine to predict data obtained from a batchgenerator
        and caches the metrics

        Parameters
        ----------
        datamgr : :class:`BaseDataManager`
            Manager producing a generator holding the batches
        batchsize : int
            Artificial batchsize (sampling will be done with batchsize 1
            and sampled data will be stacked to match the artificial
            batchsize) (default: None)
        metrics : dict
            the metrics to calculate
        metric_keys : dict
            the ``batch_dict`` items to use for metric calculation
        verbose : bool
            whether to show a progress-bar or not, default: False
        kwargs :
            keyword arguments passed to :func:`prepare_batch_fn`

        Yields
        ------
        dict
            a dictionary containing all validation metrics (may be empty)

        Notes
        -----
        This function stores each prediction only temporarily for metric
        calculation; this results in a (typically) much lower memory
        consumption than :meth:`Predictor.predict_data_mgr_cache_all`,
        but still caches the metrics. If this is not desired, it is
        recommended to use :meth:`Predictor.predict_data_mgr` and iterate
        over the generator, as this only produces per-batch metrics and
        predictions and does not cache anything by default

        """
        if metrics is None:
            metrics = {}

        yield from self.predict_data_mgr_cache(datamgr=datamgr,
                                               batchsize=batchsize,
                                               metrics=metrics,
                                               metric_keys=metric_keys,
                                               verbose=verbose,
                                               cache_preds=False,
                                               **kwargs)

        return

    def predict_data_mgr_cache_all(self, datamgr, batchsize=None,
                                   metrics=None, metric_keys=None,
                                   verbose=False, **kwargs):
        """
        Defines a routine to predict data obtained from a batchgenerator
        and caches all predictions and metrics (yields them in dicts)

        Parameters
        ----------
        datamgr : :class:`BaseDataManager`
            Manager producing a generator holding the batches
        batchsize : int
            Artificial batchsize (sampling will be done with batchsize 1
            and sampled data will be stacked to match the artificial
            batchsize) (default: None)
        metrics : dict
            the metrics to calculate
        metric_keys : dict
            the ``batch_dict`` items to use for metric calculation
        verbose : bool
            whether to show a progress-bar or not, default: False
        kwargs :
            keyword arguments passed to :func:`prepare_batch_fn`

        Yields
        ------
        dict
            a dictionary containing all predictions
        dict
            a dictionary containing all validation metrics (may be empty)

        Warnings
        --------
        Since this function caches all predictions and metrics, this may
        result in huge memory consumption. If you are running out of
        memory, please have a look at
        :meth:`Predictor.predict_data_mgr_cache_metrics_only` or
        :meth:`Predictor.predict_data_mgr`

        """
        if metrics is None:
            metrics = {}

        yield from self.predict_data_mgr_cache(datamgr=datamgr,
                                               batchsize=batchsize,
                                               metrics=metrics,
                                               metric_keys=metric_keys,
                                               verbose=verbose,
                                               cache_preds=True,
                                               **kwargs)

        return
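
    # Hedged sketch (not part of the original source): the caching variant
    # yields exactly once, with everything stacked over the whole dataset:
    #
    #     preds_all, metrics_all = next(
    #         predictor.predict_data_mgr_cache_all(dmgr, metrics=metrics))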

    def predict_data_mgr_cache(self, datamgr, batchsize=None, metrics=None,
                               metric_keys=None, verbose=False,
                               cache_preds=False, **kwargs):
        """
        Defines a routine to predict data obtained from a batchgenerator
        and caches all metrics and (optionally) all predictions (yields
        them in dicts)

        Parameters
        ----------
        datamgr : :class:`BaseDataManager`
            Manager producing a generator holding the batches
        batchsize : int
            Artificial batchsize (sampling will be done with batchsize 1
            and sampled data will be stacked to match the artificial
            batchsize) (default: None)
        metrics : dict
            the metrics to calculate
        metric_keys : dict
            the ``batch_dict`` items to use for metric calculation
        verbose : bool
            whether to show a progress-bar or not, default: False
        cache_preds : bool
            whether to also cache predictions
        kwargs :
            keyword arguments passed to :func:`prepare_batch_fn`

        Yields
        ------
        dict
            a dictionary containing all predictions (only if
            ``cache_preds=True``)
        dict
            a dictionary containing all validation metrics (may be empty)

        Warnings
        --------
        Since this function caches all metrics and may additionally cache
        all predictions (based on the argument ``cache_preds``), this may
        result in huge memory consumption. If you are running out of
        memory, please have a look at
        :meth:`Predictor.predict_data_mgr_cache_metrics_only` or
        :meth:`Predictor.predict_data_mgr` or consider setting
        ``cache_preds`` to ``False`` (if not done already)

        """
        if metrics is None:
            metrics = {}

        predictions_all, metric_vals = [], {k: [] for k in metrics.keys()}

        for preds, _metric_vals in self.predict_data_mgr(
                datamgr=datamgr, batchsize=batchsize, metrics=metrics,
                metric_keys=metric_keys, verbose=verbose, **kwargs):

            if cache_preds:
                predictions_all.append(preds)

            for k, v in _metric_vals.items():
                metric_vals[k].append(v)

        if cache_preds:
            # convert predictions from list of dicts to dict of lists
            new_predictions_all = {}

            # recursively convert all nested dicts
            for preds in predictions_all:
                new_predictions_all = self.__convert_dict(
                    preds, new_predictions_all)

            # concatenate lists to single arrays
            preds_all = self.__concatenate_dict_items(new_predictions_all)
        else:
            preds_all = {}

        for k, v in metric_vals.items():
            metric_vals[k] = np.array(v)

        if cache_preds:
            yield preds_all, metric_vals
        else:
            yield metric_vals

        return
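
    # Hedged sketch (not part of the original source): the shape of the
    # yielded value depends on ``cache_preds``:
    #
    #     preds_all, metric_vals = next(predictor.predict_data_mgr_cache(
    #         dmgr, metrics=metrics, cache_preds=True))
    #     metric_vals = next(predictor.predict_data_mgr_cache(
    #         dmgr, metrics=metrics, cache_preds=False))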

    @staticmethod
    def __convert_dict(old_dict, new_dict):
        """
        Function to recursively convert dicts

        Parameters
        ----------
        old_dict : dict
            the old nested dict
        new_dict : dict
            the new nested dict

        Returns
        -------
        dict
            the updated new nested dict

        """
        for k, v in old_dict.items():

            # apply the same function again on the item if it is a dict
            if isinstance(v, dict):
                if k not in new_dict:
                    new_dict[k] = {}
                new_dict[k] = Predictor.__convert_dict(v, new_dict[k])

            else:
                # check if v is scalar and convert it to a numpy array if
                # necessary. Otherwise concatenation might fail
                if np.isscalar(v):
                    v = np.array(v)

                # check for zero-sized arrays and reshape if necessary.
                # Otherwise concatenation might fail
                if v.shape == ():
                    v = v.reshape(1)

                if k in new_dict:
                    new_dict[k].append(v)
                else:
                    new_dict[k] = [v]

        return new_dict

    @staticmethod
    def __concatenate_dict_items(dict_like: dict):
        """
        Function to recursively concatenate dict-items

        Parameters
        ----------
        dict_like : dict
            the (nested) dict whose items should be concatenated

        Returns
        -------
        dict
            the dict with concatenated items

        """
        for k, v in dict_like.items():
            if isinstance(v, dict):
                v = Predictor.__concatenate_dict_items(v)
            else:
                v = np.concatenate(v)

            dict_like[k] = v

        return dict_like

    def __setattr__(self, key, value):
        """
        Set attributes and guard specific attributes after they have
        been set once

        Parameters
        ----------
        key : str
            the attribute's name
        value : Any
            the value to set

        Raises
        ------
        PermissionError
            if the attribute which should be set is guarded

        """
        # check if the key has already been set once
        if key in self.__KEYS_TO_GUARD and hasattr(self, key):
            raise PermissionError("%s should not be overwritten after "
                                  "it has been set once" % key)
        else:
            super().__setattr__(key, value)
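
    # Illustration (not part of the original source) of the two private
    # helpers above: per-batch prediction dicts such as
    #     [{"pred": np.zeros((2, 1))}, {"pred": np.ones((3, 1))}]
    # first become {"pred": [<(2, 1) array>, <(3, 1) array>]} via
    # ``__convert_dict`` and are then merged into {"pred": <(5, 1) array>}
    # by ``__concatenate_dict_items``.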

    @staticmethod
    def calc_metrics(batch: LookupConfig, metrics=None, metric_keys=None):
        """
        Compute metrics

        Parameters
        ----------
        batch : LookupConfig
            dictionary containing the whole batch (including predictions)
        metrics : dict
            dict with metrics
        metric_keys : dict
            dict of tuples containing hashables that specify the items to
            use for calculating the respective metric.
            If not specified for a metric, the keys "pred" and "label"
            are used per default

        Returns
        -------
        dict
            dict with metric results

        """
        if metrics is None:
            metrics = {}

        if metric_keys is None:
            metric_keys = {k: ("pred", "label") for k in metrics.keys()}

        return {key: metric_fn(*[batch.nested_get(k)
                                 for k in metric_keys[key]])
                for key, metric_fn in metrics.items()}
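
# ---------------------------------------------------------------------------
# Hedged end-to-end sketch (not part of the original module):
# ``SquareModel`` is a hypothetical stand-in for an :class:`AbstractNetwork`;
# any callable returning a dict of predictions works here, assuming the
# default ``convert_batch_to_numpy_identity`` simply returns its positional
# and keyword arguments unchanged.

if __name__ == "__main__":

    class SquareModel:
        """Toy 'network' squaring its single input ``x`` element-wise"""

        def __call__(self, x):
            return {"pred": x ** 2}

    predictor = Predictor(model=SquareModel(), key_mapping={"x": "data"})

    batch = {"data": np.arange(4.)}
    print(predictor(batch))  # {'pred': array([0., 1., 4., 9.])}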