Source code for pina.solver.ensemble_solver.ensemble_solver_interface

"""Module for the DeepEnsemble solver interface."""

import torch
from ..solver import MultiSolverInterface
from ...utils import check_consistency


class DeepEnsembleSolverInterface(MultiSolverInterface):
    r"""
    A class for handling ensemble models in a multi-solver training
    framework. It allows for manual optimization, as well as the ability to
    train, validate, and test multiple models as part of an ensemble. The
    ensemble dimension can be customized to control how outputs are stacked.
    By default, it is compatible with problems defined by
    :class:`~pina.problem.abstract_problem.AbstractProblem`, and users can
    choose the problem type the solver is meant to address.

    An ensemble model is constructed by combining multiple models that solve
    the same type of problem. Mathematically, this creates an implicit
    distribution :math:`p(\mathbf{u} \mid \mathbf{s})` over the possible
    outputs :math:`\mathbf{u}`, given the original input :math:`\mathbf{s}`.
    The models :math:`\mathcal{M}_{i \in (1, \dots, r)}` in the ensemble work
    collaboratively to capture different aspects of the data or task, with
    each model contributing a distinct prediction
    :math:`\mathbf{y}_{i} = \mathcal{M}_i(\mathbf{s})`. By aggregating these
    predictions, the ensemble model can achieve greater robustness and
    accuracy compared to individual models, leveraging the diversity of the
    models to reduce overfitting and improve generalization. Furthermore,
    statistical metrics can be computed, e.g. the ensemble mean and variance:

    .. math::
        \boldsymbol{\mu} = \frac{1}{r}\sum_{i=1}^r \mathbf{y}_{i}

    .. math::
        \boldsymbol{\sigma}^2 = \frac{1}{r}\sum_{i=1}^r
        (\mathbf{y}_{i} - \boldsymbol{\mu})^2

    .. seealso::
        **Original reference**: Lakshminarayanan, B., Pritzel, A., &
        Blundell, C. (2017). *Simple and scalable predictive uncertainty
        estimation using deep ensembles*. Advances in Neural Information
        Processing Systems, 30.
        `arXiv:1612.01474 <https://arxiv.org/abs/1612.01474>`_.
    """

    def __init__(
        self,
        problem,
        models,
        optimizers=None,
        schedulers=None,
        weighting=None,
        use_lt=True,
        ensemble_dim=0,
    ):
        """
        Initialization of the :class:`DeepEnsembleSolverInterface` class.

        :param AbstractProblem problem: The problem to be solved.
        :param list[torch.nn.Module] models: The neural network models to be
            used.
        :param list[Optimizer] optimizers: The optimizers to be used. If
            ``None``, the :class:`torch.optim.Adam` optimizer is used for
            each model. Default is ``None``.
        :param list[Scheduler] schedulers: The learning rate schedulers. If
            ``None``, the :class:`torch.optim.lr_scheduler.ConstantLR`
            scheduler is used for each model. Default is ``None``.
        :param WeightingInterface weighting: The weighting schema to be used.
            If ``None``, no weighting schema is used. Default is ``None``.
        :param bool use_lt: If ``True``, the solver uses LabelTensors as
            input. Default is ``True``.
        :param int ensemble_dim: The dimension along which the ensemble
            outputs are stacked. Default is ``0``.
        """
        super().__init__(
            problem, models, optimizers, schedulers, weighting, use_lt
        )

        # check consistency
        check_consistency(ensemble_dim, int)
        self._ensemble_dim = ensemble_dim
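
    # The ensemble statistics described in the class docstring can be
    # recovered from the stacked output of :meth:`forward`. A minimal sketch
    # (illustrative, not part of the class API), assuming ``solver`` is a
    # concrete instance with ``ensemble_dim=0`` and ``x`` a valid input:
    #
    #     outputs = solver.forward(x)                    # shape: (r, ...)
    #     mean = outputs.mean(dim=0)                     # ensemble mean
    #     variance = outputs.var(dim=0, unbiased=False)  # ensemble variance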

    def forward(self, x, ensemble_idx=None):
        """
        Forward pass through the ensemble models. If ``ensemble_idx`` is
        provided, the output of the specific model corresponding to that
        index is returned. If no index is given, the outputs of all models
        are stacked along the ensemble dimension.

        :param LabelTensor x: The input tensor to the models.
        :param int ensemble_idx: Optional index to select a specific model
            from the ensemble. If ``None``, the results of all models are
            stacked along the ``ensemble_dim`` dimension. Default is
            ``None``.
        :return: The output of the selected model, or the stacked outputs of
            all models.
        :rtype: LabelTensor
        """
        # if an index is passed, return the output of the corresponding model
        if ensemble_idx is not None:
            return self.models[ensemble_idx].forward(x)

        # otherwise return the stacked outputs of all models
        return torch.stack(
            [self.forward(x, idx) for idx in range(self.num_ensemble)],
            dim=self.ensemble_dim,
        )
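
    # Usage sketch for :meth:`forward` (illustrative; ``solver``, ``x``, and
    # the ensemble size ``r`` are assumed, not defined in this module):
    #
    #     y_one = solver.forward(x, ensemble_idx=0)  # prediction of model 0
    #     y_all = solver.forward(x)                  # shape: (r, *y_one.shape)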

    def training_step(self, batch):
        """
        Training step for the solver, overridden for manual optimization.
        This method performs a forward pass, computes the loss, and applies
        manual backpropagation and optimization steps for each model in the
        ensemble.

        :param list[tuple[str, dict]] batch: A batch of training data. Each
            element is a tuple containing a condition name and a dictionary
            of points.
        :return: The aggregated loss after the training step.
        :rtype: torch.Tensor
        """
        # zero the gradients of each optimizer
        for opt in self.optimizers:
            opt.instance.zero_grad()

        # perform the forward passes and aggregate the losses
        loss = super().training_step(batch)

        # perform backpropagation
        self.manual_backward(loss)

        # step each optimizer and scheduler
        for opt, sched in zip(self.optimizers, self.schedulers):
            opt.instance.step()
            sched.instance.step()
        return loss
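
    # Note on manual optimization: Lightning requires
    # ``self.automatic_optimization = False`` for manual optimization, which
    # is assumed to be handled by :class:`MultiSolverInterface`;
    # ``manual_backward(loss)`` is used instead of ``loss.backward()`` so
    # that Lightning still handles mixed precision and distributed
    # strategies correctly.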

    @property
    def ensemble_dim(self):
        """
        The dimension along which the ensemble outputs are stacked.

        :return: The ensemble dimension.
        :rtype: int
        """
        return self._ensemble_dim

    @property
    def num_ensemble(self):
        """
        The number of models in the ensemble.

        :return: The number of models in the ensemble.
        :rtype: int
        """
        return len(self.models)
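

# A hypothetical end-to-end usage sketch. ``MyEnsembleSolver``, ``MyProblem``,
# ``x``, and the model configuration are illustrative assumptions, not part
# of this module:
#
#     from pina import Trainer
#     from pina.model import FeedForward
#
#     models = [
#         FeedForward(input_dimensions=2, output_dimensions=1)
#         for _ in range(5)
#     ]
#     solver = MyEnsembleSolver(problem=MyProblem(), models=models)
#     trainer = Trainer(solver, max_epochs=100)
#     trainer.train()
#
#     y_all = solver(x)  # outputs stacked along ``ensemble_dim``
#     mean, std = y_all.mean(0), y_all.std(0)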