Source code for athena.feature_map

"""
Module for the feature map class

:References:

    - Francesco Romor, Marco Tezzele, Andrea Lario, Gianluigi Rozza.
      Kernel-based active subspaces with application to computational fluid
      dynamics parametric problems using discontinuous Galerkin method.
      International Journal for Numerical Methods in Engineering,
      123(23):6000–6027, 2022. doi:10.1002/nme.7099

"""
from functools import partial
import numpy as np
from scipy.optimize import brute, dual_annealing
import GPyOpt
from .projection_factory import ProjectionFactory


[docs]class FeatureMap(): """Feature map class. :param str distr: name of the spectral distribution to sample from the matrix. :param numpy.ndarray bias: n_features dimensional bias. It is sampled from a Unifrom distribution on the interval [0, 2*PI]. :param int input_dim: dimension of the input space. :param int n_features: dimension of the Reproducing Kernel Hilbert Space. :param list params: number of hyperparameters of the spectral distribution. :param int sigma_f: multiplicative constant of the feature map. Usually it is set as the empirical variance of the outputs. :cvar callable fmap: feature map used to project the input samples to the RKHS. Default value is rff_map. :cvar callable fjac: Jacobian matrix of fmap. Default value is rff_jac, the analytical Jacobian of fmap. :cvar numpy.ndarray _pr_matrix: n_features-by-input_dim projection matrix, whose rows are sampled from the spectral distribution distr. :raises TypeError """ def __init__(self, distr, bias, input_dim, n_features, params, sigma_f): if callable(distr): self.distr = distr elif isinstance(distr, str): self.distr = ProjectionFactory(distr) else: raise TypeError('`distr` is not valid.') self.bias = bias self.input_dim = input_dim self.n_features = n_features self.params = params self.sigma_f = sigma_f self.fmap = rff_map self.fmap_jac = rff_jac self._pr_matrix = None @property def pr_matrix(self): """ Get the projection matrix. :return: the projection matrix. :rtype: numpy.ndarray """ return self._pr_matrix
[docs] def _compute_pr_matrix(self): """ Sample the projection matrixx from the spectral distribution. :return: the projection matrix. :rtype: numpy.ndarray """ return self.distr(self.input_dim, self.n_features, self.params)
[docs] def compute_fmap(self, inputs): """ Evaluate the feature map at inputs. :param numpy.ndarray inputs: the inputs to project on the RKHS. :return: the n_features dimensional projection of the inputs. :rtype: numpy.ndarray """ if self._pr_matrix is None: self._pr_matrix = self._compute_pr_matrix() return self.fmap(inputs, self._pr_matrix, self.bias, self.n_features, self.sigma_f)
[docs] def compute_fmap_jac(self, inputs): """ Evaluate the Jacobian matrix of feature map at inputs. :param numpy.ndarray inputs: the inputs at which compute the Jacobian matrix of the feature map. :return: the n_features-by-input_dim dimensional Jacobian of the feature map at the inputs. :rtype: numpy.ndarray """ if self._pr_matrix is None: self._pr_matrix = self._compute_pr_matrix() return self.fmap_jac(inputs, self._pr_matrix, self.bias, self.n_features, self.sigma_f)
[docs] def tune_pr_matrix(self, func, bounds, method=None, maxiter=50, save_file=False, fn_args=None): """ Tune the parameters of the spectral distribution. Three methods are available: log-grid-search (brute), annealing (dual_annealing) and Bayesian stochastic optimization (bso) from GpyOpt. The default object function to optimize is athena.utils.average_rrmse, which uses a cross-validation procedure from athena.utils, see Example and tutorial 06_kernel-based_AS. :Example: >>> from athena.kas import KernelActiveSubspaces >>> from athena.feature_map import FeatureMap >>> from athena.projection_factory import ProjectionFactory >>> from athena.utils import CrossValidation, average_rrmse >>> input_dim, output_dim, n_samples = 2, 1, 30 >>> n_features, n_params = 10, 1 >>> xx = np.ones((n_samples, input_dim)) >>> f = np.ones((n_samples, output_dim)) >>> df = np.ones((n_samples, output_dim, input_dim)) >>> fm = FeatureMap(distr='laplace', bias=np.random.uniform(0, 2 * np.pi, n_features), input_dim=input_dim, n_features=n_features, params=np.zeros(n_params), sigma_f=f.var()) >>> kss = KernelActiveSubspaces(feature_map=fm, dim=1, n_features=n_features) >>> csv = CrossValidation(inputs=xx, outputs=f.reshape(-1, 1), gradients=df.reshape(n_samples, output_dim, input_dim), folds=3, subspace=kss) >>> best = fm.tune_pr_matrix(func=average_rrmse, bounds=[slice(-2, 1, 0.2) for i in range(n_params)], args=(csv, ), method='bso', maxiter=20, save_file=False) :param callable func: the objective function to be minimized. Must be in the form f(x, *args), where x is the argument in the form of a 1-D array and args is a tuple of any additional fixed parameters needed to completely specify the function. :param tuple bounds: each component of the bounds tuple must be a slice tuple of the form (low, high, step). It defines bounds for the objective function parameter in a logarithmic scale. Step will be ignored for 'dual_annealing' method. :param tuple args: any additional fixed parameters needed to completely specify the objective function. :param str method: method used to optimize the objective function. Possible values are 'brute', or 'dual_annealing'. Default value is None, and the choice is made automatically wrt the dimension of `self.params`. If the dimension is less than 4 brute force is used, otherwise a traditional Generalized Simulated Annealing will be performed with no local search strategy applied. :param int maxiter: the maximum number of global search iterations. Default value is 50. :param bool save_file: True to save the optimal projection matrix :param dict fn_args: dictionary of arguments passed to func. :raises: ValueError :return: list that records the best score and the best projection matrix. The initial values are 0.8 and a n_features-by-input_dim numpy.ndarray of zeros. :rtype: list """ if fn_args is None: fn_args = {} best = [0.8, np.zeros((self.n_features, self.input_dim))] if method is None: method = 'brute' if len(self.params) < 4 else 'dual_annealing' if method == 'brute': self.params = brute(func=func, ranges=bounds, args=( best, *tuple(fn_args.values()), ), finish=None) elif method == 'dual_annealing': bounds_list = [[bound.start, bound.stop] for bound in bounds] self.params = 10**dual_annealing(func=func, bounds=bounds_list, args=( best, *tuple(fn_args.values()), ), maxiter=maxiter, no_local_search=False).x elif method == 'bso': bounds = [{ 'name': f'var_{str(i)}', 'type': 'continuous', 'domain': [bound.start, bound.stop], } for i, bound in enumerate(bounds)] func_obj = partial(func, best=best, **fn_args) bopt = GPyOpt.methods.BayesianOptimization(func_obj, domain=bounds, model_type='GP', acquisition_type='EI', exact_feval=True) bopt.run_optimization(max_iter=maxiter, max_time=3600, eps=1e-16, verbosity=False) self.params = 10**bopt.x_opt else: raise ValueError( "Method argument can only be 'brute' or 'dual_annealing' or 'bso'." ) self._pr_matrix = best[1] self.params = best[0] if save_file: np.save("opt_pr_matrix", best[1]) return best
[docs]def rff_map(inputs, pr_matrix, bias, n_features, sigma_f): """ Random Fourier Features map. It can be vectorized for inputs of shape n_samples-by-input_dim. :param numpy.ndarray inputs: input_dim dimensional inputs to project to the RKHS. :param numpy.ndarray pr_matrix: n_features-by-input_dim projection matrix, whose rows are sampled from the spectral distribution. :param numpy.ndarray bias: n_features dimensional bias. It is sampled from a Unifrom distribution on the interval [0, 2*PI]. :param int n_features: dimension of the RKHS :param int sigma_f: multiplicative term representing the empirical variance the outptus. :return: n_features dimensional projection of the inputs to the RKHS :rtype: numpy.ndarray """ return np.sqrt(2 / n_features) * sigma_f * np.cos( np.dot(inputs, pr_matrix.T) + bias.reshape(1, -1))
[docs]def rff_jac(inputs, pr_matrix, bias, n_features, sigma_f): """ Random Fourier Features map's Jacobian. It can be vectorized for inputs of shape n_samples-by-input_dim. :param numpy.ndarray inputs: input_dim dimensional inputs to project to the RKHS. :param numpy.ndarray pr_matrix: n_features-by-input_dim projection matrix, whose rows are sampled from the spectral distribution. :param numpy.ndarray bias: n_features dimensional bias. It is sampled from a Unifrom distribution on the interval [0, 2*PI]. :param int n_features: dimension of the RKHS :param int sigma_f: multiplicative term representing the empirical variance the outptus. :return: n_features-by-input_dim dimensional projection of the inputs to the RKHS :rtype: numpy.ndarray """ return (np.sqrt(2 / n_features) * sigma_f * (-1) * np.sin(np.dot(inputs, pr_matrix.T) + bias)).reshape( inputs.shape[0], n_features, 1) * pr_matrix