Source code for ezyrb.approximation.gpr

"""
Module wrapper exploiting `GPy` for Gaussian Process Regression
"""
import numpy as np
from scipy.optimize import minimize
from sklearn.gaussian_process import GaussianProcessRegressor

from .approximation import Approximation



[docs]
class GPR(Approximation):
    """
    Multidimensional regression using Gaussian process.

    :cvar numpy.ndarray X_sample: the array containing the input points,
        arranged by row.
    :cvar numpy.ndarray Y_sample: the array containing the output values,
        arranged by row.
    :cvar sklearn.gaussian_process.GaussianProcessRegressor model: the
        regression model.
    :cvar sklearn.gaussian_process.kernels.Kernel kern: kernel object from
        sklearn.
    :cvar bool normalizer: whether to normilize `values` or not.  Defaults to
        True.
    :cvar int optimization_restart: number of restarts for the optimization.
        Defaults to 20.

    :Example:

        >>> import ezyrb
        >>> import numpy as np
        >>> x = np.random.uniform(-1, 1, size=(4, 2))
        >>> y = (np.sin(x[:, 0]) + np.cos(x[:, 1]**3)).reshape(-1, 1)
        >>> gpr = ezyrb.GPR()
        >>> gpr.fit(x, y)
        >>> y_pred = gpr.predict(x)
        >>> print(np.allclose(y, y_pred))

    """
    def __init__(self, kern=None, normalizer=True, optimization_restart=20):
        """
        Initialize a Gaussian Process Regressor.
        
        :param kern: Kernel object from sklearn. Default is None.
        :param bool normalizer: Whether to normalize values. Default is True.
        :param int optimization_restart: Number of restarts for optimization.
            Default is 20.
        """

        self.X_sample = None
        self.Y_sample = None
        self.kern = kern
        self.normalizer = normalizer  # TODO: avoid normalizer inside GPR class
        self.optimization_restart = optimization_restart
        self.model = None


[docs]
    def fit(self, points, values):
        """
        Construct the regression given `points` and `values`.

        :param array_like points: the coordinates of the points.
        :param array_like values: the values in the points.
        """
        self.X_sample = np.array(points)
        self.Y_sample = np.array(values)
        if self.X_sample.ndim == 1:
            self.X_sample = self.X_sample.reshape(-1, 1)
        if self.Y_sample.ndim == 1:
            self.Y_sample = self.Y_sample.reshape(-1, 1)

        self.model = GaussianProcessRegressor(
            kernel=self.kern, n_restarts_optimizer=self.optimization_restart,
            normalize_y=self.normalizer)
        self.model.fit(self.X_sample, self.Y_sample)



[docs]
    def predict(self, new_points, return_variance=False):
        """
        Predict the mean and the variance of Gaussian distribution at given
        `new_points`.

        :param array_like new_points: the coordinates of the given points.
        :param bool return_variance: flag to return also the variance.
            Default is False.
        :return: the mean and the variance
        :rtype: (numpy.ndarray, numpy.ndarray)
        """
        new_points = np.atleast_2d(new_points)
        return self.model.predict(new_points, return_std=return_variance)



[docs]
    def optimal_mu(self, bounds, optimization_restart=10):
        """
        Proposes the next sampling point by looking at the point where the
        Gaussian covariance is maximized. A gradient method (with multi
        starting points) is adopted for the optimization.

        :param numpy.ndarray bounds: the boundaries in the gradient
            optimization. The shape must be (*input_dim*, 2), where *input_dim*
            is the dimension of the input points.
        :param int optimization_restart: the number of restart in the gradient
            optimization. Default is 10.
        """
        dim = self.X_sample.shape[1]
        min_val = 1
        min_x = None

        def min_obj(X):
            return -1 * np.linalg.norm(self.predict(X.reshape(1, -1), True)[1])

        initial_starts = np.random.uniform(bounds[:, 0],
                                           bounds[:, 1],
                                           size=(optimization_restart, dim))

        # Find the best optimum by starting from n_restart different random
        # points.
        for x0 in initial_starts:
            res = minimize(min_obj, x0, bounds=bounds, method='L-BFGS-B')

            if res.fun < min_val:
                min_val = res.fun
                min_x = res.x

        return min_x.reshape(1, -1)