From 9617375a77cb0be105e1a1c7ecb6caf0ffed771e Mon Sep 17 00:00:00 2001
From: Scott Linderman <scott.linderman@gmail.com>
Date: Tue, 17 Oct 2017 15:31:25 -0400
Subject: [PATCH] adding a robust regression class with multivariate-t distributed noise. no sampling of degrees of freedom yet.

---
 examples/robust_regression.py            | 132 ++++++++++++++++++++
 pybasicbayes/distributions/regression.py | 147 +++++++++++++++++++++++
 2 files changed, 279 insertions(+)
 create mode 100644 examples/robust_regression.py
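The patch relies on the classic scale-mixture representation of the Student's t distribution: if \tau ~ Gamma(\nu/2, \nu/2) and y | \tau ~ N(\mu, \sigma^2 / \tau), then marginally y ~ t(\nu, \mu, \sigma). A minimal standalone sketch of that equivalence, assuming only numpy and scipy (none of these names appear in the patch itself):

import numpy as np
from scipy.stats import t as student_t

nu, mu, sigma = 4.0, 1.0, 0.5

# tau ~ Gamma(nu/2, nu/2) with shape/rate parameters;
# np.random.gamma takes a shape and a scale, so scale = 2/nu
tau = np.random.gamma(nu / 2.0, 2.0 / nu, size=100000)

# y | tau ~ N(mu, sigma^2 / tau)  =>  marginally y ~ t(nu, mu, sigma)
y = mu + sigma * np.random.randn(100000) / np.sqrt(tau)

# Empirical quantiles of y should match Student's t quantiles
qs = [0.05, 0.25, 0.5, 0.75, 0.95]
print(np.percentile(y, [100 * q for q in qs]))
print(student_t.ppf(qs, df=nu, loc=mu, scale=sigma))

The RobustRegression class added below exploits exactly this representation for its Gibbs updates.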
+plt.title("Standard Regression") + +plt.subplot(133) +mu = robust_reg.predict(np.column_stack((x1.ravel(), x2.ravel()))) +plt.imshow(mu.reshape((npts, npts)), + cmap="RdBu", vmin=-ylim, vmax=ylim, + alpha=0.8, + extent=xlim + tuple(reversed(xlim))) +plt.scatter(X[~inds,0], X[~inds,1], c=y[~inds], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='gray') +plt.scatter(X[inds,0], X[inds,1], c=y[inds], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='k', linewidths=1) +plt.xlim(xlim) +plt.ylim(xlim) +plt.title("Robust Regression") + + +print("True A: {}".format(true_reg.A)) +print("Std A: {}".format(std_As.mean(0))) +print("Robust A: {}".format(robust_As.mean(0))) + +# Plot the log likelihoods and mean squared errors +plt.figure(figsize=(8, 4)) +plt.subplot(121) +plt.plot(std_lls) +plt.plot(robust_lls) +plt.xlabel("Iteration") +plt.ylabel("Log Likelihood") + +plt.subplot(122) +plt.plot(std_mses, label="Standard") +plt.plot(robust_mses, label="Robust") +plt.legend(loc="upper right") +plt.xlabel("Iteration") +plt.ylabel("Mean Squared Error") + +plt.show() diff --git a/pybasicbayes/distributions/regression.py b/pybasicbayes/distributions/regression.py index 5ae35ab..f4382cd 100644 --- a/pybasicbayes/distributions/regression.py +++ b/pybasicbayes/distributions/regression.py @@ -7,6 +7,9 @@ __all__ = ['Regression', 'RegressionNonconj', 'ARDRegression', import numpy as np from numpy import newaxis as na +from scipy.linalg import solve_triangular +from scipy.special import gammaln + from pybasicbayes.abstractions import GibbsSampling, MaxLikelihood, \ MeanField, MeanFieldSVI from pybasicbayes.util.stats import sample_gaussian, sample_mniw, \ @@ -898,6 +901,150 @@ class DiagonalRegression(Regression, MeanFieldSVI): self._meanfieldupdate_sigma(stats, prob=prob, stepsize=stepsize) +class RobustRegression(Regression): + """ + Regression with multivariate-t distributed noise. + + y | x ~ t(Ax + b, \Sigma, \nu) + + where \nu >= 1 is the degrees of freedom. + + This is equivalent to the model, + + \tau ~ Gamma(\nu/2, \nu/2) + y | x, \tau ~ N(Ax + b, \Sigma / \tau) + + Note that we can rewrite the second step as + + z | x ~ N(Ax + b, \Sigma) + y = z / \sqrt{\tau} + + To perform inference in this model, we will introduce + auxiliary variables tau (precisions). With these, we + can compute z = \sqrt{\tau} y and use the standard regression + object ot update A, b, Sigma | x, z. + + We could also experiment with sampling \nu under an + uninformative prior, e.g. p(\nu) \propto \nu^{-2}, + which is equivalent to a flat prior on \nu^{-1}. + """ + def __init__( + self, nu_0=None,S_0=None, M_0=None, K_0=None, affine=False, + A=None, sigma=None, nu=None): + + # Default to a somewhat intermediate value of nu + self.nu = nu if nu is not None else 4 + + super(RobustRegression, self).__init__( + nu_0=nu_0, S_0=S_0, M_0=M_0, K_0=K_0, affine=affine, A=A, sigma=sigma) + + def log_likelihood(self,xy): + assert isinstance(xy, (tuple, np.ndarray)) + sigma, D, nu = self.sigma, self.D_out, self.nu + x, y = (xy[:,:-D], xy[:,-D:]) if isinstance(xy,np.ndarray) else xy + + sigma_inv, L = inv_psd(sigma, return_chol=True) + r = y - self.predict(x) + z = solve_triangular(L, r.T, lower=True).T + + out = -0.5 * (nu + D) * np.log(1.0 + (z * z).sum(1) / nu) + out += gammaln((nu + D) / 2.0) - gammaln(nu / 2.0) - D / 2.0 * np.log(nu) \ + - D / 2.0 * np.log(np.pi) - np.log(np.diag(L)).sum() + + return out + + def resample(self, data=[], stats=None): + assert stats is None, \ + "We only support RobustRegression.resample() with data, not stats." 
diff --git a/pybasicbayes/distributions/regression.py b/pybasicbayes/distributions/regression.py
index 5ae35ab..f4382cd 100644
--- a/pybasicbayes/distributions/regression.py
+++ b/pybasicbayes/distributions/regression.py
@@ -7,6 +7,9 @@ __all__ = ['Regression', 'RegressionNonconj', 'ARDRegression',
 import numpy as np
 from numpy import newaxis as na
+from scipy.linalg import solve_triangular
+from scipy.special import gammaln
+
 from pybasicbayes.abstractions import GibbsSampling, MaxLikelihood, \
     MeanField, MeanFieldSVI
 from pybasicbayes.util.stats import sample_gaussian, sample_mniw, \
@@ -898,6 +901,150 @@ class DiagonalRegression(Regression, MeanFieldSVI):
         self._meanfieldupdate_sigma(stats, prob=prob, stepsize=stepsize)
 
 
+class RobustRegression(Regression):
+    """
+    Regression with multivariate-t distributed noise.
+
+        y | x ~ t(Ax + b, \Sigma, \nu)
+
+    where \nu >= 1 is the degrees of freedom.
+
+    This is equivalent to the scale-mixture model,
+
+        \tau ~ Gamma(\nu/2, \nu/2)
+        y | x, \tau ~ N(Ax + b, \Sigma / \tau)
+
+    Note that we can rewrite the second step as
+
+        \epsilon ~ N(0, \Sigma)
+        y = Ax + b + \epsilon / \sqrt{\tau}
+
+    To perform inference in this model, we introduce the
+    precisions \tau as auxiliary variables.  Given \tau, we can
+    rescale the inputs and outputs by \sqrt{\tau} -- including
+    the column of ones that implements the affine term -- and
+    use the standard regression object to update
+    A, b, Sigma | x, y, \tau.
+
+    We could also experiment with sampling \nu under an
+    uninformative prior, e.g. p(\nu) \propto \nu^{-2},
+    which is equivalent to a flat prior on \nu^{-1}.
+    """
+    def __init__(
+            self, nu_0=None, S_0=None, M_0=None, K_0=None, affine=False,
+            A=None, sigma=None, nu=None):
+
+        # Default to a moderate degrees of freedom (nu = 4 gives heavy
+        # tails while keeping the variance of the t distribution finite)
+        self.nu = nu if nu is not None else 4
+
+        super(RobustRegression, self).__init__(
+            nu_0=nu_0, S_0=S_0, M_0=M_0, K_0=K_0, affine=affine, A=A, sigma=sigma)
+
+    def log_likelihood(self, xy):
+        assert isinstance(xy, (tuple, np.ndarray))
+        sigma, D, nu = self.sigma, self.D_out, self.nu
+        x, y = (xy[:, :-D], xy[:, -D:]) if isinstance(xy, np.ndarray) else xy
+
+        # Evaluate the multivariate-t log density,
+        #   log p(y) = gammaln((nu+D)/2) - gammaln(nu/2) - D/2 log(nu pi)
+        #              - 1/2 log|Sigma| - (nu+D)/2 log(1 + r^T Sigma^{-1} r / nu)
+        # using the Cholesky factor L of Sigma to whiten the residuals r.
+        L = np.linalg.cholesky(sigma)
+        r = y - self.predict(x)
+        z = solve_triangular(L, r.T, lower=True).T
+
+        out = -0.5 * (nu + D) * np.log(1.0 + (z * z).sum(1) / nu)
+        out += gammaln((nu + D) / 2.0) - gammaln(nu / 2.0) - D / 2.0 * np.log(nu) \
+            - D / 2.0 * np.log(np.pi) - np.log(np.diag(L)).sum()
+
+        return out
+
+    def resample(self, data=[], stats=None):
+        assert stats is None, \
+            "We only support RobustRegression.resample() with data, not stats."
+
+        # First sample the auxiliary precision of each data point
+        tau = self._resample_precision(data)
+
+        # Given tau, the model is a standard linear regression on data
+        # rescaled by \sqrt{\tau}.  The affine column of ones is appended
+        # and rescaled explicitly in _rescale_data, so temporarily treat
+        # the model as non-affine while resampling A, b, Sigma.
+        scaled_data = self._rescale_data(data, tau)
+        affine = self.affine
+        self.affine = False
+        try:
+            super(RobustRegression, self).resample(scaled_data)
+        finally:
+            self.affine = affine
+
+        # Resample degrees of freedom \nu
+        self._resample_nu()
+
+    def _resample_precision(self, data):
+        assert isinstance(data, (list, tuple, np.ndarray))
+        if isinstance(data, list):
+            return [self._resample_precision(d) for d in data]
+
+        elif isinstance(data, tuple):
+            x, y = data
+
+        else:
+            x, y = data[:, :-self.D_out], data[:, -self.D_out:]
+
+        assert x.ndim == y.ndim == 2
+        assert x.shape[0] == y.shape[0]
+        assert x.shape[1] == (self.D_in - 1 if self.affine else self.D_in)
+        assert y.shape[1] == self.D_out
+        N = x.shape[0]
+
+        # The precision is conditionally gamma distributed:
+        #   \tau | x, y ~ Gamma((\nu + D)/2, (\nu + r^T \Sigma^{-1} r)/2)
+        # where r = y - (Ax + b) is the residual.
+        a_post = self.nu / 2.0 + self.D_out / 2.0
+
+        r = y - self.predict(x)
+        L = np.linalg.cholesky(self.sigma)
+        z = solve_triangular(L, r.T, lower=True).T
+        b_post = self.nu / 2.0 + (z * z).sum(1) / 2.0
+
+        assert np.isscalar(a_post) and b_post.shape == (N,)
+        # np.random.gamma is parameterized by shape and scale (inverse rate)
+        tau = np.random.gamma(a_post, 1. / b_post)
+
+        return tau
+
+    def _rescale_data(self, data, precisions):
+        assert isinstance(data, (list, tuple, np.ndarray))
+        if isinstance(data, list):
+            assert isinstance(precisions, list)
+            return [self._rescale_data(d, p) for d, p in zip(data, precisions)]
+
+        elif isinstance(data, tuple):
+            x, y = data
+
+        else:
+            x, y = data[:, :-self.D_out], data[:, -self.D_out:]
+
+        # Append the column of ones for the affine term so that it is
+        # rescaled along with the inputs; otherwise the intercept would
+        # be updated as if every data point had unit precision.
+        if self.affine:
+            x = np.column_stack((x, np.ones(x.shape[0])))
+
+        # Return the data as an array so that _ARMixin can work with it.
+        # Note that the AR striding memory benefits are lost because
+        # this forces us to copy memory.
+        sqrt_prec = np.sqrt(precisions)[:, None]
+        return np.hstack((sqrt_prec * x, sqrt_prec * y))
+
+    def _resample_nu(self):
+        # TODO: Resample the degrees of freedom parameter, e.g. with a
+        # griddy-Gibbs step (see the sketch following this patch)
+        pass
+
+    # Not supporting MLE or mean field for now
+    def max_likelihood(self, data, weights=None, stats=None):
+        raise NotImplementedError
+
+    def meanfieldupdate(self, data=None, weights=None, stats=None):
+        raise NotImplementedError
+
+    def meanfield_sgdstep(self, data, weights, prob, stepsize, stats=None):
+        raise NotImplementedError
+
+    def meanfield_expectedstats(self):
+        raise NotImplementedError
+
+    def expected_log_likelihood(self, xy=None, stats=None):
+        raise NotImplementedError
+
+    def get_vlb(self):
+        raise NotImplementedError
+
+    def resample_from_mf(self):
+        raise NotImplementedError
+
+    def _set_params_from_mf(self):
+        raise NotImplementedError
+
+    def _initialize_mean_field(self):
+        pass
+
+
 class _ARMixin(object):
-- 
GitLab
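As the commit message notes, the degrees of freedom \nu are not resampled yet (_resample_nu is a stub). One simple way to fill it in, under the p(\nu) \propto \nu^{-2} prior suggested in the docstring, is a griddy-Gibbs step using the conditional likelihood of the sampled precisions, \tau_i ~ Gamma(\nu/2, \nu/2). A hypothetical sketch, not part of the patch (the function name and grid are illustrative, and resample would also need to store the sampled precisions, e.g. as self.tau):

import numpy as np
from scipy.special import gammaln

def resample_nu_grid(tau, nu_grid=None):
    """Sample nu from p(nu | tau) on a grid, assuming tau_i ~ Gamma(nu/2, nu/2)
    and the improper prior p(nu) ~ nu^{-2}."""
    nu_grid = np.linspace(1.0, 20.0, 100) if nu_grid is None else nu_grid
    a = nu_grid / 2.0                                  # shape = rate = nu/2
    # log p(tau | nu) summed over data points, for every nu in the grid
    lp = tau.size * (a * np.log(a) - gammaln(a)) \
        + (a - 1.0) * np.log(tau).sum() - a * tau.sum()
    lp -= 2.0 * np.log(nu_grid)                        # log prior
    p = np.exp(lp - lp.max())                          # exponentiate stably
    return np.random.choice(nu_grid, p=p / p.sum())

With something like this in place, _resample_nu could simply set self.nu = resample_nu_grid(self.tau). The gamma likelihood is cheap to evaluate on a grid, so this adds little overhead per iteration; a Metropolis-Hastings step on log \nu would be a natural alternative.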