diff --git a/examples/robust_regression.py b/examples/robust_regression.py
index 8ba563e4261ba03967407888a2e0b744769b6333..08d4d3112264957b58f461d65269fe7ddf7ff4e6 100644
--- a/examples/robust_regression.py
+++ b/examples/robust_regression.py
@@ -79,8 +79,8 @@ plt.imshow(mu.reshape((npts, npts)),
            cmap="RdBu", vmin=-ylim, vmax=ylim,
            alpha=0.8,
            extent=xlim + tuple(reversed(xlim)))
-plt.scatter(X[~inds,0], X[~inds,1], c=y[~inds], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='gray')
-plt.scatter(X[inds,0], X[inds,1], c=y[inds], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='k', linewidths=1)
+plt.scatter(X[~inds,0], X[~inds,1], c=y[~inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='gray')
+plt.scatter(X[inds,0], X[inds,1], c=y[inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='k', linewidths=1)
 plt.xlim(xlim)
 plt.ylim(xlim)
 plt.title("True")
@@ -91,8 +91,8 @@ plt.imshow(mu.reshape((npts, npts)),
            cmap="RdBu", vmin=-ylim, vmax=ylim,
            alpha=0.8,
            extent=xlim + tuple(reversed(xlim)))
-plt.scatter(X[~inds,0], X[~inds,1], c=y[~inds], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='gray')
-plt.scatter(X[inds,0], X[inds,1], c=y[inds], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='k', linewidths=1)
+plt.scatter(X[~inds,0], X[~inds,1], c=y[~inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='gray')
+plt.scatter(X[inds,0], X[inds,1], c=y[inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='k', linewidths=1)
 plt.xlim(xlim)
 plt.ylim(xlim)
 plt.title("Standard Regression")
@@ -103,8 +103,8 @@ plt.imshow(mu.reshape((npts, npts)),
            cmap="RdBu", vmin=-ylim, vmax=ylim,
            alpha=0.8,
            extent=xlim + tuple(reversed(xlim)))
-plt.scatter(X[~inds,0], X[~inds,1], c=y[~inds], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='gray')
-plt.scatter(X[inds,0], X[inds,1], c=y[inds], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='k', linewidths=1)
+plt.scatter(X[~inds,0], X[~inds,1], c=y[~inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='gray')
+plt.scatter(X[inds,0], X[inds,1], c=y[inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='k', linewidths=1)
 plt.xlim(xlim)
 plt.ylim(xlim)
 plt.title("Robust Regression")
diff --git a/pybasicbayes/distributions/regression.py b/pybasicbayes/distributions/regression.py
index 5e53a6548392b357cf0c53b003f27847e0a5acb1..4be10ada2303ba9fc574b37e7d238db30c0e8f36 100644
--- a/pybasicbayes/distributions/regression.py
+++ b/pybasicbayes/distributions/regression.py
@@ -5,11 +5,13 @@
 __all__ = ['Regression', 'RegressionNonconj', 'ARDRegression',
            'AutoRegression', 'ARDAutoRegression', 'DiagonalRegression',
            'RobustRegression', 'RobustAutoRegression']
 
+from warnings import warn
+
 import numpy as np
 from numpy import newaxis as na
 from scipy.linalg import solve_triangular
-from scipy.special import gammaln
+from scipy.special import gammaln, digamma, polygamma
 
 from pybasicbayes.abstractions import GibbsSampling, MaxLikelihood, \
     MeanField, MeanFieldSVI
@@ -925,7 +927,10 @@ class RobustRegression(Regression):
 
     use the standard regression object to update A, b, Sigma | x, y, \tau.
 
-    TODO: We could also experiment with sampling \nu under an
+    The degrees of freedom parameter \nu is updated via maximum
+    likelihood using a generalized Newton's method proposed by
+    Tom Minka. We are not using any prior on \nu, but we
+    could experiment with updating \nu under an
     uninformative prior, e.g. p(\nu) \propto \nu^{-2},
     which is equivalent to a flat prior on \nu^{-1}.
     """
@@ -1039,7 +1044,7 @@ class RobustRegression(Regression):
         super(RobustRegression, self).resample(stats=stats)
 
         # Resample degrees of freedom \nu
-        self._resample_nu()
+        self._resample_nu(tau)
 
     def _resample_precision(self, data):
         assert isinstance(data, (list, tuple, np.ndarray))
@@ -1075,9 +1080,48 @@ class RobustRegression(Regression):
 
         return tau
 
-    def _resample_nu(self):
-        # TODO: Resample the degrees of freedom parameter
-        pass
+    def _resample_nu(self, tau, nu0=4, max_iter=100, nu_min=1e-3, nu_max=100, tol=1e-4, verbose=False):
+        """
+        Generalized Newton's method for the degrees of freedom parameter, nu,
+        of a Student's t distribution. See the notebook in the doc/students_t
+        folder for a complete derivation.
+        """
+        if len(tau) == 0:
+            self.nu = nu0
+            return
+
+        tau = np.concatenate(tau) if isinstance(tau, list) else tau
+        E_tau = np.mean(tau)
+        E_logtau = np.mean(np.log(tau))
+
+        # delbo is the derivative of the expected log likelihood wrt nu; ddelbo is its derivative.
+        delbo = lambda nu: 1/2 * (1 + np.log(nu/2)) - 1/2 * digamma(nu/2) + 1/2 * E_logtau - 1/2 * E_tau
+        ddelbo = lambda nu: 1/(2 * nu) - 1/4 * polygamma(1, nu/2)
+
+        dnu = np.inf
+        nu = nu0
+        for itr in range(max_iter):
+            if abs(dnu) < tol:
+                break
+
+            if nu < nu_min or nu > nu_max:
+                warn("generalized_newton_studentst_dof fixed point grew beyond "
+                     "bounds [{},{}].".format(nu_min, nu_max))
+                nu = np.clip(nu, nu_min, nu_max)
+                break
+
+            # Perform the generalized Newton update
+            a = -nu**2 * ddelbo(nu)
+            b = delbo(nu) - a / nu
+            assert a > 0 and b < 0, "generalized_newton_studentst_dof encountered invalid values of a,b"
+            dnu = -a / b - nu
+            nu = nu + dnu
+
+        if itr == max_iter - 1:
+            warn("generalized_newton_studentst_dof failed to converge "
+                 "at tolerance {} in {} iterations.".format(tol, max_iter))
+
+        self.nu = nu
 
     # Not supporting MLE or mean field for now
     def max_likelihood(self,data,weights=None,stats=None):
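
Note (not part of the patch): the generalized Newton step in _resample_nu can be
exercised on its own. The standalone sketch below is illustrative only; the
function name and the synthetic Gamma(nu/2, nu/2) precisions are assumptions made
for this demo. It mirrors the delbo/ddelbo expressions and the nu <- -a/b update
added above, and the estimate should land near the nu used to generate tau.

import numpy as np
from scipy.special import digamma, polygamma

def generalized_newton_studentst_dof(tau, nu0=4.0, max_iter=100, tol=1e-4):
    """Maximize the expected log likelihood of Gamma(nu/2, nu/2) precisions over nu."""
    E_tau, E_logtau = np.mean(tau), np.mean(np.log(tau))

    # Derivative of the expected log likelihood wrt nu, and its derivative
    # (the same delbo/ddelbo expressions as in the patch).
    delbo = lambda nu: (0.5 * (1 + np.log(nu / 2)) - 0.5 * digamma(nu / 2)
                        + 0.5 * E_logtau - 0.5 * E_tau)
    ddelbo = lambda nu: 1 / (2 * nu) - 0.25 * polygamma(1, nu / 2)

    nu = nu0
    for _ in range(max_iter):
        # Approximate delbo(nu') by b + a / nu', matching value and slope at the
        # current nu, then jump to the root of the approximation: nu' = -a / b.
        a = -nu ** 2 * ddelbo(nu)
        b = delbo(nu) - a / nu
        nu_new = -a / b
        if abs(nu_new - nu) < tol:
            break
        nu = nu_new
    return nu_new

# Illustrative check: precisions drawn with nu = 5 should give an estimate near 5.
rng = np.random.RandomState(0)
tau = rng.gamma(5.0 / 2, 2.0 / 5.0, size=100000)
print(generalized_newton_studentst_dof(tau))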