diff --git a/examples/robust_regression.py b/examples/robust_regression.py
new file mode 100644
index 0000000000000000000000000000000000000000..08d4d3112264957b58f461d65269fe7ddf7ff4e6
--- /dev/null
+++ b/examples/robust_regression.py
@@ -0,0 +1,132 @@
+# Demo of a robust regression model with multivariate-t distributed noise
+
+import numpy as np
+import numpy.random as npr
+np.random.seed(0)
+
+import matplotlib.pyplot as plt
+import seaborn as sns
+sns.set_style("white")
+
+from pybasicbayes.util.text import progprint_xrange
+from pybasicbayes.distributions import Regression, RobustRegression
+
+D_out = 1
+D_in = 2
+N = 100
+
+# Make a regression model and simulate data
+A = npr.randn(D_out, D_in)
+b = npr.randn(D_out)
+Sigma = 0.1 * np.eye(D_out)
+
+true_reg = Regression(A=np.column_stack((A, b)), sigma=Sigma, affine=True)
+X = npr.randn(N, D_in)
+y = true_reg.rvs(x=X, return_xy=False)
+
+# Corrupt a fraction of the data
+inds = npr.rand(N) < 0.1
+y[inds] = 3 * npr.randn(inds.sum(), D_out)
+
+# Make a test regression and fit it
+std_reg = Regression(nu_0=D_out + 2,
+                     S_0=np.eye(D_out),
+                     M_0=np.zeros((D_out, D_in+1)),
+                     K_0=np.eye(D_in+1),
+                     affine=True)
+
+robust_reg = RobustRegression(nu_0=D_out+2,
+                              S_0=np.eye(D_out),
+                              M_0=np.zeros((D_out, D_in+1)),
+                              K_0=np.eye(D_in+1),
+                              affine=True)
+
+def _collect(r):
+    ll = r.log_likelihood((X, y))[~inds].sum()
+    err = ((y - r.predict(X))**2).sum(1)
+    mse = np.mean(err[~inds])
+    return r.A.copy(), ll, mse
+
+def _update(r):
+    r.resample([(X,y)])
+    return _collect(r)
+
+# Fit the standard regression
+smpls = [_collect(std_reg)]
+for _ in progprint_xrange(100):
+    smpls.append(_update(std_reg))
+smpls = zip(*smpls)
+std_As, std_lls, std_mses = tuple(map(np.array, smpls))
+
+# Fit the robust regression
+smpls = [_collect(robust_reg)]
+for _ in progprint_xrange(100):
+    smpls.append(_update(robust_reg))
+smpls = zip(*smpls)
+robust_As, robust_lls, robust_mses = tuple(map(np.array, smpls))
+
+
+# Plot the inferred regression function
+plt.figure(figsize=(8, 4))
+xlim = (-3, 3)
+ylim = abs(y).max()
+npts = 50
+x1, x2 = np.meshgrid(np.linspace(*xlim, npts), np.linspace(*xlim, npts))
+
+plt.subplot(131)
+mu = true_reg.predict(np.column_stack((x1.ravel(), x2.ravel())))
+plt.imshow(mu.reshape((npts, npts)),
+           cmap="RdBu", vmin=-ylim, vmax=ylim,
+           alpha=0.8,
+           extent=xlim + tuple(reversed(xlim)))
+plt.scatter(X[~inds,0], X[~inds,1], c=y[~inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='gray')
+plt.scatter(X[inds,0], X[inds,1], c=y[inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='k', linewidths=1)
+plt.xlim(xlim)
+plt.ylim(xlim)
+plt.title("True")
+
+plt.subplot(132)
+mu = std_reg.predict(np.column_stack((x1.ravel(), x2.ravel())))
+plt.imshow(mu.reshape((npts, npts)),
+           cmap="RdBu", vmin=-ylim, vmax=ylim,
+           alpha=0.8,
+           extent=xlim + tuple(reversed(xlim)))
+plt.scatter(X[~inds,0], X[~inds,1], c=y[~inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='gray')
+plt.scatter(X[inds,0], X[inds,1], c=y[inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='k', linewidths=1)
+plt.xlim(xlim)
+plt.ylim(xlim)
+plt.title("Standard Regression")
+
+plt.subplot(133)
+mu = robust_reg.predict(np.column_stack((x1.ravel(), x2.ravel())))
+plt.imshow(mu.reshape((npts, npts)),
+           cmap="RdBu", vmin=-ylim, vmax=ylim,
+           alpha=0.8,
+           extent=xlim + tuple(reversed(xlim)))
+plt.scatter(X[~inds,0], X[~inds,1], c=y[~inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='gray')
+plt.scatter(X[inds,0], X[inds,1], c=y[inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim,
+            edgecolors='k', linewidths=1)
+plt.xlim(xlim)
+plt.ylim(xlim)
+plt.title("Robust Regression")
+
+
+print("True A: {}".format(true_reg.A))
+print("Std A: {}".format(std_As.mean(0)))
+print("Robust A: {}".format(robust_As.mean(0)))
+
+# Plot the log likelihoods and mean squared errors
+plt.figure(figsize=(8, 4))
+plt.subplot(121)
+plt.plot(std_lls)
+plt.plot(robust_lls)
+plt.xlabel("Iteration")
+plt.ylabel("Log Likelihood")
+
+plt.subplot(122)
+plt.plot(std_mses, label="Standard")
+plt.plot(robust_mses, label="Robust")
+plt.legend(loc="upper right")
+plt.xlabel("Iteration")
+plt.ylabel("Mean Squared Error")
+
+plt.show()
diff --git a/pybasicbayes/distributions/gaussian.py b/pybasicbayes/distributions/gaussian.py
index 66ceed683f92f8e331bf31fdb5d589427f02da77..f283986f623439653a8dd78270a17a3584df8bab 100644
--- a/pybasicbayes/distributions/gaussian.py
+++ b/pybasicbayes/distributions/gaussian.py
@@ -344,10 +344,11 @@ class Gaussian(
             - D*self.kappa_0/self.kappa_mf - self.kappa_0*self.nu_mf*
             np.dot(self.mu_mf - self.mu_0,np.linalg.solve(self.sigma_mf,self.mu_mf - self.mu_0))) \
-            + invwishart_log_partitionfunction(self.sigma_0,self.nu_0) \
+            - invwishart_log_partitionfunction(self.sigma_0,self.nu_0) \
             + (self.nu_0 - D - 1)/2*loglmbdatilde - 1/2*self.nu_mf \
             * np.linalg.solve(self.sigma_mf,self.sigma_0).trace()
+
         return p_avgengy + q_entropy
 
     def expected_log_likelihood(self, x=None, stats=None):
diff --git a/pybasicbayes/distributions/regression.py b/pybasicbayes/distributions/regression.py
index 9e540272ab1e863718929a5966a3281698277bcf..b524061ee1a0f4ea1618164c61f143db695f4cc7 100644
--- a/pybasicbayes/distributions/regression.py
+++ b/pybasicbayes/distributions/regression.py
@@ -2,11 +2,17 @@ from __future__ import division
 from builtins import zip
 from builtins import range
 __all__ = ['Regression', 'RegressionNonconj', 'ARDRegression',
-           'AutoRegression', 'ARDAutoRegression', 'DiagonalRegression']
+           'AutoRegression', 'ARDAutoRegression', 'DiagonalRegression',
+           'RobustRegression', 'RobustAutoRegression']
+
+from warnings import warn
 
 import numpy as np
 from numpy import newaxis as na
+from scipy.linalg import solve_triangular
+from scipy.special import gammaln, digamma, polygamma
+
 from pybasicbayes.abstractions import GibbsSampling, MaxLikelihood, \
     MeanField, MeanFieldSVI
 from pybasicbayes.util.stats import sample_gaussian, sample_mniw, \
@@ -86,12 +92,15 @@ class Regression(GibbsSampling, MeanField, MaxLikelihood):
 
     @staticmethod
     def _natural_to_standard(natparam):
-        A,B,C,d = natparam
+        A,B,C,d = natparam  # natparam is roughly (yyT, yxT, xxT, n)
         nu = d
         Kinv = C
         K = inv_psd(Kinv)
-        M = B.dot(K)
-        S = A - B.dot(K).dot(B.T)
+        # M = B.dot(K)
+        M = np.linalg.solve(Kinv, B.T).T
+        # This subtraction seems unstable!
+        # It does not necessarily return a PSD matrix
+        S = A - M.dot(B.T)
 
         # numerical padding here...
         K += 1e-8*np.eye(K.shape[0])
@@ -99,6 +108,7 @@
         assert np.all(0 < np.linalg.eigvalsh(S))
         assert np.all(0 < np.linalg.eigvalsh(K))
 
+        # standard is degrees of freedom, mean of sigma (ish), mean of A, cov of rows of A
         return nu, S, M, K
 
     ### getting statistics
@@ -564,6 +574,10 @@ class DiagonalRegression(Regression, MeanFieldSVI):
         # Cache the standard parameters for A as well
         self._mf_A_cache = {}
 
+        # Store the natural hypparams.  These correspond to the suff. stats
+        # (y^2, yxT, xxT, n)
+        # self.natural_hypparam = (2 * self.beta_0, self.h_0, self.J_0, 1.0)
+
     @property
     def D_out(self):
         return self._D_out
@@ -761,7 +775,6 @@ class DiagonalRegression(Regression, MeanFieldSVI):
         ysq, yxT, xxT, n = stats
 
-        assert np.all(n > 0), "Cannot perform max likelihood with zero data points!"
         self.A = np.array([
             np.linalg.solve(self.J_0 + xxTd, self.h_0 + yxTd)
             for xxTd, yxTd in zip(xxT, yxT)
         ])
@@ -787,6 +800,11 @@ class DiagonalRegression(Regression, MeanFieldSVI):
         self._meanfieldupdate_A(stats)
         self._meanfieldupdate_sigma(stats)
 
+        # Update A and sigmasq_flat
+        A, _, sigmasq_inv, _ = self.mf_expectations
+        self.A = A.copy()
+        self.sigmasq_flat = 1. / sigmasq_inv
+
     def _meanfieldupdate_A(self, stats, prob=1.0, stepsize=1.0):
         E_sigmasq_inv = self.mf_alpha / self.mf_beta
         _, E_yxT, E_xxT, _ = stats / prob
@@ -890,6 +908,249 @@ class DiagonalRegression(Regression, MeanFieldSVI):
         self._meanfieldupdate_sigma(stats, prob=prob, stepsize=stepsize)
 
 
+
+class RobustRegression(Regression):
+    """
+    Regression with multivariate-t distributed noise.
+
+        y | x ~ t(Ax + b, \Sigma, \nu)
+
+    where \nu >= 1 is the degrees of freedom.
+
+    This is equivalent to the model,
+
+        \tau ~ Gamma(\nu/2, \nu/2)
+        y | x, \tau ~ N(Ax + b, \Sigma / \tau)
+
+    To perform inference in this model, we introduce the
+    auxiliary precision variables \tau.  Given these, we can
+    compute sufficient statistics scaled by \tau and use the
+    standard Regression updates for A, b, Sigma | x, y, \tau.
+
+    The degrees of freedom parameter \nu is resampled with a
+    Metropolis-Hastings update under a Gamma(alpha, beta) prior
+    (see _resample_nu below).  We could also experiment with
+    updating \nu under an uninformative prior, e.g.
+    p(\nu) \propto \nu^{-2}, which is equivalent to a flat
+    prior on \nu^{-1}.
+ """ + def __init__( + self, nu_0=None,S_0=None, M_0=None, K_0=None, affine=False, + A=None, sigma=None, nu=None): + + # Default to a somewhat intermediate value of nu + self.nu = self.default_nu = nu if nu is not None else 4.0 + + super(RobustRegression, self).__init__( + nu_0=nu_0, S_0=S_0, M_0=M_0, K_0=K_0, affine=affine, A=A, sigma=sigma) + + def log_likelihood(self,xy): + assert isinstance(xy, (tuple, np.ndarray)) + sigma, D, nu = self.sigma, self.D_out, self.nu + x, y = (xy[:,:-D], xy[:,-D:]) if isinstance(xy,np.ndarray) else xy + + sigma_inv, L = inv_psd(sigma, return_chol=True) + r = y - self.predict(x) + z = sigma_inv.dot(r.T).T + + out = -0.5 * (nu + D) * np.log(1.0 + (r * z).sum(1) / nu) + out += gammaln((nu + D) / 2.0) - gammaln(nu / 2.0) - D / 2.0 * np.log(nu) \ + - D / 2.0 * np.log(np.pi) - np.log(np.diag(L)).sum() + + return out + + def rvs(self,x=None,size=1,return_xy=True): + A, sigma, nu, D = self.A, self.sigma, self.nu, self.D_out + + if self.affine: + A, b = A[:,:-1], A[:,-1] + + x = np.random.normal(size=(size, A.shape[1])) if x is None else x + N = x.shape[0] + mu = self.predict(x) + + # Sample precisions and t-distributed residuals + tau = np.random.gamma(nu / 2.0, 2.0 / nu, size=(N,)) + resid = np.random.randn(N, D).dot(np.linalg.cholesky(sigma).T) + resid /= np.sqrt(tau[:, None]) + + y = mu + resid + return np.hstack((x,y)) if return_xy else y + + def _get_statistics(self, data): + raise Exception("RobustRegression needs scaled statistics.") + + def _get_scaled_statistics(self, data, precisions): + assert isinstance(data, (list, tuple, np.ndarray)) + if isinstance(data,list): + return sum((self._get_scaled_statistics(d, p) for d, p in zip(data, precisions)), + self._empty_statistics()) + + elif isinstance(data, tuple): + x, y = data + bad = np.isnan(x).any(1) | np.isnan(y).any(1) + x, y = x[~bad], y[~bad] + precisions = precisions[~bad] + sqrt_prec = np.sqrt(precisions) + n, D = y.shape + + if self.affine: + x = np.column_stack((x, np.ones(n))) + + # Scale by the precision + # xs = x * sqrt_prec[:, na] + # ys = y * sqrt_prec[:, na] + xs = x * np.tile(sqrt_prec[:, None], (1, x.shape[1])) + ys = y * np.tile(sqrt_prec[:, None], (1, D)) + + xxT, yxT, yyT = xs.T.dot(xs), ys.T.dot(xs), ys.T.dot(ys) + return np.array([yyT, yxT, xxT, n]) + + else: + # data passed in like np.hstack((x, y)) + # x, y = data[:,:-self.D_out], data[:,-self.D_out:] + # return self._get_scaled_statistics((x, y), precisions) + bad = np.isnan(data).any(1) + data = data[~bad] + precisions = precisions[~bad] + n, D = data.shape[0], self.D_out + + # This tile call is suboptimal but without it we can hit issues + # with strided data, as in autoregressive models. + scaled_data = data * np.tile(precisions[:,None], (1, data.shape[1])) + statmat = scaled_data.T.dot(data) + + xxT, yxT, yyT = \ + statmat[:-D,:-D], statmat[-D:,:-D], statmat[-D:,-D:] + + if self.affine: + xy = scaled_data.sum(0) + x, y = xy[:-D], xy[-D:] + xxT = blockarray([[xxT, x[:,na]], + [x[na,:], np.atleast_2d(precisions.sum())]]) + yxT = np.hstack((yxT, y[:,na])) + + return np.array([yyT, yxT, xxT, n]) + + def resample(self, data=[], stats=None): + assert stats is None, \ + "We only support RobustRegression.resample() with data, not stats." 
+
+        # First sample auxiliary variables for each data point
+        tau = self._resample_precision(data)
+
+        # Compute statistics, scaling by tau, and resample as in standard Regression
+        stats = self._get_scaled_statistics(data, tau)
+        super(RobustRegression, self).resample(stats=stats)
+
+        # Resample degrees of freedom \nu
+        self._resample_nu(tau)
+
+    def _resample_precision(self, data):
+        assert isinstance(data, (list, tuple, np.ndarray))
+        if isinstance(data, list):
+            return [self._resample_precision(d) for d in data]
+
+        elif isinstance(data, tuple):
+            x, y = data
+
+        else:
+            x, y = data[:, :-self.D_out], data[:, -self.D_out:]
+
+        assert x.ndim == y.ndim == 2
+        assert x.shape[0] == y.shape[0]
+        assert x.shape[1] == (self.D_in - 1 if self.affine else self.D_in)
+        assert y.shape[1] == self.D_out
+        N = x.shape[0]
+
+        # Weed out the nan's
+        bad = np.any(np.isnan(x), axis=1) | np.any(np.isnan(y), axis=1)
+
+        # Compute posterior params of gamma distribution
+        a_post = self.nu / 2.0 + self.D_out / 2.0
+
+        r = y - self.predict(x)
+        sigma_inv = inv_psd(self.sigma)
+        z = sigma_inv.dot(r.T).T
+        b_post = self.nu / 2.0 + (r * z).sum(1) / 2.0
+
+        assert np.isscalar(a_post) and b_post.shape == (N,)
+        tau = np.nan * np.ones(N)
+        tau[~bad] = np.random.gamma(a_post, 1./b_post[~bad])
+
+        return tau
+
+    def _resample_nu(self, tau, N_steps=100, prop_std=0.1, alpha=1, beta=1):
+        """
+        Update the degrees of freedom parameter with Metropolis-Hastings.
+        Assume a prior nu ~ Ga(alpha, beta) and use a symmetric proposal
+        nu' ~ N(nu, prop_std^2).  Proposals below a small positive
+        threshold are rejected outright.
+        """
+        # Convert tau to a list of arrays
+        taus = [tau] if isinstance(tau, np.ndarray) else tau
+
+        N = 0
+        E_tau = 0
+        E_logtau = 0
+        for tau in taus:
+            bad = ~np.isfinite(tau)
+            N += np.sum(~bad)
+            E_tau += np.sum(tau[~bad])
+            E_logtau += np.sum(np.log(tau[~bad]))
+
+        if N > 0:
+            E_tau /= N
+            E_logtau /= N
+
+        # Compute the log prior, likelihood, and posterior
+        lprior = lambda nu: (alpha - 1) * np.log(nu) - beta * nu
+        ll = lambda nu: N * (nu/2 * np.log(nu/2) - gammaln(nu/2)
+                             + (nu/2 - 1) * E_logtau - nu/2 * E_tau)
+        lp = lambda nu: ll(nu) + lprior(nu)
+
+        lp_curr = lp(self.nu)
+        for step in range(N_steps):
+            # Symmetric proposal
+            nu_new = self.nu + prop_std * np.random.randn()
+            if nu_new < 1e-3:
+                # Reject if too small
+                continue
+
+            # Accept / reject based on likelihoods
+            lp_new = lp(nu_new)
+            if np.log(np.random.rand()) < lp_new - lp_curr:
+                self.nu = nu_new
+                lp_curr = lp_new
+
+    # Not supporting MLE or mean field for now
+    def max_likelihood(self,data,weights=None,stats=None):
+        raise NotImplementedError
+
+    def meanfieldupdate(self, data=None, weights=None, stats=None):
+        raise NotImplementedError
+
+    def meanfield_sgdstep(self, data, weights, prob, stepsize, stats=None):
+        raise NotImplementedError
+
+    def meanfield_expectedstats(self):
+        raise NotImplementedError
+
+    def expected_log_likelihood(self, xy=None, stats=None):
+        raise NotImplementedError
+
+    def get_vlb(self):
+        raise NotImplementedError
+
+    def resample_from_mf(self):
+        raise NotImplementedError
+
+    def _set_params_from_mf(self):
+        raise NotImplementedError
+
+    def _initialize_mean_field(self):
+        pass
+
+
 class _ARMixin(object):
     @property
     def nlags(self):
@@ -903,7 +1164,7 @@ class _ARMixin(object):
         return self.D_out
 
     def predict(self, x):
-        return super(_ARMixin,self).predict(np.atleast_2d(x.ravel()))
+        return super(_ARMixin,self).predict(np.atleast_2d(x))
 
     def rvs(self,lagged_data):
         return super(_ARMixin,self).rvs(
@@ -939,3 +1200,7 @@ class ARDAutoRegression(_ARMixin,ARDRegression):
             + ([1] if M_0.shape[1] % M_0.shape[0] and M_0.shape[0] != 1 else [])
         super(ARDAutoRegression,self).__init__(
             M_0=M_0,blocksizes=blocksizes,**kwargs)
+
+
+class RobustAutoRegression(_ARMixin, RobustRegression):
+    pass
diff --git a/pybasicbayes/models/factor_analysis.py b/pybasicbayes/models/factor_analysis.py
index 0fa11857b16f01d0f4f12123e80115260686cc93..23aa648611b11b7e1a6a0a63c175eff98e01d1eb 100644
--- a/pybasicbayes/models/factor_analysis.py
+++ b/pybasicbayes/models/factor_analysis.py
@@ -23,12 +23,12 @@ class FactorAnalysisStates(object):
     """
     Wrapper for the latent states of a factor analysis model
     """
-    def __init__(self, model, data, mask=None):
+    def __init__(self, model, data, mask=None, **kwargs):
         self.model = model
         self.X = data
-        self.mask = mask
         if mask is None:
             mask = np.ones_like(data, dtype=bool)
+        self.mask = mask
         assert data.shape == mask.shape and mask.dtype == bool
         assert self.X.shape[1] == self.D_obs
@@ -58,8 +58,18 @@ def log_likelihood(self):
-        mu = np.dot(self.Z, self.W.T)
-        return -0.5 * np.sum(((self.X - mu) * self.mask) ** 2 / self.sigmasq)
+        # mu = np.dot(self.Z, self.W.T)
+        # return -0.5 * np.sum(((self.X - mu) * self.mask) ** 2 / self.sigmasq)
+
+        # Compute the marginal likelihood, integrating out z
+        mu_x = np.zeros(self.D_obs)
+        Sigma_x = self.W.dot(self.W.T) + np.diag(self.sigmasq)
+
+        if not np.all(self.mask):
+            raise Exception("Need to implement this!")
+        else:
+            from scipy.stats import multivariate_normal
+            return multivariate_normal(mu_x, Sigma_x).logpdf(self.X)
 
     ## Gibbs
     def resample(self):
@@ -142,6 +152,7 @@ class FactorAnalysisStates(object):
 
 class _FactorAnalysisBase(Model):
     __metaclass__ = abc.ABCMeta
+    _states_class = FactorAnalysisStates
 
     def __init__(self, D_obs, D_latent,
                  W=None, sigmasq=None,
@@ -169,8 +180,8 @@ class _FactorAnalysisBase(Model):
     def sigmasq(self):
         return self.regression.sigmasq_flat
 
-    def add_data(self, data, mask=None):
-        self.data_list.append(FactorAnalysisStates(self, data, mask=mask))
+    def add_data(self, data, mask=None, **kwargs):
+        self.data_list.append(self._states_class(self, data, mask=mask, **kwargs))
         return self.data_list[-1]
 
     def generate(self, keep=True, N=1, mask=None, **kwargs):
@@ -179,14 +190,20 @@ class _FactorAnalysisBase(Model):
 
         Z = np.random.randn(N, self.D_latent)
         X = np.dot(Z, W.T) + np.sqrt(sigmasq) * np.random.randn(N, self.D_obs)
 
-        data = FactorAnalysisStates(self, X, mask=mask)
+        data = self._states_class(self, X, mask=mask, **kwargs)
         data.Z = Z
         if keep:
             self.data_list.append(data)
-        return data
+        return data.X, data.Z
+
+    def _log_likelihoods(self, x, mask=None, **kwargs):
+        self.add_data(x, mask=mask, **kwargs)
+        states = self.data_list.pop()
+        return states.log_likelihood()
 
     def log_likelihood(self):
-        return np.sum([d.log_likelihood() for d in self.data_list])
+        return sum([d.log_likelihood().sum() for d in self.data_list])
+
     def log_probability(self):
         lp = 0
diff --git a/pybasicbayes/models/mixture.py b/pybasicbayes/models/mixture.py
index 1675f01299e1d024cd4f2228d7d3dfdb4d82b7ca..43461c4a7c78ae06921d73ac8c2d4f188700311d 100644
--- a/pybasicbayes/models/mixture.py
+++ b/pybasicbayes/models/mixture.py
@@ -477,7 +477,7 @@ class Mixture(ModelGibbsSampling, ModelMeanField, ModelEM, ModelParallelTemperin
             [l.expectations[:,idx] for l in self.labels_list])
 
         # mixture weights
-        self.weights.max_likelihood(np.arange(len(self.components)),
+        self.weights.max_likelihood(None,
             [l.expectations for l in self.labels_list])
 
     @property
diff --git a/pybasicbayes/util/stats.py b/pybasicbayes/util/stats.py
index 7b704570de5102826762250e7d201df33aa38ffa..9ea9b479a449c970aaffc7cab0e6f1a124461c6d 100644
--- a/pybasicbayes/util/stats.py
+++ b/pybasicbayes/util/stats.py
@@ -317,6 +317,7 @@ def invwishart_entropy(sigma,nu,chol=None):
     return invwishart_log_partitionfunction(sigma,nu,chol)-(nu-D-1)/2*Elogdetlmbda + nu*D/2
 
 def invwishart_log_partitionfunction(sigma,nu,chol=None):
+    # In Bishop B.79 notation, this is -log B(W, nu), where W = sigma^{-1}
     D = sigma.shape[0]
     chol = np.linalg.cholesky(sigma) if chol is None else chol
     return -1*(nu*np.log(chol.diagonal()).sum() - (nu*D/2*np.log(2) + D*(D-1)/4*np.log(np.pi) \
diff --git a/setup.py b/setup.py
index 0824dbfeb3fad3249d527a91fc3bb23a003a623f..360f189fb7b787b8a8cf7c0e4bc0768bd19cbfb2 100644
--- a/setup.py
+++ b/setup.py
@@ -51,7 +51,7 @@ if use_cython:
         warn('Failed to generate extension module code from Cython files')
 
 setup(name='pybasicbayes',
-      version='0.2.2',
+      version='0.2.4',
       description="Basic utilities for Bayesian inference",
       author='Matthew James Johnson',
       author_email='mattjj@csail.mit.edu',
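
For quick reference, a minimal usage sketch of the new RobustRegression class, condensed from examples/robust_regression.py above.  The sizes, outlier fraction, and number of Gibbs sweeps are illustrative only.

import numpy as np
import numpy.random as npr
from pybasicbayes.distributions import RobustRegression

D_in, D_out, N = 2, 1, 100
X = npr.randn(N, D_in)
y = X.dot(npr.randn(D_in, D_out)) + 0.1 * npr.randn(N, D_out)
outliers = npr.rand(N) < 0.1
y[outliers] = 3 * npr.randn(outliers.sum(), D_out)   # corrupt a few points

robust_reg = RobustRegression(nu_0=D_out + 2,
                              S_0=np.eye(D_out),
                              M_0=np.zeros((D_out, D_in + 1)),
                              K_0=np.eye(D_in + 1),
                              affine=True)

for _ in range(100):                  # Gibbs sweeps
    robust_reg.resample([(X, y)])     # resamples tau, then (A, Sigma), then nu

print(robust_reg.A)    # weights and bias (last column), shape (D_out, D_in + 1)
print(robust_reg.nu)   # inferred degrees of freedom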
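
A small sanity check of the scale-mixture identity in the RobustRegression docstring: drawing tau ~ Gamma(nu/2, nu/2) and then y | tau ~ N(mu, sigma^2 / tau) should match a Student-t with nu degrees of freedom, location mu, and scale sigma.  This sketch assumes only numpy and scipy.stats and uses the same Gamma parameterization as RobustRegression.rvs; the constants are arbitrary.

import numpy as np
from scipy.stats import t as student_t

nu, mu, sigma = 4.0, 0.5, 0.3
n = 200000
tau = np.random.gamma(nu / 2.0, 2.0 / nu, size=n)     # shape, scale -> Gamma(nu/2, rate=nu/2)
y = mu + sigma * np.random.randn(n) / np.sqrt(tau)    # conditionally Gaussian draws

# Empirical quantiles should closely match the analytic t distribution
qs = [0.05, 0.25, 0.5, 0.75, 0.95]
print(np.percentile(y, [100 * q for q in qs]))
print(student_t(df=nu, loc=mu, scale=sigma).ppf(qs))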