diff --git a/pybasicbayes/distributions/dynamic_glm.py b/pybasicbayes/distributions/dynamic_glm.py
index 94a11c745f30e10825e2ccb5e77dc31ecef642ab..5938ee53ab600728d0e4c85a73bc3c9b8fcd4516 100644
--- a/pybasicbayes/distributions/dynamic_glm.py
+++ b/pybasicbayes/distributions/dynamic_glm.py
@@ -7,9 +7,10 @@ from warnings import warn
 from pypolyagamma import PyPolyaGamma
 
 __all__ = ['Dynamic_GLM']
 
+
 def local_multivariate_normal_draw(x, sigma, normal):
     """
-    Function to combine pre-drawn Normals (normal) with the desired mean x and variance sigma (cause calls to multivariate normal are too costly)
+    Function to combine pre-drawn Normals (normal) with the desired mean x and variance sigma (because calls to multivariate normal are too costly)
     Cholesky doesn't like 0 cov matrix, but we want it.
     This might need changing if in practice we see plently of 0 matrices
@@ -20,9 +21,10 @@ def local_multivariate_normal_draw(x, sigma, normal):
     if np.isclose(sigma, 0).all():
         return x
     else:
-        print("Weird covariance matrix")
+        print("Unusual covariance matrix")
         quit()
 
+
 # fix seed of PyPolyaGamma, otherwise results aren't reproducible
 ppgsseed = 4
 if ppgsseed == 4:
@@ -32,9 +34,9 @@ ppgs = PyPolyaGamma(ppgsseed)
 class Dynamic_GLM(GibbsSampling):
     """
-    This class enables a drifting input output iHMM with logistic link function.
+    This class enables a drifting input-output GLM with logistic link function.
 
-    States are thus dynamic GLMs, giving us more freedom as to the inputs we give the model.
+    States are thus dynamic logistic regressions.
 
     This class implements this observation distribution for a single state.
 
     Hyperparameters:
@@ -48,6 +50,8 @@ class Dynamic_GLM(GibbsSampling):
 
     def __init__(self, n_regressors, T, prior_mean, P_0, Q, jumplimit=1):
 
+        assert jumplimit > 0
+
         self.n_regressors = n_regressors
         self.T = T
         self.jumplimit = jumplimit
@@ -58,6 +62,7 @@ class Dynamic_GLM(GibbsSampling):
         self.identity = np.eye(self.n_regressors)  # not really needed, but kinda useful for state sampling
 
         self.weights = np.empty((self.T, self.n_regressors))
+        # randomly initialise weights
         self.weights[0] = np.random.multivariate_normal(mean=self.x_0, cov=self.P_0)
         for t in range(1, T):
             self.weights[t] = self.weights[t - 1] + np.random.multivariate_normal(mean=self.noise_mean, cov=self.Q[t - 1])
@@ -99,9 +104,9 @@ class Dynamic_GLM(GibbsSampling):
         return np.log(out)
 
     # Gibbs sampling
-    def resample(self, data=[]):
+    def resample(self, data=[], testing_flag=False):
         """
-        Resampling of dynamic logistic random variables.
+        Resampling of dynamic logistic random variables. We get all the data that is assigned to a single state, and resample its random variables.
 
         We follow the resampling scheme of Windle: "Efficient Data Augmentation in Dynamic Models for Binary and Count Data".
         This makes use of the forward filter backwards sample algorithm. Which uses Kalman filtering, for which we use Anderson & Moore 1979
@@ -109,40 +114,37 @@ class Dynamic_GLM(GibbsSampling):
         we invent more timesteps, but we don't allow the regressors to change from these steps to steps, effectively making the observations occur at the same time.
         """
-        # TODO: Clean up this mess, I always have to call delete_obs_data because of all the saved shit!
+        # TODO: Could clean up all this saving of info, so as to not have to constantly call model_save.delete_obs_data() in fit_script
         self.psi_diff_saves = []
         summary_statistics, all_times = self._get_statistics(data)
-        types, pseudo_counts, counts = summary_statistics  # pseudo_counts are kappa_t
+        types, pseudo_counts, counts = summary_statistics  # pseudo_counts are kappa_t in Windle scheme
 
         # if state is never active, resample from prior, but without dynamic change
         if len(counts) == 0:
             self.weights = np.tile(np.random.multivariate_normal(mean=self.x_0, cov=self.P_0), (self.T, 1))
             return
 
-        # Sort out which weight vector goes to which timepoint
-        timepoint_map = {}
-        total_types = 0
-        actual_obs_count = 0
-        change_points = []
-        prev_t = all_times[0] - 1
-        fake_times = []
+        # Sort out all the necessary timepoint variables (test case in dynamic_glm_dist_test.py)
+        timepoint_map = {}  # this will ultimately map pseudo-weights onto real timepoints. Maps the end of each session (once all types (feature sets) have been processed) onto the correct real timepoint
+        total_types = 0  # total count of relevant time steps: actual timesteps from different features in sessions + timesteps for change within jumplimit
+        prev_t = all_times[0] - 1  # keep track of whether a jump happened (skipped sessions for this state)
+        fake_times = []  # keep track of when we introduced fake times to accommodate changes on skipped sessions
         for type, t in zip(types, all_times):
-            if t > prev_t + 1:
-                add_list = list(range(total_types, min(total_types + t - prev_t - 1, total_types + self.jumplimit)))
-                change_points += add_list
-                fake_times += add_list
-                for i, sub_t in enumerate(range(total_types, total_types + t - prev_t - 1)):  # TODO: set up this loop better
+            if t > prev_t + 1:  # if a jump happened, add fake timepoints, according to jumplimit
+                fake_times += list(range(total_types, min(total_types + t - prev_t - 1, total_types + self.jumplimit)))
+                for i, sub_t in enumerate(range(total_types, total_types + t - prev_t - 1)):
                     timepoint_map[prev_t + i + 1] = min(sub_t, total_types + self.jumplimit - 1)
                 total_types += min(t - prev_t - 1, self.jumplimit)
             total_types += type.shape[0]
-            actual_obs_count += type.shape[0]
-            change_points.append(total_types - 1)
             timepoint_map[t] = total_types - 1
             prev_t = t
 
+        # how many total different feature sets at different timepoints were observed
+        actual_obs_count = sum([t.shape[0] for t in types])
+        # We introduce pseudo-variance steps: we don't want change for the fake times, so only specific pseudo-Q's will be non-zero
         self.pseudo_Q = np.zeros((total_types, self.n_regressors, self.n_regressors))
-        # TODO: is it okay to cut off last timepoint here?
+        # is it okay to cut off last timepoint here?
         for k in range(self.T):
             if k in timepoint_map:
                 self.pseudo_Q[timepoint_map[k]] = self.Q[k]  # for every real timepoint, map it's variance onto the pseudo_Q
@@ -194,7 +196,7 @@ class Dynamic_GLM(GibbsSampling):
         # the other weights now depend on the weight sampled just after them, we need to update the Kalman estimates
         normals = np.random.standard_normal((total_types - 1, self.n_regressors))  # pregenerate some normals rvs to use later
         for k in range(total_types - 2, -1, -1):  # normally -1, but we already did first sampling
-            if np.all(self.pseudo_Q[k] == 0): # if this is a fake time, no change is allowed, just copy the weight
+            if np.all(self.pseudo_Q[k] == 0):  # if this is a fake time, no change is allowed, just copy the weight
                 pseudo_weights[k] = pseudo_weights[k + 1]
             else:
                 # start with the Kalman filter inferences
@@ -211,7 +213,7 @@ class Dynamic_GLM(GibbsSampling):
                     updated_x += updated_sigma[:, m] * epsilon / state_R
                     updated_sigma -= updated_sigma.dot(np.outer(self.identity[m], self.identity[m])).dot(updated_sigma) / state_R
 
-                pseudo_weights[k] = local_multivariate_normal_draw(updated_x, updated_sigma, normals[k])  # transform our normal according to computed mean and var
+                pseudo_weights[k] = local_multivariate_normal_draw(updated_x, updated_sigma, normals[k])  # transform our pre-drawn normal according to the computed mean and var
 
         # put the pseudo_weights where they belong
         for k in range(self.T):
@@ -232,6 +234,8 @@ class Dynamic_GLM(GibbsSampling):
             else:
                 self.weights[k] = self.weights[k - 1]
 
+        if testing_flag:
+            return timepoint_map
         return pseudo_weights
         # If one wants to resample variance...
         # self.psi_diff_saves = np.concatenate(self.psi_diff_saves)
@@ -241,6 +245,7 @@ class Dynamic_GLM(GibbsSampling):
         which unique features did the state encounter (types)
         what is their corresponding pseudo-observation (pseudo_counts)
        how often did they occur (counts)
+        also return which timepoints had non-zero data (times)
         """
         summary_statistics = [[], [], []]
         times = []
@@ -280,7 +285,7 @@ class Dynamic_GLM(GibbsSampling):
                 self.sigma_k_k_minus.append(self.sigma_k[k] + self.pseudo_Q[k])  # the amount of allowed change just adds up if no obs
             else:
                 sigma, H = self.sigma_k_k_minus[k], self.H[k]  # we will need this a lot, so shorten it
-                gain = sigma.dot(H).dot(1 / (H.T.dot(sigma).dot(H) + self.R[k])) # matrix K, elsewhere called gain
+                gain = sigma.dot(H).dot(1 / (H.T.dot(sigma).dot(H) + self.R[k]))  # matrix K, elsewhere called gain
                 self.gain_save.append(gain)  # the gain matrix is useful for compute_means as well, save it
 
                 # the two main equations, from p.44:
@@ -306,10 +311,9 @@ class Dynamic_GLM(GibbsSampling):
         # Compute WAIC in principle
         return self.weights.size
 
-    ### Max likelihood
+    # Max likelihood
     def max_likelihood(self, data, weights=None):
         warn('ML not implemented')
 
-
     def MAP(self, data, weights=None):
         warn('MAP not implemented')
diff --git a/pybasicbayes/distributions/dynamic_multinomial.py b/pybasicbayes/distributions/dynamic_multinomial.py
index dbba312b67bcbdec2310614db0833e26a5029c47..fc190ef2347d035595c299968b4e33931fe8876b 100644
--- a/pybasicbayes/distributions/dynamic_multinomial.py
+++ b/pybasicbayes/distributions/dynamic_multinomial.py
@@ -9,8 +9,6 @@ from pybasicbayes.abstractions import \
 from scipy import sparse
 import numpy as np
 from warnings import warn
-import time
-
 from pgmult.lda import StickbreakingDynamicTopicsLDA
 from pgmult.utils import psi_to_pi
 from scipy.stats import invgamma
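
Reviewer note, not part of the patch: the timepoint bookkeeping now built at the top of resample() (and exposed via testing_flag for the test case mentioned in the comment) is easiest to sanity-check in isolation. The sketch below re-implements just that loop as a standalone function; the helper name build_timepoint_map and the argument type_counts are illustrative only (they do not exist in the repository), and the sketch assumes that only the lengths of the per-session feature arrays matter, i.e. type_counts[i] plays the role of types[i].shape[0].

```python
def build_timepoint_map(type_counts, all_times, jumplimit):
    """Map real timepoints onto pseudo-timepoints, inserting at most
    `jumplimit` fake steps for every run of skipped sessions."""
    timepoint_map = {}   # maps each real timepoint to the pseudo-step that carries its weights
    fake_times = []      # pseudo-steps invented to absorb drift over skipped sessions
    total_types = 0      # running count of pseudo-steps allocated so far
    prev_t = all_times[0] - 1
    for n_types, t in zip(type_counts, all_times):
        if t > prev_t + 1:  # a jump happened: add fake timepoints, capped by jumplimit
            fake_times += list(range(total_types, min(total_types + t - prev_t - 1,
                                                      total_types + jumplimit)))
            for i, sub_t in enumerate(range(total_types, total_types + t - prev_t - 1)):
                # skipped real timepoints beyond the jumplimit collapse onto the last fake step
                timepoint_map[prev_t + i + 1] = min(sub_t, total_types + jumplimit - 1)
            total_types += min(t - prev_t - 1, jumplimit)
        total_types += n_types  # one pseudo-step per feature set observed in this session
        timepoint_map[t] = total_types - 1
        prev_t = t
    return timepoint_map, fake_times, total_types


# Example: two sessions with two feature sets each, observed at t=0 and t=3, jumplimit=1:
# build_timepoint_map([2, 2], [0, 3], 1)
# -> ({0: 1, 1: 2, 2: 2, 3: 4}, [2], 5)
# The skipped sessions t=1 and t=2 share the single fake pseudo-step 2, so at most
# jumplimit steps of weight drift are inserted for the whole gap.
```

With testing_flag=True the patched resample() returns its timepoint_map, so a unit test along these lines should be able to compare it against such a reference implementation.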