diff --git a/pybasicbayes/distributions/dynamic_glm.py b/pybasicbayes/distributions/dynamic_glm.py
index 94a11c745f30e10825e2ccb5e77dc31ecef642ab..5938ee53ab600728d0e4c85a73bc3c9b8fcd4516 100644
--- a/pybasicbayes/distributions/dynamic_glm.py
+++ b/pybasicbayes/distributions/dynamic_glm.py
@@ -7,9 +7,10 @@ from warnings import warn
 from pypolyagamma import PyPolyaGamma
 
 __all__ = ['Dynamic_GLM']
 
+
 def local_multivariate_normal_draw(x, sigma, normal):
     """
-    Function to combine pre-drawn Normals (normal) with the desired mean x and variance sigma (cause calls to multivariate normal are too costly)
+    Function to combine pre-drawn Normals (normal) with the desired mean x and variance sigma (because calls to multivariate normal are too costly)
     Cholesky doesn't like 0 cov matrix, but we want it.
     This might need changing if in practice we see plently of 0 matrices
@@ -20,9 +21,10 @@ def local_multivariate_normal_draw(x, sigma, normal):
     if np.isclose(sigma, 0).all():
         return x
     else:
-        print("Weird covariance matrix")
+        print("Unusual covariance matrix")
         quit()
 
+
 # fix seed of PyPolyaGamma, otherwise results aren't reproducible
 ppgsseed = 4
 if ppgsseed == 4:
@@ -32,9 +34,9 @@ ppgs = PyPolyaGamma(ppgsseed)
 class Dynamic_GLM(GibbsSampling):
     """
-    This class enables a drifting input output iHMM with logistic link function.
+    This class enables a drifting input-output GLM with logistic link function.
 
-    States are thus dynamic GLMs, giving us more freedom as to the inputs we give the model.
+    States are thus dynamic logistic regressions.
 
     This class implements this observation distribution for a single state.
 
     Hyperparameters:
@@ -48,6 +50,8 @@ class Dynamic_GLM(GibbsSampling):
 
     def __init__(self, n_regressors, T, prior_mean, P_0, Q, jumplimit=1):
 
+        assert jumplimit > 0
+
         self.n_regressors = n_regressors
         self.T = T
         self.jumplimit = jumplimit
@@ -58,6 +62,7 @@ class Dynamic_GLM(GibbsSampling):
         self.identity = np.eye(self.n_regressors)  # not really needed, but kinda useful for state sampling
 
         self.weights = np.empty((self.T, self.n_regressors))
+        # randomly initialise weights
         self.weights[0] = np.random.multivariate_normal(mean=self.x_0, cov=self.P_0)
         for t in range(1, T):
             self.weights[t] = self.weights[t - 1] + np.random.multivariate_normal(mean=self.noise_mean, cov=self.Q[t - 1])
@@ -99,9 +104,9 @@ class Dynamic_GLM(GibbsSampling):
         return np.log(out)
 
     # Gibbs sampling
-    def resample(self, data=[]):
+    def resample(self, data=[], testing_flag=False):
         """
-        Resampling of dynamic logistic random variables.
+        Resampling of dynamic logistic random variables. We get all the data that is assigned to a single state, and resample its random variables.
 
         We follow the resampling scheme of Windle: "Efficient Data Augmentation in Dynamic Models for Binary and Count Data".
         This makes use of the forward filter backwards sample algorithm. Which uses Kalman filtering, for which we use Anderson & Moore 1979
@@ -109,40 +114,37 @@ class Dynamic_GLM(GibbsSampling):
         we invent more timesteps, but we don't allow the regressors to change from these steps to steps, effectively making the observations occur at the same time.
         """
-        # TODO: Clean up this mess, I always have to call delete_obs_data because of all the saved shit!
+        # TODO: Could clean up all this saving of info, so as to not have to constantly call model_save.delete_obs_data() in fit_script
         self.psi_diff_saves = []
         summary_statistics, all_times = self._get_statistics(data)
-        types, pseudo_counts, counts = summary_statistics  # pseudo_counts are kappa_t
+        types, pseudo_counts, counts = summary_statistics  # pseudo_counts are kappa_t in Windle scheme
 
         # if state is never active, resample from prior, but without dynamic change
         if len(counts) == 0:
             self.weights = np.tile(np.random.multivariate_normal(mean=self.x_0, cov=self.P_0), (self.T, 1))
             return
 
-        # Sort out which weight vector goes to which timepoint
-        timepoint_map = {}
-        total_types = 0
-        actual_obs_count = 0
-        change_points = []
-        prev_t = all_times[0] - 1
-        fake_times = []
+        # Sort out all the necessary timepoint variables (test case in dynamic_glm_dist_test.py)
+        timepoint_map = {}  # this will ultimately map pseudo-weights onto real timepoints. Maps the end of each session (once all types (feature sets) have been processed) onto the correct real timepoint
+        total_types = 0  # total count of relevant time steps: actual timesteps from different features in sessions + timesteps for change within jumplimit
+        prev_t = all_times[0] - 1  # keep track of whether a jump happened (skipped sessions for this state)
+        fake_times = []  # keep track of when we introduced fake times to accommodate changes on skipped sessions
         for type, t in zip(types, all_times):
-            if t > prev_t + 1:
-                add_list = list(range(total_types, min(total_types + t - prev_t - 1, total_types + self.jumplimit)))
-                change_points += add_list
-                fake_times += add_list
-                for i, sub_t in enumerate(range(total_types, total_types + t - prev_t - 1)):  # TODO: set up this loop better
+            if t > prev_t + 1:  # if a jump happened, add fake timepoints, according to jumplimit
+                fake_times += list(range(total_types, min(total_types + t - prev_t - 1, total_types + self.jumplimit)))
+                for i, sub_t in enumerate(range(total_types, total_types + t - prev_t - 1)):
                     timepoint_map[prev_t + i + 1] = min(sub_t, total_types + self.jumplimit - 1)
                 total_types += min(t - prev_t - 1, self.jumplimit)
             total_types += type.shape[0]
-            actual_obs_count += type.shape[0]
-            change_points.append(total_types - 1)
             timepoint_map[t] = total_types - 1
             prev_t = t
 
+        # how many total different feature sets at different timepoints were observed
+        actual_obs_count = sum([t.shape[0] for t in types])
+        # We introduce pseudo-variance steps: we don't want change for the fake times, so only specific pseudo-Q's will be non-zero
         self.pseudo_Q = np.zeros((total_types, self.n_regressors, self.n_regressors))
-        # TODO: is it okay to cut off last timepoint here?
+        # is it okay to cut off last timepoint here?
         for k in range(self.T):
             if k in timepoint_map:
                 self.pseudo_Q[timepoint_map[k]] = self.Q[k]  # for every real timepoint, map it's variance onto the pseudo_Q
@@ -194,7 +196,7 @@ class Dynamic_GLM(GibbsSampling):
         # the other weights now depend on the weight sampled just after them, we need to update the Kalman estimates
         normals = np.random.standard_normal((total_types - 1, self.n_regressors))  # pregenerate some normals rvs to use later
         for k in range(total_types - 2, -1, -1):  # normally -1, but we already did first sampling
-            if np.all(self.pseudo_Q[k] == 0): # if this is a fake time, no change is allowed, just copy the weight
+            if np.all(self.pseudo_Q[k] == 0):  # if this is a fake time, no change is allowed, just copy the weight
                 pseudo_weights[k] = pseudo_weights[k + 1]
             else:
                 # start with the Kalman filter inferences
@@ -211,7 +213,7 @@ class Dynamic_GLM(GibbsSampling):
                     updated_x += updated_sigma[:, m] * epsilon / state_R
                     updated_sigma -= updated_sigma.dot(np.outer(self.identity[m], self.identity[m])).dot(updated_sigma) / state_R
 
-                pseudo_weights[k] = local_multivariate_normal_draw(updated_x, updated_sigma, normals[k])  # transform our normal according to computed mean and var
+                pseudo_weights[k] = local_multivariate_normal_draw(updated_x, updated_sigma, normals[k])  # transform our pre-drawn normal according to the computed mean and var
 
         # put the pseudo_weights where they belong
         for k in range(self.T):
@@ -232,6 +234,8 @@ class Dynamic_GLM(GibbsSampling):
             else:
                 self.weights[k] = self.weights[k - 1]
 
+        if testing_flag:
+            return timepoint_map
         return pseudo_weights
         # If one wants to resample variance...
         # self.psi_diff_saves = np.concatenate(self.psi_diff_saves)
@@ -241,6 +245,7 @@ class Dynamic_GLM(GibbsSampling):
         which unique features did the state encounter (types)
         what is their corresponding pseudo-observation (pseudo_counts)
        how often did they occur (counts)
+        also return which timepoints had non-zero data (times)
         """
         summary_statistics = [[], [], []]
         times = []
@@ -280,7 +285,7 @@ class Dynamic_GLM(GibbsSampling):
                 self.sigma_k_k_minus.append(self.sigma_k[k] + self.pseudo_Q[k])  # the amount of allowed change just adds up if no obs
             else:
                 sigma, H = self.sigma_k_k_minus[k], self.H[k]  # we will need this a lot, so shorten it
-                gain = sigma.dot(H).dot(1 / (H.T.dot(sigma).dot(H) + self.R[k])) # matrix K, elsewhere called gain
+                gain = sigma.dot(H).dot(1 / (H.T.dot(sigma).dot(H) + self.R[k]))  # matrix K, elsewhere called gain
                 self.gain_save.append(gain)  # the gain matrix is useful for compute_means as well, save it
 
                 # the two main equations, from p.44:
@@ -306,10 +311,9 @@ class Dynamic_GLM(GibbsSampling):
         # Compute WAIC in principle
         return self.weights.size
 
-    ### Max likelihood
+    # Max likelihood
     def max_likelihood(self, data, weights=None):
         warn('ML not implemented')
 
-
     def MAP(self, data, weights=None):
         warn('MAP not implemented')
diff --git a/pybasicbayes/distributions/dynamic_multinomial.py b/pybasicbayes/distributions/dynamic_multinomial.py
index dbba312b67bcbdec2310614db0833e26a5029c47..fc190ef2347d035595c299968b4e33931fe8876b 100644
--- a/pybasicbayes/distributions/dynamic_multinomial.py
+++ b/pybasicbayes/distributions/dynamic_multinomial.py
@@ -9,8 +9,6 @@ from pybasicbayes.abstractions import \
 from scipy import sparse
 import numpy as np
 from warnings import warn
-import time
-
 from pgmult.lda import StickbreakingDynamicTopicsLDA
 from pgmult.utils import psi_to_pi
 from scipy.stats import invgamma
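
Reviewer note, not part of the patch: the timepoint bookkeeping now built at the top of resample() (and exposed via testing_flag for the test case mentioned in the comment) is easiest to sanity-check in isolation. The sketch below re-implements just that loop as a standalone function; the helper name build_timepoint_map and the argument type_counts are illustrative only (they do not exist in the repository), and the sketch assumes that only the lengths of the per-session feature arrays matter, i.e. type_counts[i] plays the role of types[i].shape[0].

```python
def build_timepoint_map(type_counts, all_times, jumplimit):
    """Map real timepoints onto pseudo-timepoints, inserting at most
    `jumplimit` fake steps for every run of skipped sessions."""
    timepoint_map = {}   # maps each real timepoint to the pseudo-step that carries its weights
    fake_times = []      # pseudo-steps invented to absorb drift over skipped sessions
    total_types = 0      # running count of pseudo-steps allocated so far
    prev_t = all_times[0] - 1
    for n_types, t in zip(type_counts, all_times):
        if t > prev_t + 1:  # a jump happened: add fake timepoints, capped by jumplimit
            fake_times += list(range(total_types, min(total_types + t - prev_t - 1,
                                                      total_types + jumplimit)))
            for i, sub_t in enumerate(range(total_types, total_types + t - prev_t - 1)):
                # skipped real timepoints beyond the jumplimit collapse onto the last fake step
                timepoint_map[prev_t + i + 1] = min(sub_t, total_types + jumplimit - 1)
            total_types += min(t - prev_t - 1, jumplimit)
        total_types += n_types  # one pseudo-step per feature set observed in this session
        timepoint_map[t] = total_types - 1
        prev_t = t
    return timepoint_map, fake_times, total_types


# Example: two sessions with two feature sets each, observed at t=0 and t=3, jumplimit=1:
# build_timepoint_map([2, 2], [0, 3], 1)
# -> ({0: 1, 1: 2, 2: 2, 3: 4}, [2], 5)
# The skipped sessions t=1 and t=2 share the single fake pseudo-step 2, so at most
# jumplimit steps of weight drift are inserted for the whole gap.
```

With testing_flag=True the patched resample() returns its timepoint_map, so a unit test along these lines should be able to compare it against such a reference implementation.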