Compare revisions — agpd_public/sab_pybasicbayes (revisions: master, working).
Changes are shown as if the source revision was being merged into the target revision.
Commits on Source (2)
@@ -9,7 +9,7 @@ __all__ = ['Dynamic_GLM']
def local_multivariate_normal_draw(x, sigma, normal):
"""
Function to combine pre-drawn Normals (normal) with the desired mean x and variance sigma (because calls to multivariate normal are too costly)
Cholesky doesn't like a 0 covariance matrix, but we want to allow it.
This might need changing if in practice we see plenty of 0 matrices
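In effect this presumably amounts to x + np.linalg.cholesky(sigma) @ normal, with an all-zero sigma handled as a special case.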
@@ -23,7 +23,7 @@ def local_multivariate_normal_draw(x, sigma, normal):
print("Weird covariance matrix")
quit()
# fix seed of PyPolyaGamma, otherwise results aren't reproducible
ppgsseed = 4
if ppgsseed == 4:
print("Using default seed")
@@ -52,7 +52,7 @@ class Dynamic_GLM(GibbsSampling):
self.T = T
self.jumplimit = jumplimit
self.x_0 = prior_mean
self.P_0, self.Q = P_0, Q # initial variance of regressors P_0, and variance for timesteps Q (can have different variances, we have it fixed)
self.psi_diff_saves = [] # this can be used to resample the variance, but is currently unused
self.noise_mean = np.zeros(self.n_regressors) # save this, so as to not keep creating it
self.identity = np.eye(self.n_regressors) # not really needed, but kinda useful for state sampling
@@ -81,8 +81,11 @@ class Dynamic_GLM(GibbsSampling):
return outputs
def log_likelihood(self, input, timepoint):
"""
Given input from a single session (a matrix containing features and responses) and the timepoint information from that session, return log-likelihoods for observations.
"""
predictors, responses = input[:, :-1], input[:, -1]
nans = np.isnan(responses) # can come from cross-validation
probs = np.zeros((input.shape[0], 2))
out = np.zeros(input.shape[0])
# I could possibly save the 1 / ..., since it's logged it's just - log (but the other half of the probs is an issue)
@@ -97,17 +100,26 @@ class Dynamic_GLM(GibbsSampling):
# Gibbs sampling
def resample(self, data=[]):
"""
Resampling of the dynamic logistic random variables.
We follow the resampling scheme of Windle: "Efficient Data Augmentation in Dynamic Models for Binary and Count Data".
This makes use of the forward-filter backward-sample algorithm, which relies on Kalman filtering; for the filtering we follow Anderson & Moore 1979.
Usually you get just one type of observation per timestep (one set of features), but we have a number of different feature sets per timestep. Thus,
we invent additional timesteps, but we don't allow the regressors to change across these invented steps, effectively making the observations occur at the
same time.
"""
# TODO: Clean up this mess, I always have to call delete_obs_data because of all the saved state!
self.psi_diff_saves = []
summary_statistics, all_times = self._get_statistics(data)
types, pseudo_counts, counts = summary_statistics # pseudo_counts are kappa_t
# if state is never active, resample from prior, but without dynamic change
if len(counts) == 0:
self.weights = np.tile(np.random.multivariate_normal(mean=self.x_0, cov=self.P_0), (self.T, 1))
return
"""compute Kalman filter parameters, also sort out which weight vector goes to which timepoint"""
# Sort out which weight vector goes to which timepoint
timepoint_map = {}
total_types = 0
actual_obs_count = 0
@@ -128,31 +140,42 @@ class Dynamic_GLM(GibbsSampling):
timepoint_map[t] = total_types - 1
prev_t = t
# We introduce pseudo-variance steps: we don't want change for the fake times, so only specific pseudo-Q's will be non-zero
self.pseudo_Q = np.zeros((total_types, self.n_regressors, self.n_regressors))
# TODO: is it okay to cut off last timepoint here?
for k in range(self.T):
if k in timepoint_map:
self.pseudo_Q[timepoint_map[k]] = self.Q[k] # for every real timepoint, map its variance onto the pseudo_Q
"""sample pseudo obs"""
temp = np.empty(actual_obs_count)
"""Prepare for pseudo-observation sampling, collect all the psi's"""
psis = np.empty(actual_obs_count)
psi_counter = 0
predictors = []
for type, time in zip(types, all_times):
for t in type:
# psi (for a specific observation type t) is the weight vector at this time multiplied with the predictors t, i.e. the log odds
psis[psi_counter] = np.sum(self.weights[time] * t)
predictors.append(t)
psi_counter += 1
# Here we use the pg package to draw the augmenting variables from a Polya-Gamma distribution
# Windle calls the counts b_t = a_t + d_t, just the total number of times a psi has been seen. omega is the array in which our draws will be saved
omega = np.empty(actual_obs_count)
ppgs.pgdrawv(np.concatenate(counts).astype(float), psis, omega)
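# pgdrawv draws omega_i ~ PG(b_i, psi_i) element-wise and writes the results into the preallocated omega array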
# R is the variance on the observations in Kalman filtering. According to Windle, that is 1 / omega here
self.R = np.zeros(total_types)
mask = np.ones(total_types, dtype=bool) # plain bool: np.bool is deprecated / removed in recent NumPy
mask[fake_times] = False # during fake times, we make no observations, so no variance
self.R[mask] = 1 / omega
# Create pseudo-obs. They can be treated as if they came from N(psi, 1 / omega)
self.pseudo_obs = np.zeros(total_types)
self.pseudo_obs[mask] = np.concatenate(pseudo_counts) / omega # Windle: pseudo-data points are z_t = kappa_t / omega_t
self.pseudo_obs = self.pseudo_obs.reshape(total_types, 1)
# H is the matrix that transforms the latents into the (means of the) observations
# in our case, these are the features, they multiply with the regressors to give us the means that we noisily observe
self.H = np.zeros((total_types, self.n_regressors, 1))
self.H[mask] = np.array(predictors).reshape(actual_obs_count, self.n_regressors, 1)
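# each real pseudo-observation is thus modelled as z_t = H_t^T x_t + e_t, e_t ~ N(0, R_t), with x_t the regressor weights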
@@ -161,38 +184,48 @@ class Dynamic_GLM(GibbsSampling):
self.compute_sigmas(total_types)
self.compute_means(total_types)
"""sample states"""
"""Sample states
We follow Apendix 1 from "On Gibbs Sampling for State Space Models" by Carter & Kohn"""
self.weights.fill(0)
pseudo_weights = np.empty((total_types, self.n_regressors))
# the first weight can be sampled directly, from the end of the Kalman inference process
pseudo_weights[total_types - 1] = np.random.multivariate_normal(self.x_hat_k[total_types - 1], self.sigma_k[total_types - 1])
# the other weights now depend on the weight sampled just after them, we need to update the Kalman estimates
normals = np.random.standard_normal((total_types - 1, self.n_regressors)) # pregenerate some normal rvs to use later
for k in range(total_types - 2, -1, -1): # would normally start at total_types - 1, but we already sampled that weight above
if np.all(self.pseudo_Q[k] == 0): # if this is a fake time, no change is allowed, just copy the weight
pseudo_weights[k] = pseudo_weights[k + 1]
else:
# start with the Kalman filter inferences
updated_x = self.x_hat_k[k].copy() # not sure whether copy is necessary here
updated_sigma = self.sigma_k[k].copy()
# then update them with the subsequent weights
for m in range(self.n_regressors):
# these are the Carter & Kohn equations, making use of the fact that F is identity, therefore x^tilde is x
# and delta_i is the variance Q
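# concretely, with e_m the m-th unit vector (this is what the three lines below implement):
#   updated_x     <- updated_x + updated_sigma[:, m] * (pseudo_weights[k + 1, m] - updated_x[m]) / (updated_sigma[m, m] + pseudo_Q[k, m, m])
#   updated_sigma <- updated_sigma - updated_sigma e_m e_m^T updated_sigma / (updated_sigma[m, m] + pseudo_Q[k, m, m])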
epsilon = pseudo_weights[k + 1, m] - updated_x[m]
state_R = updated_sigma[m, m] + self.pseudo_Q[k, m, m]
updated_x += updated_sigma[:, m] * epsilon / state_R
updated_sigma -= updated_sigma.dot(np.outer(self.identity[m], self.identity[m])).dot(updated_sigma) / state_R
pseudo_weights[k] = local_multivariate_normal_draw(updated_x, updated_sigma, normals[k]) # transform our normal according to computed mean and var
# put the pseudo_weights where they belong
for k in range(self.T):
if k in timepoint_map:
self.weights[k] = pseudo_weights[timepoint_map[k]]
"""Sample before and after active times too"""
# Sample before and after active times too
# fill weights up before the first determined weight: add noise until the jumplimit is reached, then just copy (in principle this would need to be shrunk by P_0)
for k in range(all_times[0] - 1, -1, -1):
if k > all_times[0] - self.jumplimit - 1:
self.weights[k] = self.weights[k + 1] + np.random.multivariate_normal(self.noise_mean, self.Q[k])
else:
self.weights[k] = self.weights[k + 1]
# fill weights up after the last determined weight: add noise until the jumplimit is reached, then just copy
for k in range(all_times[-1] + 1, self.T):
if k < min(all_times[-1] + 1 + self.jumplimit, self.T):
self.weights[k] = self.weights[k - 1] + np.random.multivariate_normal(self.noise_mean, self.Q[k])
@@ -204,7 +237,11 @@ class Dynamic_GLM(GibbsSampling):
# self.psi_diff_saves = np.concatenate(self.psi_diff_saves)
def _get_statistics(self, data):
"""For the data of a list of sessions, we need to know:
which unique features did the state encounter (types)
what is their corresponding pseudo-observation (pseudo_counts)
how often did they occur (counts)
"""
summary_statistics = [[], [], []]
times = []
if isinstance(data, np.ndarray):
@@ -219,7 +256,8 @@ class Dynamic_GLM(GibbsSampling):
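# (assumption: the types, inverses and counts used here come from np.unique(predictors, axis=0, return_inverse=True, return_counts=True) for each session)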
pseudo_counts = np.zeros(len(types))
for j, c in enumerate(counts):
mask = inverses == j
# find out how the animals answered on these trials, and turn it into a pseudo-observation
pseudo_counts[j] = np.sum(responses[mask]) - c / 2 # already turn this into the desired format: kappa_t = a_t - b_t / 2
summary_statistics[0].append(types)
summary_statistics[1].append(pseudo_counts)
summary_statistics[2].append(counts)
@@ -231,28 +269,33 @@ class Dynamic_GLM(GibbsSampling):
"""Sigmas can be precomputed (without z), we do this here."""
# We rely on the fact that H.T.dot(sigma).dot(H) is just a number, no matrix inversion needed
# furthermore we use the fact that many matrices are identities, namely F and G
# We follow Anderson & Moore 1979, p.44
self.sigma_k = [] # we have to reset this for repeating this calculation later for the resampling (R changes)
self.sigma_k_k_minus = [self.P_0]
self.gain_save = []
for k in range(T):
if self.R[k] == 0: # these are fake times, no observations were made but we allow change as per the jumplimit
self.gain_save.append(None) # need a placeholder
self.sigma_k.append(self.sigma_k_k_minus[k])
self.sigma_k_k_minus.append(self.sigma_k[k] + self.pseudo_Q[k]) # the amount of allowed change just adds up if no obs
else:
sigma, H = self.sigma_k_k_minus[k], self.H[k] # we will need this a lot, so shorten it
gain = sigma.dot(H).dot(1 / (H.T.dot(sigma).dot(H) + self.R[k])) # matrix K, elsewhere called gain
self.gain_save.append(gain) # the gain matrix is useful for compute_means as well, save it
# the two main equations, from p.44:
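# with the gain K_k computed above:
#   Sigma_{k|k}   = Sigma_{k|k-1} - K_k H_k^T Sigma_{k|k-1}
#   Sigma_{k+1|k} = Sigma_{k|k} + pseudo_Q_k   (F and G are identities)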
self.sigma_k.append(sigma - gain.dot(H.T).dot(sigma))
self.sigma_k_k_minus.append(self.sigma_k[k] + self.pseudo_Q[k])
def compute_means(self, T):
"""Compute the means, the estimates of the states.
Used to also contain self.x_hat_k_k_minus, but it's not necessary for our setup
(with F = identity, x_hat_k_k_minus is just equal to x_hat_k, see also p.40)"""
# We follow Anderson & Moore 1979, p.44
self.x_hat_k = [self.x_0] # we have to reset this for repeating this calculation later for the resampling
for k in range(T): # this will leave out last state which doesn't have observation
if self.gain_save[k] is None:
self.x_hat_k.append(self.x_hat_k[k]) # mean stays unchanged if no observations
else:
x, H = self.x_hat_k[k], self.H[k] # we will need this a lot, so shorten it
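# update from p.44 (with F = identity): x_hat_{k+1} = x_hat_k + K_k * (z_k - H_k^T x_hat_k)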
self.x_hat_k.append(x + self.gain_save[k].dot(self.pseudo_obs[k] - H.T.dot(x)))
@@ -260,6 +303,7 @@ class Dynamic_GLM(GibbsSampling):
self.x_hat_k.pop(0) # remove initialisation element from list
def num_parameters(self):
# number of parameters; in principle used to compute WAIC
return self.weights.size
### Max likelihood