Commit e442e1dd authored by Shashank Singh

Finished first draft of Gaussian denoising example.

parent dacb808d
import os

import matplotlib
matplotlib.rcParams.update({'font.size': 22})
import matplotlib.pyplot as plt
import numpy as np
from sklearn.kernel_ridge import KernelRidge
# Fix random seed for reproducibility
np.random.seed(0)
# Simulation parameters
n = 2000         # sample size
sigma_Y = 1.0    # sd of the measurement noise epsilon added to Y
sigma_U = 0.2    # sd of the intrinsic noise U of Y around f(X)
# X-coordinates for plotting
xs = np.linspace(0, 1, 1001)
X = np.random.uniform(size=(n, 1))
f_X = lambda x_s: (x_s * np.sin(4*np.pi*x_s)).squeeze()  # true regression function
Y = f_X(X) + sigma_U*np.random.normal(size=(n,))         # latent true response
Y_hat = Y + sigma_Y*np.random.normal(size=Y.shape)       # noisy observation of Y
# We observe (X, Y_hat = Y + epsilon), where epsilon ~ N(0, sigma_Y^2).
# We regress Y_hat on X to estimate E[Y_hat|X].
# Since E[epsilon|X] = 0, E[Y_hat|X] = E[Y|X].
# Now we need to estimate the distribution of Y - E[Y|X].
# To do this, we estimate the distribution of Y_hat - E[Y_hat|X] and then
# deconvolve out the distribution of epsilon.
# Kernel ridge regression (RBF kernel) to estimate E[Y_hat|X]
regressor = KernelRidge(kernel='rbf', alpha=1e-8)
regressor.fit(X, Y_hat)
# Since everything is Gaussian, deconvolution consists simply of subtracting variances.
# (Oracle step: sigma_Y_hat uses the simulated true Y; in practice the
# measurement-noise sd sigma_Y would be assumed known.)
sigma_Y_hat = np.sqrt(np.square(Y - Y_hat).mean())                          # estimated sd of epsilon
sigma_Y_hat_hat = np.sqrt(np.square(Y_hat - regressor.predict(X)).mean())   # residual sd of Y_hat
sigma_U_hat = np.sqrt(max(sigma_Y_hat_hat**2 - sigma_Y_hat**2, 0.0))        # deconvolved sd (clipped at 0)
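# Why variance subtraction works: writing U = Y - E[Y|X], we have
# Y_hat - E[Y_hat|X] = U + epsilon with U and epsilon independent Gaussians, so
#   Var(Y_hat - E[Y_hat|X]) = sigma_U^2 + sigma_Y^2,
# and hence sigma_U = sqrt(Var(Y_hat - E[Y_hat|X]) - sigma_Y^2).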
print(f'total residual sd: {sigma_Y_hat_hat}')
print(f'measurement-noise sd: {sigma_Y_hat}')
print(f'deconvolved sd of U: {sigma_U_hat}')
plt.figure(figsize=(24, 8))
plt.subplot(1, 2, 1)
plt.scatter(X, Y_hat, label=r'Observed $(X, \widehat{Y})$')
plt.scatter(X, Y, label=r'True $(X, Y)$')
plt.xlabel('$X$')
plt.ylabel('$Y$')
plt.xlim((0,1))
plt.ylim((-4, 4))
plt.legend()
regression_curve = regressor.predict(xs.reshape(-1, 1))
plt.subplot(1, 2, 2)
plt.scatter(X, Y, label=r'$Y$', color='orange')
plt.plot(xs, regression_curve, label='Kernel Regression', color='blue')
# Naive 95% band: treats the raw residual sd of Y_hat as the sd of Y,
# ignoring the measurement noise
naive_upper95 = regression_curve + 2*sigma_Y_hat_hat
naive_lower95 = regression_curve - 2*sigma_Y_hat_hat
plt.fill_between(xs, naive_lower95, naive_upper95, color='gray', alpha=.2, label='Naive 95% CI')
# Deconvolved 95% band: uses the deconvolved sd of U
upper95 = regression_curve + 2*sigma_U_hat
lower95 = regression_curve - 2*sigma_U_hat
plt.fill_between(xs, lower95, upper95, color='b', alpha=.2, label='Deconvolved 95% CI')
plt.xlabel('$X$')
plt.ylabel('$Y$')
plt.xlim((0,1))
plt.ylim((-4, 4))
plt.legend()
# Empirical coverage of the deconvolved band for the true Y (should be near 0.95)
coverage = (np.abs(Y - regressor.predict(X)) < 2*sigma_U_hat).mean()
print(f'Coverage: {coverage}')
os.makedirs('figures', exist_ok=True)  # ensure the output directory exists
plt.savefig('figures/gaussian_denoising.png')
plt.savefig('figures/gaussian_denoising.pdf')
plt.show()
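
# ---------------------------------------------------------------------------
# Optional sanity check: a minimal sketch (not part of the pipeline above)
# showing that variance subtraction recovers sigma_U on fresh draws from the
# same generative model. `n_check` and the *_check names are illustrative.
n_check = 100_000
U_check = sigma_U * np.random.normal(size=n_check)    # intrinsic noise of Y around f(X)
eps_check = sigma_Y * np.random.normal(size=n_check)  # measurement noise epsilon
total_sd = np.std(U_check + eps_check)                # sd of the combined residual
recovered_sd = np.sqrt(max(total_sd**2 - sigma_Y**2, 0.0))  # subtract the noise variance
print(f'true sigma_U: {sigma_U}, recovered sigma_U: {recovered_sd:.3f}')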