Commit e442e1dd authored by Shashank Singh

Finished first draft of Gaussian denoising example.

parent dacb808d
import os

import matplotlib
matplotlib.rcParams.update({'font.size': 22})
import matplotlib.pyplot as plt
import numpy as np
from sklearn.kernel_ridge import KernelRidge
# Fix random seed for reproducibility
np.random.seed(0)
# Simulation parameters
n = 2000         # sample size
sigma_Y = 1.0    # sd of the measurement noise epsilon added to Y
sigma_U = 0.2    # sd of the intrinsic noise U of Y around f(X)
# X-coordinates for plotting
xs = np.linspace(0, 1, 1001)
X = np.random.uniform(size=(n, 1))
f_X = lambda x_s: (x_s * np.sin(4*np.pi*x_s)).squeeze()  # true regression function
Y = f_X(X) + sigma_U*np.random.normal(size=(n,))         # latent true response
Y_hat = Y + sigma_Y*np.random.normal(size=Y.shape)       # noisy observation of Y
# We observe (X, Y_hat = Y + epsilon), where epsilon ~ N(0, sigma_Y^2).
# We regress Y_hat on X to estimate E[Y_hat|X].
# Since E[epsilon|X] = 0, E[Y_hat|X] = E[Y|X].
# Now we need to estimate the distribution of Y - E[Y|X].
# To do this, we estimate the distribution of Y_hat - E[Y_hat|X] and then
# deconvolve out the distribution of epsilon.
# Kernel ridge regression (RBF kernel) to estimate E[Y_hat|X]
regressor = KernelRidge(kernel='rbf', alpha=1e-8)
regressor.fit(X, Y_hat)
# Since everything is Gaussian, deconvolution consists simply of subtracting variances.
# (Oracle step: sigma_Y_hat uses the simulated true Y; in practice the
# measurement-noise sd sigma_Y would be assumed known.)
sigma_Y_hat = np.sqrt(np.square(Y - Y_hat).mean())                          # estimated sd of epsilon
sigma_Y_hat_hat = np.sqrt(np.square(Y_hat - regressor.predict(X)).mean())   # residual sd of Y_hat
sigma_U_hat = np.sqrt(max(sigma_Y_hat_hat**2 - sigma_Y_hat**2, 0.0))        # deconvolved sd (clipped at 0)
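# Why variance subtraction works: writing U = Y - E[Y|X], we have
# Y_hat - E[Y_hat|X] = U + epsilon with U and epsilon independent Gaussians, so
#   Var(Y_hat - E[Y_hat|X]) = sigma_U^2 + sigma_Y^2,
# and hence sigma_U = sqrt(Var(Y_hat - E[Y_hat|X]) - sigma_Y^2).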
print(f'total residual sd: {sigma_Y_hat_hat}')
print(f'measurement-noise sd: {sigma_Y_hat}')
print(f'deconvolved sd of U: {sigma_U_hat}')
plt.figure(figsize=(24, 8))
plt.subplot(1, 2, 1)
plt.scatter(X, Y_hat, label=r'Observed $(X, \widehat{Y})$')
plt.scatter(X, Y, label=r'True $(X, Y)$')
plt.xlabel('$X$')
plt.ylabel('$Y$')
plt.xlim((0,1))
plt.ylim((-4, 4))
plt.legend()
regression_curve = regressor.predict(xs.reshape(-1, 1))
plt.subplot(1, 2, 2)
plt.scatter(X, Y, label=r'$Y$', color='orange')
plt.plot(xs, regression_curve, label='Kernel Regression', color='blue')
# Naive 95% band: treats the raw residual sd of Y_hat as the sd of Y,
# ignoring the measurement noise
naive_upper95 = regression_curve + 2*sigma_Y_hat_hat
naive_lower95 = regression_curve - 2*sigma_Y_hat_hat
plt.fill_between(xs, naive_lower95, naive_upper95, color='gray', alpha=.2, label='Naive 95% CI')
# Deconvolved 95% band: uses the deconvolved sd of U
upper95 = regression_curve + 2*sigma_U_hat
lower95 = regression_curve - 2*sigma_U_hat
plt.fill_between(xs, lower95, upper95, color='b', alpha=.2, label='Deconvolved 95% CI')
plt.xlabel('$X$')
plt.ylabel('$Y$')
plt.xlim((0,1))
plt.ylim((-4, 4))
plt.legend()
# Empirical coverage of the deconvolved band for the true Y (should be near 0.95)
coverage = (np.abs(Y - regressor.predict(X)) < 2*sigma_U_hat).mean()
print(f'Coverage: {coverage}')
os.makedirs('figures', exist_ok=True)  # ensure the output directory exists
plt.savefig('figures/gaussian_denoising.png')
plt.savefig('figures/gaussian_denoising.pdf')
plt.show()
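
# ---------------------------------------------------------------------------
# Optional sanity check: a minimal sketch (not part of the pipeline above)
# showing that variance subtraction recovers sigma_U on fresh draws from the
# same generative model. `n_check` and the *_check names are illustrative.
n_check = 100_000
U_check = sigma_U * np.random.normal(size=n_check)    # intrinsic noise of Y around f(X)
eps_check = sigma_Y * np.random.normal(size=n_check)  # measurement noise epsilon
total_sd = np.std(U_check + eps_check)                # sd of the combined residual
recovered_sd = np.sqrt(max(total_sd**2 - sigma_Y**2, 0.0))  # subtract the noise variance
print(f'true sigma_U: {sigma_U}, recovered sigma_U: {recovered_sd:.3f}')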