Commit 18872fc1 authored by Shashank Singh

Deleted old Experiment 1 and added new Experiment 1 (Figure 3).

parent 2ec2c690
@@ -8,6 +8,12 @@ A preprint of the paper is available on [ArXiv](https://arxiv.org/abs/2206.01454).
This repository is still under construction; updated code and instructions for reproducing the results in the paper will be added soon.
To reproduce Figure 3, run:
python figure_3.py
This may take a few hours to run.
To reproduce Figure 5, run:
python figure_5.py
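The scripts import numpy, matplotlib, tensorflow, tensorflow_probability, scikit-learn, and tqdm. Assuming the standard PyPI package names, the dependencies can be installed with:
pip install numpy matplotlib tensorflow tensorflow-probability scikit-learn tqdm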
......
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability import distributions as tfd
from tqdm import tqdm
num_replicates = 5 # Number of IID replications of experiment
n_train = 1000 # Training sample size
n_test = 100 # Test sample size
num_iters = 1001 # Number of training iterations
alpha_Z = tf.Variable([2.], name='alpha_Z')
alpha_eps = tf.Variable([3., 1.], name='alpha_eps')
beta_X = tf.Variable([0.], name='beta_X')
beta_eps = tf.Variable([1., 3.], name='beta_eps')
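# Sketch of the data-generating process defined below, reading off the
# parameter values above:
#   Z ~ N(0, 1),  eps ~ N(0, I_2)
#   X = alpha_Z * Z + alpha_eps . eps + N(0, 1)
#   Y = beta_X * X + beta_eps . eps + N(0, 1)
# Since beta_X = 0, X has no direct effect on Y; the shared noise eps
# confounds X and Y.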
def construct_data_generating_model(alpha_Z, alpha_eps, beta_X, beta_eps):
return tfd.JointDistributionSequential([
tfd.Normal(loc=[0.], scale=1., name='Z', validate_args=True),
tfd.Normal(loc=0., scale=[1., 1.], name='eps', validate_args=True),
lambda eps, Z: tfd.Normal(
loc=[tf.tensordot(alpha_Z, Z, axes=1) + tf.tensordot(alpha_eps, eps, axes=1)],
scale=1.,
name='X', validate_args=True),
lambda X, eps: tfd.Normal(
loc=tf.tensordot(beta_X, X, axes=1) + tf.tensordot(beta_eps, eps, axes=1),
scale=1.,
name='Y', validate_args=True),
], batch_ndims=0, use_vectorized_map=True)
joint = construct_data_generating_model(alpha_Z, alpha_eps, beta_X, beta_eps)
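# The fitted model below omits the unobserved confounder eps: X is modelled
# as alpha_Z * Z plus unit-variance noise, and the mean of Y as
# beta_X * (alpha_Z . Z), so only alpha_Z_hat and beta_X_hat are learned.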
def construct_data_fitting_model(alpha_Z, beta_X):
return tfd.JointDistributionSequential([
tfd.Normal(loc=[0.], scale=1., name='Z', validate_args=True),
lambda Z: tfd.Normal(
loc=[tf.tensordot(alpha_Z, Z, axes=1)],
scale=1.,
name='X', validate_args=True),
lambda X, Z: tfd.Normal(
loc=beta_X*tf.tensordot(alpha_Z, Z, axes=1),
scale=1.,
name='Y', validate_args=True),
], batch_ndims=0, use_vectorized_map=True)
def fit_model(Z, X, Y):
trainable_model_args = [
tf.Variable(np.random.normal([0], 1), dtype=np.float32, name='alpha_Z_hat'),
tf.Variable(np.random.normal([0], 1), dtype=np.float32, name='beta_X_hat')
]
# SPECIFY LOG-LIKELIHOOD TRAINING OBJECTIVE AND OPTIMIZER
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)
def log_prob():
trainable_model = construct_data_fitting_model(*trainable_model_args)
return tf.math.reduce_sum(trainable_model.log_prob(Z, X, Y))
@tf.function(autograph=True)
def train_op(trainable_model_args):
"""Apply a gradient update."""
with tf.GradientTape() as tape:
neg_log_prob = -log_prob()
grads = tape.gradient(neg_log_prob, trainable_model_args)
optimizer.apply_gradients(zip(grads, trainable_model_args))
return neg_log_prob, trainable_model_args
# loss_history = []
# alpha_Z_history = []
# beta_X_history = []
for step in range(num_iters):
loss, model_args = train_op(trainable_model_args)
# loss_history.append(loss)
# alpha_Z_history.append(model_args[0])
# beta_X_history.append(model_args[1])
print(model_args[0].numpy())
return [arg.numpy() for arg in model_args]
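# For each replicate: draw a fresh training sample from the true joint
# distribution, fit the confounder-free model by maximum likelihood, and
# record the fitted beta_X_hat; the histogram below shows its spread across
# replicates.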
beta_X_hats = []
for replicate in tqdm(range(num_replicates)):
# Draw training data
Z_train, _, X_train, Y_train = joint.sample(n_train)
alpha_Z_hat, beta_X_hat = fit_model(Z_train, X_train, Y_train)
beta_X_hats.append(beta_X_hat)
plt.hist(beta_X_hats)
# plt.subplot(3, 1, 1)
# plt.plot(loss_history)
# plt.ylabel('loss')
# plt.subplot(3, 1, 2)
# plt.plot(alpha_Z_history, label=r'$\hat\alpha_Z$')
# plt.plot([1, num_iters], [2., 2.], label=r'$\alpha_Z$')
# plt.legend()
# plt.subplot(3, 1, 3)
# plt.plot(beta_X_history, label=r'$\hat\beta_X$')
# plt.plot([1, num_iters], [0., 0.], label=r'$\beta_X$')
# plt.legend()
# plt.xlabel('Training Iteration')
plt.show()
@@ -3,133 +3,112 @@ import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from passive_knn_regressor import passive_knn_regressor, active_knn_regressor, oracle_knn_regressor
from knn_regressors import passive_knn_regressor, active_knn_regressor, oracle_knn_regressor
from sampler import Sampler
plt.rc('font', size=20) # Increase matplotlib default font size
np.random.seed(0) # Fix random seed for reproducibility
num_replicates = 256 # Number of IID replications of experiment TODO: 256
num_replicates = 1024 # Number of IID replications of experiment
def _compare_models(ns, d_Zs, d_Xs, sigma_Xs, sigma_Ys):
def _compare_models(ns, s_gs, d_Zs, d_Xs, sigma_Xs, sigma_Ys, f):
passive_errors = np.zeros((len(ns), len(d_Zs), len(d_Xs), len(sigma_Xs), len(sigma_Ys), num_replicates))
passive_cv_errors = np.zeros((len(ns), len(d_Zs), len(d_Xs), len(sigma_Xs), len(sigma_Ys), num_replicates))
active_errors = np.zeros((len(ns), len(d_Zs), len(d_Xs), len(sigma_Xs), len(sigma_Ys), num_replicates))
active_cv_errors = np.zeros((len(ns), len(d_Zs), len(d_Xs), len(sigma_Xs), len(sigma_Ys), num_replicates))
oracle_errors = np.zeros((len(ns), len(d_Zs), len(d_Xs), len(sigma_Xs), len(sigma_Ys), num_replicates))
oracle_cv_errors = np.zeros((len(ns), len(d_Zs), len(d_Xs), len(sigma_Xs), len(sigma_Ys), num_replicates))
passive_errors = np.zeros((len(ns), len(s_gs), len(d_Zs), len(d_Xs), len(sigma_Xs), len(sigma_Ys), num_replicates))
passive_cv_errors = np.zeros((len(ns), len(s_gs), len(d_Zs), len(d_Xs), len(sigma_Xs), len(sigma_Ys), num_replicates))
active_errors = np.zeros((len(ns), len(s_gs), len(d_Zs), len(d_Xs), len(sigma_Xs), len(sigma_Ys), num_replicates))
active_cv_errors = np.zeros((len(ns), len(s_gs), len(d_Zs), len(d_Xs), len(sigma_Xs), len(sigma_Ys), num_replicates))
oracle_errors = np.zeros((len(ns), len(s_gs), len(d_Zs), len(d_Xs), len(sigma_Xs), len(sigma_Ys), num_replicates))
oracle_cv_errors = np.zeros((len(ns), len(s_gs), len(d_Zs), len(d_Xs), len(sigma_Xs), len(sigma_Ys), num_replicates))
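# Each error array is indexed by (n, s_g, d_Z, d_X, sigma_X, sigma_Y, replicate);
# singleton axes are squeezed out before plotting, so each call to _plot_all
# effectively varies one parameter at a time.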
for replicate in tqdm(range(num_replicates)):
for d_Z_idx, d_Z in enumerate(d_Zs):
for d_X_idx, d_X in enumerate(d_Xs):
for sigma_X_idx, sigma_X in enumerate(sigma_Xs):
for sigma_Y_idx, sigma_Y in enumerate(sigma_Ys):
sampler = Sampler(d_Z, d_X, sigma_X, sigma_Y)
x_0 = sampler.x_0
f_x_0 = sampler.f(x_0)
for n_idx, n in enumerate(ns):
for s_g_idx, s_g in enumerate(s_gs):
sampler = Sampler(d_Z, d_X, sigma_X, sigma_Y, s_g, f)
x_0 = sampler.x_0
f_x_0 = sampler.f(x_0)
for n_idx, n in enumerate(ns):
Xs, Ys, Zs = sampler.generate_joint_samples(n)
passive_estimate = passive_knn_regressor(Xs, Ys, Zs, x_0, sigma_Y=sigma_Y)
passive_errors[n_idx, d_Z_idx, d_X_idx, sigma_X_idx, sigma_Y_idx, replicate] = passive_estimate - f_x_0
Xs, Ys, Zs = sampler.generate_joint_samples(n)
passive_estimate = passive_knn_regressor(Xs, Ys, Zs, x_0, sigma_Y=sigma_Y)
passive_errors[n_idx, s_g_idx, d_Z_idx, d_X_idx, sigma_X_idx, sigma_Y_idx, replicate] = passive_estimate - f_x_0
passive_cv_estimate = passive_knn_regressor(Xs, Ys, Zs, x_0, 'cv')
passive_cv_errors[n_idx, d_Z_idx, d_X_idx, sigma_X_idx,sigma_Y_idx, replicate] = passive_cv_estimate - f_x_0
passive_cv_estimate = passive_knn_regressor(Xs, Ys, Zs, x_0, 'cv')
passive_cv_errors[n_idx, s_g_idx, d_Z_idx, d_X_idx, sigma_X_idx,sigma_Y_idx, replicate] = passive_cv_estimate - f_x_0
active_estimate = active_knn_regressor(sampler, x_0, n, sigma_X=sigma_X, sigma_Y=sigma_Y)
active_errors[n_idx, d_Z_idx, d_X_idx, sigma_X_idx, sigma_Y_idx, replicate] = active_estimate - f_x_0
active_estimate, _, _, _ = active_knn_regressor(sampler, x_0, n, sigma_X=sigma_X, sigma_Y=sigma_Y)
active_errors[n_idx, s_g_idx, d_Z_idx, d_X_idx, sigma_X_idx, sigma_Y_idx, replicate] = active_estimate - f_x_0
active_cv_estimate = active_knn_regressor(sampler, x_0, n, 'cv')
active_cv_errors[n_idx, d_Z_idx, d_X_idx, sigma_X_idx, sigma_Y_idx, replicate] = active_cv_estimate - f_x_0
active_cv_estimate, _, _, _ = active_knn_regressor(sampler, x_0, n, 'cv')
active_cv_errors[n_idx, s_g_idx, d_Z_idx, d_X_idx, sigma_X_idx, sigma_Y_idx, replicate] = active_cv_estimate - f_x_0
oracle_estimate = oracle_knn_regressor(sampler, x_0, sampler.z_0, n, sigma_Y=sigma_Y)
oracle_errors[n_idx, d_Z_idx, d_X_idx, sigma_X_idx, sigma_Y_idx, replicate] = oracle_estimate - f_x_0
oracle_estimate = oracle_knn_regressor(sampler, x_0, sampler.z_0, n, sigma_Y=sigma_Y)
oracle_errors[n_idx, s_g_idx, d_Z_idx, d_X_idx, sigma_X_idx, sigma_Y_idx, replicate] = oracle_estimate - f_x_0
oracle_cv_estimate = oracle_knn_regressor(sampler, x_0, sampler.z_0, n, method='cv')
oracle_cv_errors[n_idx, d_Z_idx, d_X_idx, sigma_X_idx, sigma_Y_idx, replicate] = oracle_cv_estimate - f_x_0
oracle_cv_estimate = oracle_knn_regressor(sampler, x_0, sampler.z_0, n, method='cv')
oracle_cv_errors[n_idx, s_g_idx, d_Z_idx, d_X_idx, sigma_X_idx, sigma_Y_idx, replicate] = oracle_cv_estimate - f_x_0
return passive_errors, passive_cv_errors, active_cv_errors, active_errors, oracle_errors, oracle_cv_errors
# plt.subplot(3, 1, 1)
# plt.scatter(Zs, Xs[0, :])
# plt.subplot(3, 1, 2)
# plt.scatter(Zs, Xs[1, :])
# plt.subplot(3, 1, 3)
# plt.scatter(Zs, Ys)
# plt.show()
def _plot_all(xs, ns=[2**12], s_gs=[1.0], d_Zs=[3], d_Xs=[3], sigma_Xs=[1.0], sigma_Ys=[1.0], f=None):
def _plot_all(ns, d_Zs, d_Xs, sigma_Xs, sigma_Ys, xs):
passive_errors, passive_cv_errors, active_cv_errors, active_errors, oracle_errors, oracle_cv_errors = (x.squeeze() for x in _compare_models(ns, d_Zs, d_Xs, sigma_Xs, sigma_Ys))
passive_errors, passive_cv_errors, active_cv_errors, active_errors, oracle_errors, oracle_cv_errors = (x.squeeze() for x in _compare_models(ns, s_gs, d_Zs, d_Xs, sigma_Xs, sigma_Ys, f))
def _mse(errors):
return (errors**2).mean(axis=1)
def _ste(errors):
return (errors**2).std(axis=1)/(num_replicates**(1/2))
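# _mse is the average squared error over replicates; _ste is the standard
# error of that average (standard deviation of the squared errors divided by
# sqrt(num_replicates)).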
plt.errorbar(xs, _mse(passive_errors), _ste(passive_errors), marker='o', markersize=10, lw=2)
plt.errorbar(xs, _mse(passive_cv_errors), _ste(passive_cv_errors), ls='--', marker='o', markersize=10, lw=2)
plt.errorbar(xs, _mse(active_errors), _ste(active_errors), marker='^', markersize=10, lw=2)
plt.errorbar(xs, _mse(active_cv_errors), _ste(active_cv_errors), ls='--', marker='^', markersize=10, lw=2)
plt.errorbar(xs, _mse(oracle_errors), _ste(oracle_errors), marker='s', markersize=10, lw=2)
plt.errorbar(xs, _mse(oracle_cv_errors), _ste(oracle_cv_errors), ls='--', marker='s', markersize=10, lw=2)
# Plot performance over n with d_Z=3, sigma_X=1.0, d_X=3, and sigma_Y=1.0
plt.errorbar(xs, _mse(passive_errors), _ste(passive_errors), marker='o', markersize=10, lw=2, label='Passive')
plt.errorbar(xs, _mse(passive_cv_errors), _ste(passive_cv_errors), ls='--', marker='o', markersize=10, lw=2, label='Passive CV')
plt.errorbar(xs, _mse(active_errors), _ste(active_errors), marker='^', markersize=10, lw=2, label='Active')
plt.errorbar(xs, _mse(active_cv_errors), _ste(active_cv_errors), ls='--', marker='^', markersize=10, lw=2, label='Active CV')
plt.errorbar(xs, _mse(oracle_errors), _ste(oracle_errors), marker='s', markersize=10, lw=2, label='Oracle')
plt.errorbar(xs, _mse(oracle_cv_errors), _ste(oracle_cv_errors), ls='--', marker='s', markersize=10, lw=2, label='Oracle CV')
# Plot performance over n
plt.subplot(2, 3, 1)
ns = np.logspace(4, 12, 13, base=2., dtype=int)
d_Zs = [3]
d_Xs = [None]
sigma_Xs = [1.0]
sigma_Ys = [1.0]
_plot_all(ns, d_Zs, d_Xs, sigma_Xs, sigma_Ys, ns)
ns = np.logspace(4, 12, 9, base=2., dtype=int)
_plot_all(ns=ns, xs=ns)
plt.gca().set_xscale('log', base=2.0)
plt.gca().set_yscale('log')
plt.xlabel('Sample size ($n$)')
plt.ylabel('Mean Squared Error')
plt.legend(['Passive', 'Passive CV', 'Active', 'Active CV', 'Oracle', 'Oracle CV'])
plt.legend()
# Plot performance over d_Z with n=2**10, sigma_X=1.0, d_X=3, and sigma_Y=1.0
# Plot performance over d_Z
plt.subplot(2, 3, 2)
ns = [2**10]
d_Zs = [1, 2, 3, 4, 5, 6]
d_Xs = [None]
sigma_Xs = [1.0]
sigma_Ys = [1.0]
_plot_all(ns, d_Zs, d_Xs, sigma_Xs, sigma_Ys, d_Zs)
d_Zs = range(1, 11)
_plot_all(d_Zs=d_Zs, xs=d_Zs)
plt.gca().set_yscale('log')
plt.xlabel('Intrinsic Dimension ($d_Z$)')
# Plot performance over sigma_X with n=2**10, d_Z=3, d_X=3, and sigma_Y=1.0
# Plot performance over sigma_X
plt.subplot(2, 3, 3)
ns = [2**10]
d_Zs = [3]
d_Xs = [None]
sigma_Xs = [0.2, 0.5, 1.0, 2.0, 5.0, 10.0, 20.0]
sigma_Ys = [1.0]
_plot_all(ns, d_Zs, d_Xs, sigma_Xs, sigma_Ys, sigma_Xs)
plt.gca().set_yscale('log')
plt.gca().set_xscale('log')
sigma_Xs = np.logspace(-1, 1, 10)
_plot_all(sigma_Xs=sigma_Xs, xs=sigma_Xs)
plt.loglog()
plt.xlabel(r'Covariate Noise Level ($\sigma_X$)')
# Plot performance over s_g
plt.subplot(2, 3, 4)
s_gs = np.linspace(0.1, 1.0, 10)
_plot_all(s_gs=s_gs, xs=s_gs)
plt.gca().set_yscale('log')
plt.xlabel('Smoothness ($s_g$) of $g$')
plt.ylabel('Mean Squared Error')
# Plot performance over d_X with sigma_X with n=2**10, d_Z=3, sigma_X=1, and sigma_Y=1.0
# Plot performance over d_X
plt.subplot(2, 3, 5)
ns = [2**10]
d_Zs = [3]
d_Xs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
sigma_Xs = [1.0]
sigma_Ys = [1.0]
_plot_all(ns, d_Zs, d_Xs, sigma_Xs, sigma_Ys, d_Xs)
d_Xs = range(1, 11)
_plot_all(d_Xs=d_Xs, xs=d_Xs)
plt.gca().set_yscale('log')
plt.xlabel('Covariate Noise Dimension ($d_X$)')
# Plot performance over sigma_Y with sigma_X with n=2**10, d_Z=3, d_X=3, and sigma_X=1
# Plot performance over sigma_Y
plt.subplot(2, 3, 6)
ns = [2**10]
d_Zs = [3]
d_Xs = [None]
sigma_Xs = [1.0]
sigma_Ys = [0.2, 0.5, 1.0, 2.0, 5.0, 10.0, 20.0]
_plot_all(ns, d_Zs, d_Xs, sigma_Xs, sigma_Ys, sigma_Ys)
sigma_Ys = np.logspace(-1, 1, 10)
_plot_all(sigma_Ys=sigma_Ys, xs=sigma_Ys)
plt.loglog()
plt.xlabel(r'Response Noise Level ($\sigma_Y$)')
......
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.neighbors import KNeighborsRegressor
import matplotlib.pyplot as plt
@@ -10,12 +11,21 @@ def _CV_knn_regressor(n, ks=None, cv=4):
if not ks:
ks = np.logspace(0, int(np.log2(n)-2), int(np.log2(n)-1), base=2., dtype=int)
ks = [k for k in ks if k <= 100]
if ks == []:
ks = [1]
regressor = KNeighborsRegressor()
param_grid = {'n_neighbors': ks}
knn_gscv = GridSearchCV(regressor, param_grid, cv=cv)
return knn_gscv
def passive_kernel_regressor(Xs, Ys, Zs, x_0, method='theory', alpha=1.0):
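"""Regresses passive samples of Y over X with a Gaussian process fit and predicts at x_0 (Zs and method are unused)."""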
regressor = GaussianProcessRegressor(alpha=alpha)
regressor.fit(Xs, Ys)
return regressor.predict(x_0)
def passive_knn_regressor(Xs, Ys, Zs, x_0, method='theory', sigma_Y=1.0):
"""Regresses passive samples of Y over X."""
@@ -43,7 +53,7 @@ def active_knn_regressor(sampler, x_0, n, method='theory', sigma_X=1.0, sigma_Y=
Xs1, Ys1, Zs1 = sampler.generate_joint_samples(m)
if method == 'theory':
d_Z = Zs1.shape[1]
k_g = max(1, min(m, int(m**(2/(2 + d_Z))))) # TODO: Incorporate sigma_X
k_g = max(1, min(m, int(((sigma_X)**(2*d_Z/(2 + d_Z)))*m**(2/(2 + d_Z)))))
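# The theory-based choice grows like m^(2/(2 + d_Z)), the standard
# rate-optimal number of neighbours for k-NN regression over a
# d_Z-dimensional input (Lipschitz case), now additionally scaled by
# sigma_X^(2*d_Z/(2 + d_Z)) to incorporate the covariate noise level.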
g_regressor = KNeighborsRegressor(n_neighbors=k_g)
elif method == 'cv':
@@ -58,10 +68,11 @@ def active_knn_regressor(sampler, x_0, n, method='theory', sigma_X=1.0, sigma_Y=
best_z = Zs1[np.argmin(errors), :]
# Purely exploitative sampling
Xs2, Ys2, _ = sampler.generate_conditional_samples(best_z, n - m)
Xs2, Ys2, Zs2 = sampler.generate_conditional_samples(best_z, n - m)
Xs = np.concatenate((Xs1, Xs2))
Ys = np.concatenate((Ys1, Ys2))
Zs = np.concatenate((Zs1, Zs2))
if method == 'theory':
d_X = Xs.shape[1]
@@ -71,7 +82,7 @@ def active_knn_regressor(sampler, x_0, n, method='theory', sigma_X=1.0, sigma_Y=
f_regressor = _CV_knn_regressor(len(Ys))
f_regressor.fit(Xs, Ys)
return f_regressor.predict(x_0)
return f_regressor.predict(x_0), Xs, Ys, Zs
def oracle_knn_regressor(sampler, x_0, z_0, n, method='theory', sigma_Y=1.0):
......
@@ -2,23 +2,15 @@ import numpy as np
class Sampler:
def __init__(self, d_Z, d_X, sigma_X, sigma_Y):
def __init__(self, d_Z, d_X, sigma_X, sigma_Y, s_g, f):
self.d_Z = d_Z
self.sigma_X = sigma_X
self.sigma_Y = sigma_Y
if d_X:
self.d_X = d_X
self.g = lambda z: np.array(np.tile(z.sum(axis=1), [d_X, 1])).transpose()
self.f = lambda x: (x**2).sum(axis=1)
self.x_0 = np.zeros((1, d_X))
self.z_0 = np.zeros((1, d_Z))
else:
d_X = 3
self.g = lambda z: np.array([np.sin(z), np.cos(z), z]).mean(axis=2).transpose()
self.f = lambda x: x[:, 0]**2 + 3*x[:, 1]/(1 + x[:, 2])
self.x_0 = np.array([[1, 0, np.pi/2]])
self.z_0 = np.pi/2 * np.ones((1, d_Z))
self.g = lambda z: np.tile(4*(np.abs(z)**(s_g)).sum(axis=1), [d_X, 1]).transpose()
self.f = lambda x: (x**2).sum(axis=1)
self.z_0 = np.zeros((1, d_Z))
self.x_0 = np.zeros((1, d_X))
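# In the new sampler, g(z) = 4 * sum_j |z_j|^s_g, replicated across the
# d_X coordinates of X, so s_g controls the smoothness of the Z -> X link;
# f(x) = ||x||^2, and the target point is x_0 = 0 with z_0 = 0.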
def generate_conditional_samples(self, z, n):
"""Generates n independent samples of (X, Y) conditioned on Z = z."""
@@ -36,7 +28,7 @@ class Sampler:
def generate_joint_samples(self, n):
"""Generates n independent samples of (X, Y, Z)."""
Zs = np.random.uniform(low=0., high=2*np.pi, size=[n, self.d_Z])
Zs = np.random.uniform(low=0., high=1., size=[n, self.d_Z])
Xs = self.g(Zs)
Xs = Xs + np.random.normal(0, self.sigma_X, Xs.shape)
......