- We warp the inputs \(\mathbf{x}\) into \(\mathbf{w}\cdot\mathbf{x}\) (plus a learned bias), as formalized just below.
- We learn a second-level GP over \(\mathbf{w}\), defined at a small set of inducing inputs.
- We apply a penalty on \(\mathbf{w}\) so that it does not vary more than necessary (it is pulled towards 1).
- See the problems at the end of the notebook.
- We still need to examine the mathematical concerns related to this transformation.
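In equations (matching the implementation below), with inducing inputs \(\bar{\mathbf{x}}\) (chosen by k-means) and learnable weights \(\bar{\mathbf{w}}\), the latent GP interpolates a weight \(w(x)\) at each input, which warps the input as
\[
\tilde{x} = w(x)\,x + b, \qquad w(x) = \mathbb{E}\left[ w \mid \bar{\mathbf{x}}, \bar{\mathbf{w}} \right](x),
\]
and the training loss augments the negative marginal log likelihood with the penalty
\[
\mathcal{L} = -\log p(\mathbf{y} \mid \mathbf{x}) + \frac{1}{n} \sum_{i=1}^{n} \left( w(x_i) - 1 \right)^2 .
\]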
import math
import numpy as np
import torch
import gpytorch
from matplotlib import pyplot as plt
import regdata as rd
from sklearn.cluster import KMeans
class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)


class ExactNSGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood, num_latent):
        super(ExactNSGPModel, self).__init__(train_x, train_y, likelihood)
        # inds = np.random.choice(train_x.shape[0], size=num_latent, replace=False)
        # self.x_bar = train_x[inds]
        # Inducing inputs for the latent GP, chosen by k-means on the training inputs
        self.x_bar = torch.tensor(KMeans(n_clusters=num_latent).fit(train_x).cluster_centers_).to(train_x)
        self.w_bar = torch.nn.Parameter(torch.ones(num_latent,).to(self.x_bar))
        self.bias = torch.nn.Parameter(torch.zeros(1,).to(self.x_bar))
        self.latent_likelihood = gpytorch.likelihoods.GaussianLikelihood()
        # We could fix the noise at its minimum, but that is not ideal; the noise should
        # shrink to a reasonable value on its own.
        # self.latent_likelihood.raw_noise.requires_grad = False
        # self.latent_likelihood.raw_noise = torch.tensor(-10.)
        # Latent (second-level) GP that interpolates the weights w over the input space
        self.latent_model = ExactGPModel(self.x_bar, self.w_bar, self.latent_likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        self.latent_model.eval()
        with gpytorch.settings.detach_test_caches(False):  # needed to backpropagate through the predictive posterior
            self.latent_model.set_train_data(self.x_bar, self.w_bar, strict=False)
            self.w = self.latent_likelihood(self.latent_model(x))  # predictive posterior over the weights at x
        x_warped = x * self.w.mean[:, None] + self.bias
        mean_x = self.mean_module(x_warped)
        covar_x = self.covar_module(x_warped)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
def training(model, likelihood):
    training_iter = 100
    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # Use the adam optimizer
    optimizer = torch.optim.Adam([
        {'params': model.parameters()},  # Includes GaussianLikelihood parameters
    ], lr=0.1)

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    for i in range(training_iter):
        # Zero gradients from previous iteration
        optimizer.zero_grad()
        # Output from model
        output = model(train_x)
        # Calc loss and backprop gradients
        try:
            loss = -mll(output, train_y) + torch.square(model.w.mean - 1).mean()
            # print(model.latent_likelihood.noise)
        except AttributeError:
            loss = -mll(output, train_y)
        loss.backward()
        # print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' % (
        #     i + 1, training_iter, loss.item(),
        #     model.covar_module.base_kernel.lengthscale.item(),
        #     model.likelihood.noise.item()
        # ))
        optimizer.step()


def predict_plot(model, likelihood, title):
    # Get into evaluation (predictive posterior) mode
    model.eval()
    likelihood.eval()

    # Test points are regularly spaced along [0,1]
    # Make predictions by feeding model through likelihood
    with torch.no_grad():
        observed_pred = likelihood(model(test_x))

    with torch.no_grad():
        # Initialize plot
        f, ax = plt.subplots(1, 1, figsize=(10, 6))

        # Get upper and lower confidence bounds
        lower, upper = observed_pred.confidence_region()
        # Plot training data as black stars
        ax.plot(train_x.numpy(), train_y.numpy(), 'k*')
        # Plot predictive means as blue line
        ax.plot(test_x.numpy(), observed_pred.mean.numpy(), 'b')
        # Shade between the lower and upper confidence bounds
        ax.fill_between(test_x.numpy().ravel(), lower.numpy(), upper.numpy(), alpha=0.5)
        ax.legend(['Observed Data', 'Mean', 'Confidence'])
        ax.set_title(title)
    return observed_pred
def GP(num_latent):
    # initialize likelihood and model
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model = ExactGPModel(train_x, train_y, likelihood)
    training(model, likelihood)
    predict_plot(model, likelihood, 'GP')


def NSGP(num_latent):
    # initialize likelihood and model
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model = ExactNSGPModel(train_x, train_y, likelihood, num_latent)
    training(model, likelihood)
    observed_pred = predict_plot(model, likelihood, 'NSGP')

    with torch.no_grad():
        model.train()
        model.forward(test_x)
        plt.figure(figsize=(10, 6))
        plt.plot(test_x * model.w.mean[:, None], observed_pred.mean.numpy())
        plt.title('Warped test inputs v/s test outputs')

    with torch.no_grad():
        model.train()
        model.forward(test_x)
        plt.figure(figsize=(10, 6))
        plt.plot(test_x, model.w.mean, label='interpolated')
        plt.scatter(model.x_bar, model.w_bar, label='learned')
        plt.ylim(0, 2)
        plt.title('Test input v/s weights')
        plt.legend()
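Here is a minimal driver sketch for these two functions. It assumes a synthetic 1D dataset whose frequency increases with the input (the notebook itself loads data through regdata); train_x, train_y and test_x are the globals that the functions above read, and num_latent = 10 is an arbitrary choice.

# Rough sketch, not from the notebook: synthetic non-stationary data.
torch.manual_seed(0)
train_x = torch.linspace(0, 1, 100).unsqueeze(-1)                  # (n, 1)
train_y = torch.sin(2 * math.pi * (4 * train_x) ** 2).squeeze(-1) \
          + 0.1 * torch.randn(train_x.shape[0])                    # (n,)
test_x = torch.linspace(0, 1, 200).unsqueeze(-1)                   # (m, 1)

GP(num_latent=None)    # stationary baseline; num_latent is unused here
NSGP(num_latent=10)    # warped model with 10 inducing inputs for w
plt.show()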