import os

# Restrict CUDA to physical GPU 3. This has to be set before torch is
# imported (below) so that the CUDA runtime only ever sees that device.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
import GPy
import torch
import torch.nn as nn
from tqdm import trange
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
# Device used for every tensor and model in this script. Fall back to the CPU
# when no CUDA device is visible so the script still runs end to end.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Create a synthetic dataset
# Fix both RNGs so the sampled function and the model initialisation repeat.
np.random.seed(0)
torch.random.manual_seed(4)

# Draw one smooth function from a GP prior with an RBF kernel on [-1, 1].
N = 50
x = np.linspace(-1, 1, N).reshape(-1, 1)
kernel = GPy.kern.RBF(input_dim=1, variance=1, lengthscale=0.1)
y = np.random.multivariate_normal(np.zeros(N), kernel.K(x)).reshape(-1, 1)
# Observations are the latent function plus Gaussian noise (std 0.1).
y_noisy = y + np.random.normal(0, 0.1, N).reshape(-1, 1)

# 60 % of the noisy points are used for training, 40 % for testing.
train_x, test_x, train_y, test_y = train_test_split(
    x, y_noisy, test_size=0.4, random_state=0
)

plt.plot(x, y, label="True")
plt.plot(train_x, train_y, 'o', label='train')
plt.plot(test_x, test_y, 'o', label='test')
plt.legend()


def _to_device_tensor(array):
    """Convert a NumPy array to a float32 tensor on ``device``."""
    return torch.tensor(array).float().to(device)


# From here on the dataset variables are torch tensors, not NumPy arrays.
x, y, y_noisy = map(_to_device_tensor, (x, y, y_noisy))
train_x, test_x, train_y, test_y = map(
    _to_device_tensor, (train_x, test_x, train_y, test_y)
)
print(x.shape, y.shape, y_noisy.shape)
torch.Size([50, 1]) torch.Size([50, 1]) torch.Size([50, 1])
# Fit with a simple MLP
def fit(model, x, y, A=None, lr=0.01, epochs=100):
    """Train ``model`` with Adam on the MSE between its output and ``y``.

    Each epoch is a single full-batch gradient step; a tqdm progress bar
    shows the current epoch and loss.

    Args:
        model: Module called as ``model(x)``, or as ``model(x, A)`` when
            ``A`` is given.
        x: Inputs passed to the model.
        y: Targets compared against the model output.
        A: Optional second positional argument for the model (the adjacency
            matrix for the GCN models in this file).
        lr: Adam learning rate.
        epochs: Number of optimisation steps.

    Returns:
        A list of Python floats, one per epoch, holding the loss computed
        before that epoch's parameter update.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()
    inputs = (x,) if A is None else (x, A)

    losses = []
    pbar = trange(epochs)
    for epoch in pbar:
        optimizer.zero_grad()
        y_hat = model(*inputs)
        loss = loss_fn(y_hat, y)

        # Read the scalar once; each .item() call forces a device sync.
        loss_value = loss.item()
        losses.append(loss_value)
        pbar.set_description(f"Epoch {epoch} Loss: {loss_value}")

        loss.backward()
        optimizer.step()

    return losses
class SimpleMLP(nn.Module):
    """Fully connected ReLU network mapping a scalar input to a scalar output.

    Args:
        features: Non-empty sequence of hidden-layer widths. The network is
            ``Linear(1, features[0])``, then one ``Linear`` per consecutive
            pair of widths, each followed by ``ReLU``, and finally
            ``Linear(features[-1], 1)`` with no activation.
    """

    def __init__(self, features):
        super().__init__()
        layers = [nn.Linear(1, features[0]), nn.ReLU()]
        for in_features, out_features in zip(features, features[1:]):
            layers.append(nn.Linear(in_features, out_features))
            layers.append(nn.ReLU())

        # Output head: no activation, so predictions are unbounded.
        layers.append(nn.Linear(features[-1], 1))

        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to ``x`` (last dimension must be 1)."""
        return self.layers(x)
# Train the MLP baseline on the training split only.
torch.manual_seed(0)
model = SimpleMLP([10, 10, 10]).to(device)
fit(model, train_x, train_y, lr=0.01, epochs=1000)

# Predict on the full grid so the fitted curve can be drawn over all of x.
pred_y = model(x)

# matplotlib needs host-side NumPy arrays; the trailing-underscore names are
# the NumPy copies of the corresponding tensors.
(x_, y_, train_x_, train_y_, test_x_, test_y_, pred_y_) = map(
    lambda t: t.cpu().detach().numpy(),
    (x, y, train_x, train_y, test_x, test_y, pred_y),
)
plt.plot(x_, y_, label="True")
plt.plot(train_x_, train_y_, 'o', label='train')
plt.plot(test_x_, test_y_, 'o', label='test')
plt.plot(x_, pred_y_, label='pred')
plt.legend()
Epoch 999 Loss: 0.07143261283636093: 100%|██████████| 1000/1000 [00:02<00:00, 410.79it/s]
# Create a GCN layer
class GCNLayer(nn.Module):
    """Graph-convolution layer: mix node features with ``A``, then a linear map.

    Args:
        in_features: Size of each node's input feature vector.
        out_features: Size of each node's output feature vector.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x, A):
        """Return ``linear(A @ x)``.

        ``A @ x`` replaces every node's features by the ``A``-weighted sum
        of all nodes' features before the learned linear map is applied.
        """
        return self.linear(A @ x)
class GCN(nn.Module):
    """Stack of ``GCNLayer`` + ReLU blocks with a plain linear output head.

    Args:
        features: Non-empty sequence of hidden widths. The first layer is
            ``GCNLayer(1, features[0])``; one ``GCNLayer`` follows per
            consecutive pair of widths, each with a ``ReLU``; the head is
            ``nn.Linear(features[-1], 1)``.
    """

    def __init__(self, features):
        super().__init__()
        layers = [GCNLayer(1, features[0]), nn.ReLU()]
        for in_features, out_features in zip(features, features[1:]):
            layers.append(GCNLayer(in_features, out_features))
            layers.append(nn.ReLU())

        # The output head is an ordinary linear layer: it does not use A.
        layers.append(nn.Linear(features[-1], 1))
        # nn.Sequential is used only as a registered, iterable container;
        # forward() walks it manually because GCNLayer needs A as well.
        self.layers = nn.Sequential(*layers)

    def forward(self, x, A):
        """Run ``x`` through the stack, passing ``A`` only to ``GCNLayer``s."""
        for layer in self.layers:
            if isinstance(layer, GCNLayer):
                x = layer(x, A)
            else:
                x = layer(x)
        return x
def get_eucledean_A(x, exponent):
    """Build a row-normalised inverse-distance adjacency matrix.

    Args:
        x: Tensor of shape ``(n, 1)`` holding one scalar location per row
            (the pairwise distances come from broadcasting ``x - x.T``).
        exponent: Power applied to the distance; larger values concentrate
            the weight on the nearest points.

    Returns:
        An ``(n, n)`` tensor whose entry ``[i, j]`` is proportional to
        ``1 / d(i, j) ** exponent`` and whose rows each sum to 1.
    """
    d = torch.abs(x - x.T)

    # Zero distances (the diagonal, and any coincident points) would give an
    # infinite weight, so they are replaced by half the smallest non-zero
    # distance. With no non-zero distance at all (e.g. a single point) there
    # is nothing to take the minimum of; any positive value then normalises
    # to the same result, so use 1.
    nonzero = d[d != 0]
    fill = nonzero.min() / 2 if nonzero.numel() > 0 else d.new_tensor(1.0)
    d = torch.where(d == 0, fill, d)

    A = 1 / (d ** exponent)
    return A / A.sum(dim=1, keepdim=True)
def get_KNN_A(x, k):
    """Build a row-normalised k-nearest-neighbour adjacency matrix.

    Args:
        x: Tensor of shape ``(n, 1)`` holding one scalar location per row.
        k: Number of nearest points kept per row. Each point is at distance
            0 from itself, so it counts as one of its own ``k`` neighbours.

    Returns:
        An ``(n, n)`` tensor in which row ``i`` has weight ``1 / k`` on the
        ``k`` points closest to point ``i`` and 0 elsewhere.
    """
    d = torch.abs(x - x.T)
    _, indices = torch.topk(d, k, dim=1, largest=False)

    # Mark the k selected columns of every row in one vectorised call.
    A = torch.zeros_like(d)
    A.scatter_(1, indices, 1.0)
    return A / A.sum(dim=1, keepdim=True)
def fit_and_plot(title):
    """Train a fresh GCN with the current adjacency matrices and plot it.

    Reads the module-level ``train_x``, ``train_y``, ``test_x``, ``test_y``,
    ``x``, ``y``, ``A_train``, ``A_all`` and ``device``. The model is trained
    on the training points with ``A_train`` and evaluated on all points with
    ``A_all``. The left panel shows the training-loss curve, the right panel
    the true function, both data splits and the prediction.

    Args:
        title: Title of the right-hand (prediction) panel.
    """
    model = GCN([10, 10, 10]).to(device)
    losses = fit(model, train_x, train_y, A=A_train, lr=0.001, epochs=3000)

    # Inference only: no autograd graph is needed for plotting.
    with torch.no_grad():
        pred_y = model(x, A_all)

    fig, ax = plt.subplots(1, 2, figsize=(12, 4))

    loss_axes = ax[0]
    loss_axes.plot(losses)
    loss_axes.set_title("Losses")

    # matplotlib needs host-side NumPy arrays.
    (x_, y_, train_x_, train_y_, test_x_, test_y_, pred_y_) = map(
        lambda t: t.cpu().detach().numpy(),
        (x, y, train_x, train_y, test_x, test_y, pred_y),
    )
    fit_axes = ax[1]
    fit_axes.plot(x_, y_, label="True")
    fit_axes.plot(train_x_, train_y_, 'o', label='train')
    fit_axes.plot(test_x_, test_y_, 'o', label='test')
    fit_axes.plot(x_, pred_y_, label='pred')
    fit_axes.set_title(title)
    fit_axes.legend()
# IDW (inverse-distance-weighted) setting
# Inverse-distance adjacency with exponent 1. A_train covers the training
# points only; A_all covers every point and is used at prediction time.
exponent = 1
A_train = get_eucledean_A(train_x, exponent).to(device)
A_all = get_eucledean_A(x, exponent).to(device)
title = f"Distance based adjacency matrix with exponent {exponent}"
fit_and_plot(title)
Epoch 2999 Loss: 0.05447980388998985: 100%|██████████| 3000/3000 [00:07<00:00, 390.93it/s]
# Inverse-distance adjacency with exponent 2 (weights fall off faster with
# distance than for exponent 1).
exponent = 2
A_train = get_eucledean_A(train_x, exponent).to(device)
A_all = get_eucledean_A(x, exponent).to(device)
title = f"Distance based adjacency matrix with exponent {exponent}"
fit_and_plot(title)
Epoch 2999 Loss: 0.06475391983985901: 100%|██████████| 3000/3000 [00:07<00:00, 413.49it/s]
# Inverse-distance adjacency with exponent 3.
exponent = 3
A_train = get_eucledean_A(train_x, exponent).to(device)
A_all = get_eucledean_A(x, exponent).to(device)
title = f"Distance based adjacency matrix with exponent {exponent}"
fit_and_plot(title)
Epoch 2999 Loss: 0.043554823845624924: 100%|██████████| 3000/3000 [00:08<00:00, 367.28it/s]
# KNN setting
# KNN adjacency with K=1. Each point is its own nearest neighbour (distance
# 0), so barring duplicate locations every row only selects the point itself.
K = 1
A_train = get_KNN_A(train_x, K).to(device)
A_all = get_KNN_A(x, K).to(device)
title = f"KNN based adjacency matrix with K={K}"
fit_and_plot(title)
Epoch 2999 Loss: 0.04107221961021423: 100%|██████████| 3000/3000 [00:07<00:00, 383.88it/s]
# KNN adjacency with K=3 (the point itself plus its two nearest neighbours).
K = 3
A_train = get_KNN_A(train_x, K).to(device)
A_all = get_KNN_A(x, K).to(device)
title = f"KNN based adjacency matrix with K={K}"
fit_and_plot(title)
Epoch 2999 Loss: 0.14372628927230835: 100%|██████████| 3000/3000 [00:07<00:00, 404.74it/s]
# KNN adjacency with K=7.
K = 7
A_train = get_KNN_A(train_x, K).to(device)
A_all = get_KNN_A(x, K).to(device)
title = f"KNN based adjacency matrix with K={K}"
fit_and_plot(title)
Epoch 2999 Loss: 0.13950258493423462: 100%|██████████| 3000/3000 [00:07<00:00, 381.66it/s]
# KNN adjacency with K=15.
K = 15
A_train = get_KNN_A(train_x, K).to(device)
A_all = get_KNN_A(x, K).to(device)
title = f"KNN based adjacency matrix with K={K}"
fit_and_plot(title)
Epoch 2999 Loss: 0.33879855275154114: 100%|██████████| 3000/3000 [00:07<00:00, 376.56it/s]