Let’s go hands-on
import torch
import numpy as np
tensor1 = torch.tensor([1, 2, 3.], dtype=torch.float32)
tensor2 = torch.tensor([5, 6, 7.], dtype=torch.float64)
display(tensor1, tensor2)
tensor([1., 2., 3.])
tensor([5., 6., 7.], dtype=torch.float64)
display(type(tensor1), type(tensor2))
torch.Tensor
torch.Tensor
display(tensor1.dtype, tensor2.dtype)
torch.float32
torch.float64
long_tensor = tensor1.to(torch.int32)  # device, dtype, tensor
display(long_tensor)
tensor([1, 2, 3], dtype=torch.int32)
long_tensor.device
device(type='cpu')
device = 'cuda' if torch.cuda.is_available() else 'cpu'
long_tensor_gpu = long_tensor.to(device)
long_tensor_gpu
tensor([1, 2, 3], device='cuda:0', dtype=torch.int32)
long_tensor_born_on_gpu = torch.zeros(2, 10, device=device).to(torch.float64)
long_tensor_born_on_gpu
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], device='cuda:0',
dtype=torch.float64)
inspired_tensor = torch.tensor([1., 2.]).to(long_tensor_born_on_gpu)
inspired_tensor
tensor([1., 2.], device='cuda:0', dtype=torch.float64)
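Passing an existing tensor to .to() copies both its device and its dtype in one call, which is handy for making a new tensor match an old one.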
np_array = np.array([1, 2, 3.])
np_array.log()
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
/tmp/ipykernel_7181/236375699.py in <module>
      1 np_array = np.array([1,2,3.])
----> 2 np_array.log()

AttributeError: 'numpy.ndarray' object has no attribute 'log'
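For the record, NumPy ships these as module-level functions rather than array methods, so the working NumPy equivalent is:
np.log(np_array)
array([0.        , 0.69314718, 1.09861229])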
pt_array = torch.tensor([1, 2, 3.])
pt_array.log()  # sin(), cos(), tan(), exp()
tensor([0.0000, 0.6931, 1.0986])
Gradient is all you need
import matplotlib.pyplot as plt
x = torch.rand(5, 1)
y = 3 * x + 2 + torch.randn_like(x) * 0.1
plt.scatter(x, y);
x_plus_ones = torch.cat([torch.ones_like(x), x], dim=1)
x_plus_ones.shape
torch.Size([5, 2])
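Before fitting anything by gradient descent, a quick sanity check: least squares has a closed-form solution, and torch.linalg.lstsq (available in PyTorch 1.9+) recovers it directly. Given the noise, it should land near the true bias 2 and slope 3:
theta_closed_form = torch.linalg.lstsq(x_plus_ones, y).solution
theta_closed_form  # roughly tensor([[2.], [3.]])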
theta = torch.zeros(2, 1, requires_grad=True)
theta
tensor([[0.],
[0.]], requires_grad=True)
theta.grad     # None: backward() has not run yet
theta.grad_fn  # None: theta is a user-created leaf, not the output of an op
lr = 0.1

y_pred = x_plus_ones @ theta
# y_pred = torch.matmul(x_plus_ones, theta)
# y_pred = torch.mm(x_plus_ones, theta)
loss = ((y_pred - y)**2).mean()
loss.backward()
theta.grad  # dloss/dtheta
tensor([[-6.3681],
[-2.8128]])
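That gradient is nothing mysterious: for loss = mean((Xθ − y)²) the closed form is dloss/dθ = (2/N)·Xᵀ(Xθ − y). Computing it by hand (inside no_grad so it stays out of the graph) should reproduce theta.grad:
with torch.no_grad():
    manual_grad = 2 / len(y) * x_plus_ones.T @ (x_plus_ones @ theta - y)
manual_grad  # matches theta.grad above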
theta.grad_fn  # still None: theta is still a leaf tensor
theta.data -= lr * theta.grad.data
theta
tensor([[0.6368],
[0.2813]], requires_grad=True)
theta.grad_fn  # still None: the .data update bypassed autograd
with torch.no_grad():
    plt.scatter(x, y)
    plt.plot(x, x_plus_ones @ theta)
for i in range(10):
    theta.grad.data.zero_()
    y_pred = x_plus_ones @ theta
    loss = ((y_pred - y)**2).mean()
    loss.backward()
    theta.data -= lr * theta.grad

with torch.no_grad():
    plt.scatter(x, y)
    plt.plot(x, x_plus_ones @ theta)
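Ten steps at lr = 0.1 only take theta part of the way; inspecting it directly shows it drifting toward the true parameters (bias 2, slope 3) as more iterations are run:
theta.detach().flatten()  # approaches tensor([2., 3.]) with more iterations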
Advanced
class LinearRegression(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.theta = torch.nn.Parameter(torch.zeros(2, 1))
        # equivalent: self.register_parameter('theta', torch.nn.Parameter(torch.zeros(2, 1)))

    def forward(self, x):  # don't call forward() directly; it is invoked via the __call__ method
        x_plus_ones = torch.cat([torch.ones_like(x), x], dim=1)
        y_pred = x_plus_ones @ self.theta
        return y_pred
model = LinearRegression()
model
LinearRegression()
for name, value in model.named_parameters():
    print(name, value)
theta Parameter containing:
tensor([[0.],
[0.]], requires_grad=True)
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
loss_fn = torch.nn.MSELoss()  # torch.nn.CrossEntropyLoss()
for i in range(10):
    optimizer.zero_grad()
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    loss.backward()
    optimizer.step()
model.state_dict()
OrderedDict([('theta',
tensor([[0.9799],
[0.9808]]))])
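Why roughly 0.98 in both coordinates? Adam's update magnitude is approximately lr per step while the gradient direction is consistent, so ten steps of size 0.1 carry each parameter from 0 to about 1.0, still short of the targets 2 and 3.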
Wanna run on GPU?
x_gpu = x.to(device)
y_gpu = y.to(device)

print(model.theta)
model.to(device)
print(model.theta)
Parameter containing:
tensor([[0.9799],
[0.9808]], requires_grad=True)
Parameter containing:
tensor([[0.9799],
[0.9808]], device='cuda:0', requires_grad=True)
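Note the difference from tensors: tensor.to(device) returns a new tensor and leaves the original alone, whereas Module.to(device) moves the module's parameters in place, which is why the same model object now reports cuda:0.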
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
loss_fn = torch.nn.MSELoss()  # torch.nn.CrossEntropyLoss()
for i in range(10):
    optimizer.zero_grad()
    y_pred = model(x_gpu)
    loss = loss_fn(y_pred, y_gpu)
    loss.backward()
    optimizer.step()
State dictionary
# torch.save(model.state_dict(), path)
# model.load_state_dict(torch.load(path))
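A minimal round-trip sketch (the filename 'linreg.pt' here is just a placeholder):
path = 'linreg.pt'  # hypothetical path
torch.save(model.state_dict(), path)
restored = LinearRegression()
restored.load_state_dict(torch.load(path, map_location=device))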
NN way
class LinearRegression(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(2, 1)  # torch.nn.Linear(128, 64)
        # What else?
        # self.activation = torch.nn.ReLU()
        # torch.nn.LSTM()
        # torch.nn.Conv2d()

    def forward(self, x):  # don't call forward() directly; it is invoked via the __call__ method
        # note: Linear already has its own bias, so the ones column is redundant here
        x_plus_ones = torch.cat([torch.ones_like(x), x], dim=1)
        y_pred = self.layer(x_plus_ones)
        return y_pred
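With an nn.Linear layer, named_parameters() now reports the layer's own weight and bias instead of a single theta, and the same optimizer/loss loop works unchanged:
model = LinearRegression()
for name, value in model.named_parameters():
    print(name, value.shape)
layer.weight torch.Size([1, 2])
layer.bias torch.Size([1])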