PyTorch Crash Course
Overview:
1. Tensor Basics
• Create, Operations, NumPy, GPU Support
2. Autograd
• Linear regression example
3. Training Loop with: Model, Loss & Optimizer
• A typical PyTorch training pipeline
4. Neural Network
• Also: GPU, Datasets, DataLoader, Transforms & Evaluation
5. Convolutional Neural Network
• Also: Save/Load model
1. Tensors
Everything in PyTorch is based on Tensor operations. A Tensor is a multi-dimensional matrix
containing elements of a single data type:
import torch
# torch.empty(size): uninitialized
x = torch.empty(1) # scalar
print("empty(1):", x)
x = torch.empty(3) # vector
print("empty(3):", x)
x = torch.empty(2, 3) # matrix
print("empty(2,3):", x)
x = torch.empty(2, 2, 3) # tensor, 3 dimensions
#x = torch.empty(2, 2, 2, 3) # tensor, 4 dimensions
print("empty(2, 2, 3):", x)
# check size
print("size", x.size()) # x.size(0)
print("shape", x.shape) # x.shape[0]
# check data type
print(x.dtype)
# requires_grad argument
# This tells PyTorch that it will need to calculate the gradients for this
# tensor later in your optimization steps,
# i.e. this is a variable in your model that you want to optimize
x = torch.tensor([5.5, 3], requires_grad=True)
print(x)
# elementwise addition
x = torch.rand(2, 2)
y = torch.rand(2, 2)
z = x + y
# z = torch.add(x, y)
print(x)
print(y)
print(z)
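PyTorch also has in-place variants of these operations: any method whose name ends with an underscore modifies the tensor it is called on. A small sketch using the x and y from above:
# in-place addition: modifies y directly
y.add_(x)
print(y)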
# subtraction
z = x - y
z = torch.sub(x, y)
# multiplication
z = x * y
z = torch.mul(x, y)
# division
z = x / y
z = torch.div(x, y)
# Slicing
x = torch.rand(5,3)
print(x)
print("x[:, 0]", x[:, 0]) # all rows, column 0
print("x[1, :]", x[1, :]) # row 1, all columns
print("x[1, 1]", x[1,1]) # element at 1, 1
NumPy
Converting a Torch Tensor to a NumPy array and vice versa is very easy:
a = torch.ones(5)
print(a)
b = a.numpy()
print(b)
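The reverse direction uses torch.from_numpy. Note that on the CPU the tensor and the NumPy array share the same memory, so changing one changes the other:
import numpy as np
c = np.ones(5)
d = torch.from_numpy(c)
c += 1 # modifies the shared buffer
print(c) # [2. 2. 2. 2. 2.]
print(d) # tensor([2., 2., 2., 2., 2.], dtype=torch.float64)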
GPU Support
By default all tensors are created on the CPU, but we can also move them to the GPU (if one is available), or create them directly on the GPU:
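A minimal sketch of both approaches:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# move an existing tensor to the GPU
x = torch.rand(2, 2)
x = x.to(device)
# or create it directly on the GPU
y = torch.rand(2, 2, device=device)
# move back to the CPU (e.g. before converting to NumPy)
z = (x + y).to('cpu')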
2. Autograd
The autograd package provides automatic differentiation for all operations on Tensors.
Generally speaking, torch.autograd is an engine for computing the vector-Jacobian product. It
computes partial derivatives while applying the chain rule.
import torch
x = torch.randn(3, requires_grad=True)
y = x + 2 # y was created from an operation on x, so it has a grad_fn
# Do more operations on y
z = y * y * 3
print(z)
z = z.mean()
print(z)
print(x.grad) # None: backward() has not been called yet
z.backward()
print(x.grad) # dz/dx
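Because backward() computes a vector-Jacobian product, calling it on a non-scalar output requires passing a gradient vector of matching shape. A small sketch:
x = torch.randn(3, requires_grad=True)
y = x * 2
v = torch.tensor([0.1, 1.0, 0.0001])
y.backward(v) # computes v^T @ J
print(x.grad)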
Stop a tensor from tracking history, e.g. during the weight update step or during evaluation, when these operations should not be part of the gradient computation:
• x.requires_grad_(False)
• x.detach()
• wrap the code in with torch.no_grad(): (see the sketch after the .detach() example below)
# .requires_grad_(...) changes an existing flag in-place.
a = torch.randn(2, 2)
b = (a * a).sum()
print(a.requires_grad)
print(b.grad_fn)
a.requires_grad_(True)
b = (a * a).sum()
print(a.requires_grad)
print(b.grad_fn)
# .detach(): get a new Tensor with the same content but no gradient
# computation:
a = torch.randn(2, 2, requires_grad=True)
b = a.detach()
print(a.requires_grad)
print(b.requires_grad)
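And the third option, torch.no_grad(), excludes everything inside the with-block from gradient tracking:
a = torch.randn(2, 2, requires_grad=True)
print((a ** 2).requires_grad) # True
with torch.no_grad():
    b = a ** 2
print(b.requires_grad) # False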
Putting it together: linear regression trained manually with autograd.
import torch
# Linear regression
# f = w * x
# here : f = 2 * x
# training samples
X = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8, 10, 12, 14, 16], dtype=torch.float32)
# weight to optimize
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)
# model output
def forward(x):
    return w * x
# loss = MSE
def loss(y, y_pred):
    return ((y_pred - y)**2).mean()
X_test = 5.0
print(f'Prediction before training: f({X_test}) = {forward(X_test).item():.3f}')
# Training
learning_rate = 0.01
n_epochs = 100
for epoch in range(n_epochs):
    # predict = forward pass
    y_pred = forward(X)
    # loss
    l = loss(Y, y_pred)
    # calculate gradients = backward pass
    l.backward()
    # update weights
    #w.data = w.data - learning_rate * w.grad
    with torch.no_grad():
        w -= learning_rate * w.grad
    # zero the gradients after updating
    w.grad.zero_()
    if (epoch+1) % 10 == 0:
        print(f'epoch {epoch+1}: w = {w.item():.3f}, loss = {l.item():.3f}')
print(f'Prediction after training: f({X_test}) = {forward(X_test).item():.3f}')
3. Training Loop with Model, Loss & Optimizer
A typical PyTorch pipeline: 1) design the model, 2) construct loss and optimizer, 3) run the training loop (forward pass, backward pass, weight update).
import torch
import torch.nn as nn
# Linear regression
# f = w * x
# here : f = 2 * x
# 0) Training samples, shape (n_samples, n_features)
X = torch.tensor([[1], [2], [3], [4], [5], [6], [7], [8]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8], [10], [12], [14], [16]], dtype=torch.float32)
n_samples, n_features = X.shape
# 1) Design model
class LinearRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegression, self).__init__()
        # define different layers
        self.lin = nn.Linear(input_dim, output_dim)
    def forward(self, x):
        return self.lin(x)
model = LinearRegression(n_features, n_features)
# 2) Define loss and optimizer
learning_rate = 0.01
n_epochs = 100
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# 3) Training loop
for epoch in range(n_epochs):
    # predict = forward pass with our model
    y_predicted = model(X)
    # loss
    l = loss(Y, y_predicted)
    # calculate gradients = backward pass
    l.backward()
    # update weights
    optimizer.step()
    # zero the gradients after updating
    optimizer.zero_grad()
    if (epoch+1) % 10 == 0:
        w, b = model.parameters() # unpack parameters
        print('epoch ', epoch+1, ': w = ', w[0][0].item(), ' loss = ', l.item())
4. Neural Network
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Hyper-parameters
input_size = 784 # 28x28
hidden_size = 500
num_classes = 10
num_epochs = 2
batch_size = 100
learning_rate = 0.001
# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)
test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=transforms.ToTensor())
# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
examples = iter(test_loader)
example_data, example_targets = next(examples)
for i in range(6):
    plt.subplot(2, 3, i+1)
    plt.imshow(example_data[i][0], cmap='gray')
plt.show()
# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, num_classes)
    def forward(self, x):
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        return out
model = NeuralNet(input_size, hidden_size, num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Training loop
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # flatten images: [100, 1, 28, 28] -> [100, 784]
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Backward pass and update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
# Evaluation: gradients are not needed here
with torch.no_grad():
    n_correct = 0
    n_samples = len(test_loader.dataset)
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # torch.max returns (values, indices)
        _, predicted = torch.max(outputs, 1)
        n_correct += (predicted == labels).sum().item()
    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network on the test images: {acc} %')
5. Convolutional Neural Network
This section covers:
• Convolutional Layers
• MaxPooling
• Save/Load model
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Hyper-parameters
num_epochs = 10
batch_size = 32
learning_rate = 0.001
# normalize to [-1, 1] (matches the unnormalize step in imshow below)
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
train_dataset = torchvision.datasets.CIFAR10(root='./data',
                                             train=True,
                                             download=True,
                                             transform=transform)
test_dataset = torchvision.datasets.CIFAR10(root='./data',
                                            train=False,
                                            download=True,
                                            transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
def imshow(imgs):
    imgs = imgs / 2 + 0.5 # unnormalize
    npimgs = imgs.numpy()
    plt.imshow(np.transpose(npimgs, (1, 2, 0)))
    plt.show()
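To peek at a few training images, imshow can be combined with torchvision.utils.make_grid. A small usage sketch:
dataiter = iter(train_loader)
images, labels = next(dataiter)
imshow(torchvision.utils.make_grid(images[0:4]))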
class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.conv3 = nn.Conv2d(64, 64, 3)
        self.fc1 = nn.Linear(64*4*4, 64)
        self.fc2 = nn.Linear(64, 10)
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x))) # [b, 3, 32, 32] -> [b, 32, 15, 15]
        x = self.pool(F.relu(self.conv2(x))) # -> [b, 64, 6, 6]
        x = F.relu(self.conv3(x))            # -> [b, 64, 4, 4]
        x = torch.flatten(x, 1)              # -> [b, 64*4*4]
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
model = ConvNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    running_loss = 0.0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Backward pass and update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f'[{epoch + 1}] loss: {running_loss / n_total_steps:.3f}')
print('Finished Training')
PATH = './cnn.pth'
torch.save(model.state_dict(), PATH)
loaded_model = ConvNet()
loaded_model.load_state_dict(torch.load(PATH)) # takes the loaded dictionary, not the path itself
loaded_model.to(device)
loaded_model.eval()
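One detail worth knowing: if the checkpoint was saved on a GPU and is loaded on a CPU-only machine (or vice versa), torch.load accepts a map_location argument:
# remap storages to the current device while loading
loaded_model.load_state_dict(torch.load(PATH, map_location=device))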
with torch.no_grad():
    n_correct = 0
    n_correct2 = 0
    n_samples = len(test_loader.dataset)
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        # original model
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        n_correct += (predicted == labels).sum().item()
        # loaded model
        outputs2 = loaded_model(images)
        _, predicted2 = torch.max(outputs2, 1)
        n_correct2 += (predicted2 == labels).sum().item()
    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the model: {acc} %')
    acc2 = 100.0 * n_correct2 / n_samples
    print(f'Accuracy of the loaded model: {acc2} %')