
ADALINE with Stochastic Gradient Descent (Minibatch)

In this notebook, we implement ADALINE "by hand," without using PyTorch's autograd capabilities. In Lecture 06, we will use automatic differentiation (also known as "autodiff"; autograd in PyTorch) to implement ADALINE more compactly. The reason we avoid autodiff here is that it is useful to understand what is going on under the hood.
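
For reference (a brief recap in standard notation, not part of the original notebook): ADALINE computes a linear net input and is trained by minimizing the mean squared error, so the gradients implemented manually below follow directly from the chain rule:

$$\hat{\mathbf{y}} = \mathbf{X}\mathbf{w} + b, \qquad \mathcal{L}(\mathbf{w}, b) = \frac{1}{n}\sum_{i=1}^{n}\bigl(\hat{y}^{(i)} - y^{(i)}\bigr)^2$$

$$\frac{\partial \mathcal{L}}{\partial \mathbf{w}} = \frac{2}{n}\,\mathbf{X}^\top\bigl(\hat{\mathbf{y}} - \mathbf{y}\bigr), \qquad \frac{\partial \mathcal{L}}{\partial b} = \frac{2}{n}\sum_{i=1}^{n}\bigl(\hat{y}^{(i)} - y^{(i)}\bigr)$$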

import pandas as pd
import matplotlib.pyplot as plt
import torch
%matplotlib inline

Load & Prepare a Toy Dataset


df = pd.read_csv('./datasets/iris.data', index_col=None, header=None)
df.columns = ['x1', 'x2', 'x3', 'x4', 'y']

# Keep only the Iris-versicolor and Iris-virginica rows (a binary problem)
df = df.iloc[50:150]
df['y'] = df['y'].apply(lambda x: 0 if x == 'Iris-versicolor' else 1)
df.tail()

      x1   x2   x3   x4  y
145  6.7  3.0  5.2  2.3  1
146  6.3  2.5  5.0  1.9  1
147  6.5  3.0  5.2  2.0  1
148  6.2  3.4  5.4  2.3  1
149  5.9  3.0  5.1  1.8  1
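
If the local ./datasets/iris.data file is not available, the same subset can be rebuilt from scikit-learn's bundled copy of the Iris dataset (a sketch, not part of the original notebook; it assumes scikit-learn is installed):

from sklearn.datasets import load_iris

# Rebuild the same DataFrame layout from scikit-learn's copy of Iris
iris = load_iris()
df = pd.DataFrame(iris.data, columns=['x1', 'x2', 'x3', 'x4'])
df['y'] = iris.target
df = df.iloc[50:150]                   # keep versicolor (1) and virginica (2)
df['y'] = (df['y'] == 2).astype(int)   # versicolor -> 0, virginica -> 1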

# Assign features and target
X = torch.tensor(df[['x2', 'x3']].values, dtype=torch.float)
y = torch.tensor(df['y'].values, dtype=torch.int)

# Shuffling & train/test split
torch.manual_seed(123)
shuffle_idx = torch.randperm(y.size(0), dtype=torch.long)

X, y = X[shuffle_idx], y[shuffle_idx]

percent70 = int(shuffle_idx.size(0)*0.7)

X_train, X_test = X[shuffle_idx[:percent70]], X[shuffle_idx[percent70:]]
y_train, y_test = y[shuffle_idx[:percent70]], y[shuffle_idx[percent70:]]

# Normalize (mean zero, unit variance)
mu, sigma = X_train.mean(dim=0), X_train.std(dim=0)

# Use the training-set statistics for both splits to avoid test-set leakage
X_train = (X_train - mu) / sigma
X_test = (X_test - mu) / sigma
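
A quick sanity check (not in the original) confirms the standardization:

# Training features should now have mean ~0 and standard deviation ~1
print(X_train.mean(dim=0))  # expected: values close to 0
print(X_train.std(dim=0))   # expected: values close to 1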

plt.scatter(X_train[y_train == 0, 0], X_train[y_train == 0, 1], label='class 0')
plt.scatter(X_train[y_train == 1, 0], X_train[y_train == 1, 1], label='class 1')
plt.legend()
plt.show()

plt.scatter(X_test[y_test == 0, 0], X_test[y_test == 0, 1], label='class 0')
plt.scatter(X_test[y_test == 1, 0], X_test[y_test == 1, 1], label='class 1')
plt.legend()
plt.show()

Implement ADALINE Model


class Adaline1():
    def __init__(self, num_features):
        self.num_features = num_features
        self.weights = torch.zeros(num_features, 1, dtype=torch.float)
        self.bias = torch.zeros(1, dtype=torch.float)

    def forward(self, x):
        # Linear net input; ADALINE uses the identity activation
        netinputs = torch.add(torch.mm(x, self.weights), self.bias)
        activations = netinputs
        return activations.view(-1)

    def backward(self, x, yhat, y):
        # Derivative of the MSE loss w.r.t. the predictions
        grad_loss_yhat = 2*(yhat - y)

        # Derivatives of the predictions w.r.t. weights and bias
        grad_yhat_weights = x
        grad_yhat_bias = 1.

        # Chain rule: inner times outer, averaged over the minibatch
        grad_loss_weights = torch.mm(grad_yhat_weights.t(),
                                     grad_loss_yhat.view(-1, 1)) / y.size(0)
        grad_loss_bias = torch.sum(grad_yhat_bias*grad_loss_yhat) / y.size(0)

        # Return the negative gradient for the gradient *descent* update
        return (-1)*grad_loss_weights, (-1)*grad_loss_bias
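
As a sanity check (a sketch, not part of the original notebook), the manual gradients can be compared against PyTorch's autograd on a small minibatch; the two should agree up to floating-point error:

# Compare manual gradients with autograd on one minibatch
model_check = Adaline1(num_features=2)
x_batch, y_batch = X_train[:10], y_train[:10].float()

yhat = model_check.forward(x_batch)
neg_grad_w, neg_grad_b = model_check.backward(x_batch, yhat, y_batch)

# Same computation via autograd
w = model_check.weights.clone().requires_grad_(True)
b = model_check.bias.clone().requires_grad_(True)
mse = torch.mean(((torch.mm(x_batch, w) + b).view(-1) - y_batch)**2)
mse.backward()

print(torch.allclose(-neg_grad_w, w.grad))  # expected: True
print(torch.allclose(-neg_grad_b, b.grad))  # expected: True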

Define Training and Evaluation Functions


####################################################
##### Training and evaluation wrappers
####################################################

def loss(yhat, y):
    # Mean squared error
    return torch.mean((yhat - y)**2)


def train(model, x, y, num_epochs,
          learning_rate=0.01, seed=123, minibatch_size=10):
    cost = []
    torch.manual_seed(seed)
    for e in range(num_epochs):

        #### Shuffle epoch
        shuffle_idx = torch.randperm(y.size(0), dtype=torch.long)
        minibatches = torch.split(shuffle_idx, minibatch_size)

        for minibatch_idx in minibatches:

            #### Compute outputs ####
            yhat = model.forward(x[minibatch_idx])

            #### Compute gradients ####
            negative_grad_w, negative_grad_b = \
                model.backward(x[minibatch_idx], yhat, y[minibatch_idx])

            #### Update weights ####
            model.weights += learning_rate * negative_grad_w
            model.bias += learning_rate * negative_grad_b

            #### Logging ####
            minibatch_loss = loss(yhat, y[minibatch_idx])
            print('    Minibatch MSE: %.3f' % minibatch_loss)

        #### Logging ####
        yhat = model.forward(x)
        curr_loss = loss(yhat, y)
        print('Epoch: %03d' % (e+1), end="")
        print(' | MSE: %.5f' % curr_loss)
        cost.append(curr_loss.item())  # store a plain Python float for plotting

    return cost

Train Model
model = Adaline1(num_features=X_train.size(1))
cost = train(model,
             X_train, y_train.float(),
             num_epochs=20,
             learning_rate=0.1,
             seed=123,
             minibatch_size=10)

Evaluate ADALINE Model

Plot Loss (MSE)

plt.plot(range(len(cost)), cost)
plt.ylabel('Mean Squared Error')
plt.xlabel('Epoch')
plt.show()

Compare with analytical solution

print('Weights', model.weights)
print('Bias', model.bias)

Weights tensor([[-0.0763],
[ 0.4181]])
Bias tensor([0.4888])
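
For reference, the analytical_solution function below solves the ordinary least-squares normal equations on the bias-augmented design matrix $\mathbf{X}_b = [\mathbf{1} \;\; \mathbf{X}]$:

$$\begin{pmatrix} b \\ \mathbf{w} \end{pmatrix} = \bigl(\mathbf{X}_b^\top \mathbf{X}_b\bigr)^{-1} \mathbf{X}_b^\top \mathbf{y}$$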

def analytical_solution(x, y):
    # Closed-form least squares: prepend a column of ones for the bias
    Xb = torch.cat((torch.ones((x.size(0), 1)), x), dim=1)
    z = torch.inverse(torch.matmul(Xb.t(), Xb))
    params = torch.matmul(z, torch.matmul(Xb.t(), y))
    b, w = params[0].view(1), params[1:].view(x.size(1), 1)
    return w, b

w, b = analytical_solution(X_train, y_train.float())
print('Analytical weights', w)
print('Analytical bias', b)

Analytical weights tensor([[-0.0703],
        [ 0.4219]])
Analytical bias tensor([0.4857])

Evaluate Prediction Accuracy
ones = torch.ones(y_train.size())
zeros = torch.zeros(y_train.size())
train_pred = model.forward(X_train)
train_acc = torch.mean(
    (torch.where(train_pred > 0.5,
                 ones,
                 zeros).int() == y_train).float())

ones = torch.ones(y_test.size())
zeros = torch.zeros(y_test.size())
test_pred = model.forward(X_test)
test_acc = torch.mean(
    (torch.where(test_pred > 0.5,
                 ones,
                 zeros).int() == y_test).float())
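
Equivalently (a more compact formulation, not in the original), the ones/zeros tensors can be dropped, since the comparison itself already yields 0/1 values:

train_acc = ((model.forward(X_train) > 0.5).int() == y_train).float().mean()
test_acc = ((model.forward(X_test) > 0.5).int() == y_test).float().mean()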

print('Training Accuracy: %.2f' % (train_acc*100))
print('Test Accuracy: %.2f' % (test_acc*100))

Training Accuracy: 90.00
Test Accuracy: 96.67

Decision Boundary
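
Predictions are thresholded at 0.5, so the decision boundary is the line where the net input equals 0.5; this is why 0.5 is subtracted from the bias below:

$$w_1 x_1 + w_2 x_2 + b = 0.5 \quad\Longrightarrow\quad x_2 = \frac{0.5 - b - w_1 x_1}{w_2}$$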
##########################
### 2D Decision Boundary
##########################

w, b = model.weights, model.bias - 0.5  # shift bias so the line sits at the 0.5 threshold

x_min = -3
y_min = (-(w[0] * x_min) - b[0]) / w[1]

x_max = 3
y_max = (-(w[0] * x_max) - b[0]) / w[1]

fig, ax = plt.subplots(1, 2, sharex=True, figsize=(7, 3))

ax[0].plot([x_min, x_max], [y_min, y_max])
ax[1].plot([x_min, x_max], [y_min, y_max])

ax[0].scatter(X_train[y_train==0, 0], X_train[y_train==0, 1], label='class 0', marker='o')
ax[0].scatter(X_train[y_train==1, 0], X_train[y_train==1, 1], label='class 1', marker='s')
ax[1].scatter(X_test[y_test==0, 0], X_test[y_test==0, 1], label='class 0', marker='o')
ax[1].scatter(X_test[y_test==1, 0], X_test[y_test==1, 1], label='class 1', marker='s')

ax[1].legend(loc='upper left')
plt.show()
