
lesson2

March 16, 2024

#Chapter 4: Neural Network Training


##Sum of Squared Errors
[ ]: import numpy as np
y = [1.0,0.05,0.6,0.0,0.05,0.1,0.0,0.1,0.0,0.0]
#y = [1.0,0.05,0.1,0.0,0.05,0.1,0.0,0.6,0.0,0.0]
t = [0,0,1,0,0,0,0,0,0,0]

def sum_squared_error(y, t):
    return 0.5 * np.sum((y - t)**2)

sqe = sum_squared_error(np.array(y), np.array(t))

print(sqe)

0.5925
##Cross-Entropy Error
[ ]: import numpy as np

def cross_entropy_error(y, t):
    delta = 1e-7  # small constant to avoid log(0)
    return -np.sum(t * np.log(y + delta))

y = [1.0,0.05,0.6,0.0,0.05,0.1,0.0,0.1,0.0,0.0]
t = [0,0,1,0,0,0,0,0,0,0]

cee = cross_entropy_error(np.array(y), np.array(t))


print(cee)

0.510825457099338
• In the example, the output for the correct label is 0.6 and the cross-entropy error is about 0.51.
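As a quick check (a minimal sketch reusing the cell above; y2 is the commented-out alternative output from the sum-of-squared-errors cell), lowering the output for the correct class increases the cross-entropy error:

[ ]: # The probability for the correct class (index 2) drops from 0.6 to 0.1,
# so the error grows from about 0.51 to about 2.30.
y2 = [1.0,0.05,0.1,0.0,0.05,0.1,0.0,0.6,0.0,0.0]
print(cross_entropy_error(np.array(y2), np.array(t)))  # -log(0.1) = about 2.30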
##Mini-Batch Learning
In neural network training, a small subset of the training data is selected at random, and training is carried out on each such group of data, which is called a mini-batch.
[ ]: from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive

[9]: cd /content/drive/MyDrive/GG Colab/Deep Learning/dataset

/content/drive/MyDrive/GG Colab/Deep Learning/dataset

[10]: from my_mnist import load_mnist

[ ]: import sys, os
sys.path.append(os.pardir)
import numpy as np

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=False)

print(x_train.shape)
print(t_train.shape)

(60000, 784)
(60000,)
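A minimal sketch of drawing a random mini-batch from the loaded MNIST data with np.random.choice (the batch size of 10 is an arbitrary choice here, just for illustration):

[ ]: train_size = x_train.shape[0]                           # 60000
batch_size = 10                                          # arbitrary size for illustration
batch_mask = np.random.choice(train_size, batch_size)    # 10 random indices
x_batch = x_train[batch_mask]                            # (10, 784)
t_batch = t_train[batch_mask]                            # (10,)
print(x_batch.shape, t_batch.shape)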
##Numerical Differentiation
[ ]: import numpy as np
import matplotlib.pyplot as plt

def numerical_diff(f, x):
    h = 1e-4
    return (f(x+h) - f(x-h)) / (2*h)

def function_1(x):
    return 0.01*x**2 + 0.1*x

x = np.arange(0.0,20.0, 0.1)
y = function_1(x)
plt.xlabel("x")
plt.ylabel("f(x)")
plt.plot(x,y)
plt.show()
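As a quick sanity check of numerical_diff (the analytic derivative of function_1 is 0.02x + 0.1, so the expected values at x = 5 and x = 10 are 0.2 and 0.3):

[ ]: print(numerical_diff(function_1, 5))   # about 0.2 (analytic: 0.02*5 + 0.1)
print(numerical_diff(function_1, 10))  # about 0.3 (analytic: 0.02*10 + 0.1)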

##Partial Derivative
[ ]: def function_2(x):
    return x[0]**2 + x[1]**2
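A minimal sketch of computing the partial derivative of function_2 with respect to x0 at (x0, x1) = (3, 4): fix x1 at 4.0 and reuse numerical_diff from above (function_tmp1 is a helper introduced here only for illustration; the analytic value is 2 * 3 = 6):

[ ]: def function_tmp1(x0):
    return x0**2 + 4.0**2   # function_2 with x1 fixed at 4.0

print(numerical_diff(function_tmp1, 3.0))  # about 6.0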

##Gradient
[ ]: import numpy as np

def function_2(x):
    return x[0]**2 + x[1]**2

def numericial_gradient(f, x):
    h = 1e-4
    grad = np.zeros_like(x)  # array of zeros with the same shape as x

    for idx in range(x.size):
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x)

        x[idx] = tmp_val - h
        fxh2 = f(x)

        grad[idx] = (fxh1 - fxh2) / (2*h)
        x[idx] = tmp_val

    return grad

numericial_gradient(function_2, np.array([3.0, 4.0]))

[ ]: array([6., 8.])
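The same gradient function can be evaluated at other points (a small added check; the analytic gradient of function_2 is (2*x0, 2*x1)):

[ ]: print(numericial_gradient(function_2, np.array([0.0, 2.0])))  # about [0., 4.]
print(numericial_gradient(function_2, np.array([3.0, 0.0])))  # about [6., 0.]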

##Gradient descent
[ ]: import numpy as np

init_x = np.array([-3.0, 4.0])

def numericial_gradient(f, x):
    h = 1e-4
    grad = np.zeros_like(x)  # array of zeros with the same shape as x

    for idx in range(x.size):
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x)

        x[idx] = tmp_val - h
        fxh2 = f(x)

        grad[idx] = (fxh1 - fxh2) / (2*h)
        x[idx] = tmp_val

    return grad

def function_2(x):
    return x[0]**2 + x[1]**2

def gradient_descent(f, init_x, lr=0.01, step_num=100):
    x = init_x

    for i in range(step_num):
        grad = numericial_gradient(f, x)
        x -= lr * grad  # step against the gradient

    return x

gradient_descent(function_2, init_x=init_x, lr=0.1, step_num=100)

[ ]: array([-6.11110793e-10, 8.14814391e-10])
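A minimal sketch illustrating how sensitive gradient descent is to the learning rate (the values 10.0 and 1e-10 are picked here only for illustration): too large a rate makes the values blow up, too small a rate barely moves them from the starting point.

[ ]: # Too large: the updates overshoot and x diverges to huge values.
print(gradient_descent(function_2, init_x=np.array([-3.0, 4.0]), lr=10.0, step_num=100))

# Too small: after 100 steps x is still essentially the initial point.
print(gradient_descent(function_2, init_x=np.array([-3.0, 4.0]), lr=1e-10, step_num=100))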

##Gradients for a Neural Network


[3]: from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive

[4]: cd /content/drive/MyDrive/GG Colab/Deep Learning/common

/content/drive/MyDrive/GG Colab/Deep Learning/common

[5]: from my_functions import softmax, cross_entropy_error

[6]: from my_gradient import numerical_gradient

[ ]: import sys, os
sys.path.append(os.pardir)
import numpy as np

class simpleNet:
    def __init__(self):
        self.W = np.random.randn(2, 3)

    def predict(self, x):
        return np.dot(x, self.W)

    def loss(self, x, t):
        z = self.predict(x)
        y = softmax(z)
        loss = cross_entropy_error(y, t)

        return loss

net = simpleNet()
print(net.W)

x = np.array([0.6, 0.9])
p = net.predict(x)
print(p)

t = np.array([0,0,1])
net.loss(x,t)

def f(W):
    return net.loss(x, t)

dW = numerical_gradient(f, net.W)

print(dW)

[[-1.02003529  0.65014502  0.34236522]
 [-0.24540338 -0.74331997  1.15400741]]
[-0.83288421 -0.27890096  1.2440258 ]
[[ 0.05597043  0.09739811 -0.15336855]
 [ 0.08395565  0.14609717 -0.23005282]]
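Each entry of dW approximates how the loss changes when the corresponding weight increases slightly; here the column for the correct class (index 2) has negative entries, so a gradient-descent update would increase those weights. The predicted class itself can be read off with np.argmax (a small added check, not in the original cell):

[ ]: print(np.argmax(p))                  # predicted class = index of the largest score
print(np.argmax(p) == np.argmax(t))  # does it match the label?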
##A Two-layer Neural Network as a Class
[17]: import sys, os
sys.path.append(os.pardir)
from my_functions import *
from my_gradient import numerical_gradient

class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        return y

    def loss(self, x, t):
        y = self.predict(x)

        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

        return grads

net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)

net.params['W1'].shape
net.params['b1'].shape
net.params['W2'].shape
net.params['b2'].shape

[17]: (10,)
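Only the value of the last expression is displayed above; a small added check (not in the original cell) prints all four parameter shapes and runs a forward pass on a dummy batch, assuming the sigmoid and softmax imported from my_functions accept batched input:

[ ]: for key in ('W1', 'b1', 'W2', 'b2'):
    print(key, net.params[key].shape)   # (784, 100) (100,) (100, 10) (10,)

x = np.random.rand(100, 784)   # dummy mini-batch of 100 "images"
y = net.predict(x)
print(y.shape)                 # (100, 10)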

##Implementing Mini-Batch Training


[ ]: import numpy as np
from my_mnist import load_mnist

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

train_loss_list = []

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
    # Pick a random mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradients numerically and update each parameter
    grad = network.numerical_gradient(x_batch, t_batch)

    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on the current mini-batch
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
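A minimal sketch (not part of the original cell) of plotting the recorded mini-batch losses, assuming matplotlib is available as in the earlier cells; the curve should trend downward as training proceeds:

[ ]: import matplotlib.pyplot as plt

plt.plot(np.arange(len(train_loss_list)), train_loss_list)
plt.xlabel("iteration")
plt.ylabel("loss")
plt.show()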

##Using Test Data for Evaluation


[ ]: import numpy as np
from my_mnist import load_mnist

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

train_loss_list = []
train_acc_list = []
test_acc_list = []

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
iter_per_epoch = max(train_size / batch_size, 1)  # iterations per epoch

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grad = network.numerical_gradient(x_batch, t_batch)

    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch, evaluate accuracy on the full training and test sets
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))
