Closed
Description
@apaszke
The "char_rnn_classification" tutorial has a bug in the forward method of the following code:
import torch
import torch.nn as nn
from torch.autograd import Variable
class RNN(nn.Module):
    """Character-level RNN classifier as quoted from the tutorial.

    Each time step concatenates the current input with the previous
    hidden state; ``i2h`` maps that concatenation to the next hidden
    state, while ``i2o`` maps the *same concatenation* (not the new
    hidden state) to class log-probabilities — which is the behavior
    this issue is reporting on.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        # Both layers consume the concatenation [input; hidden].
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Run one step; return (log-probabilities, next hidden state)."""
        combined = torch.cat((input, hidden), 1)
        hidden = self.i2h(combined)
        output = self.i2o(combined)  # note: reads `combined`, not the new hidden
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        # Zero hidden state for the start of a sequence (batch of 1).
        return Variable(torch.zeros(1, self.hidden_size))
n_hidden = 128
# n_letters and n_categories are defined earlier in the tutorial
# (data-loading section); they are not part of this snippet.
rnn = RNN(n_letters, n_hidden, n_categories)
The hidden-state update is implemented by these lines:
combined = torch.cat((input, hidden), 1)
hidden = self.i2h(combined)
However, these lines:
output = self.i2o(combined)
output = self.softmax(output)
are trying to project into the classification space. The problem is that self.i2o operates on the combined vector [x_t; h_{t-1}] instead of the new hidden state h_t.
This implementation uses the wrong formula: the tutorial computes $o_t = \operatorname{LogSoftmax}(W_{io}\,[x_t; h_{t-1}])$, whereas the intended Elman-style output is $o_t = \operatorname{LogSoftmax}(W_{ho}\,h_t)$ with $h_t = \tanh$-free $W_{ih}\,[x_t; h_{t-1}]$ as computed by i2h.
Which can be implemented as:
def forward(self, input, hidden):
    """Proposed fix: project the *new* hidden state to the output.

    Computes h_t = i2h([x_t; h_{t-1}]) and o_t = LogSoftmax(i2o(h_t)),
    i.e. the classification head reads h_t instead of the concatenation
    [x_t; h_{t-1}] used by the original tutorial code.

    NOTE(review): this changes i2o's expected input width from
    input_size + hidden_size to hidden_size, so __init__ must also be
    changed to nn.Linear(hidden_size, output_size) for this to run.
    """
    combined = torch.cat((input, hidden), 1)  # [x_t; h_{t-1}]
    hidden = self.i2h(combined)               # h_t
    output = self.i2o(hidden)                 # changed: uses h_t, not combined
    output = self.softmax(output)
    return output, hidden