Program Code - Digit Recognizer Paper
Program Code - Digit Recognizer Paper
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in
import os
# Print the full path of every file found under the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)
# Any results you write to the current directory are saved as output.
/kaggle/input/digit-recognizer/train.csv
/kaggle/input/digit-recognizer/test.csv
/kaggle/input/digit-recognizer/sample_submission.csv
In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
tf.__version__
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Dense, Flatten, Activation, MaxPooling2D, Conv2D,
                                     Conv1D, MaxPooling1D)
In [3]:
# Load the competition CSVs. The dataset directory is named once so that
# pointing the notebook at another location needs a single edit.
DATA_DIR = '/kaggle/input/digit-recognizer'
train = pd.read_csv(DATA_DIR + '/train.csv')
test = pd.read_csv(DATA_DIR + '/test.csv')
sample_submission = pd.read_csv(DATA_DIR + '/sample_submission.csv')
In [4]:
# Separate the pixel columns from the 'label' column of the training frame.
X_train = train.drop(columns=['label']).values
y_train = train.label.values

# Normalize each row (axis=1) of the pixel matrix.
X_train = tf.keras.utils.normalize(X_train, axis=1)

# Convert the test frame to an ndarray *before* normalizing, exactly as is done
# for X_train. The previous form called `.values` on the result of
# `normalize`, which only works if `normalize` hands back a DataFrame.
X_test = tf.keras.utils.normalize(test.values, axis=1)
In [5]:
# Sanity check: show the shapes of the training features, labels and test features.
print(*(array.shape for array in (X_train, y_train, X_test)))
(42000, 784) (42000,) (28000, 784)
In [6]:
# Reshape the flat pixel rows into 28x28 single-channel images; -1 lets NumPy
# infer the number of samples.
X_train1 = X_train.reshape(-1, 28, 28, 1)
X_test1 = X_test.reshape(-1, 28, 28, 1)
In [7]:
# Convolutional classifier: three Conv2D(128, 3x3) -> ReLU -> 2x2 max-pool
# stages, followed by a 128-unit dense layer and a 10-unit softmax output.
# (An earlier fully-connected baseline -- Flatten, two Dense(128, relu) layers,
# Dense(10, softmax) -- used to sit here as commented-out code.)
cnn_layers = [
    Conv2D(128, (3, 3), input_shape=X_train1.shape[1:]),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(128),
    Activation('relu'),
    Dense(10),
    Activation('softmax'),
]
model = Sequential(cnn_layers)

# Labels are integer class ids, hence the sparse cross-entropy loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 20 epochs, holding out 10% of the training data for validation.
model.fit(X_train1, y_train, epochs=20, validation_split=0.1)
Train on 37800 samples, validate on 4200 samples
Epoch 1/20
37800/37800 [==============================] - 75s 2ms/sample - loss: 0.2935 -
accuracy: 0.9065 - val_loss: 0.1064 - val_accuracy: 0.9629
Epoch 2/20
37800/37800 [==============================] - 75s 2ms/sample - loss: 0.0934 -
accuracy: 0.9708 - val_loss: 0.0834 - val_accuracy: 0.9748
Epoch 3/20
37800/37800 [==============================] - 74s 2ms/sample - loss: 0.0653 -
accuracy: 0.9793 - val_loss: 0.0587 - val_accuracy: 0.9821
Epoch 4/20
37800/37800 [==============================] - 74s 2ms/sample - loss: 0.0503 -
accuracy: 0.9845 - val_loss: 0.0615 - val_accuracy: 0.9795
Epoch 5/20
7872/37800 [=====>........................] - ETA: 55s - loss: 0.0369 -
accuracy: 0.9874
In [8]:
# Round-trip the trained model through disk, then label the test images.
# NOTE(review): newer Keras releases may require a '.keras' or '.h5' suffix on
# the save path -- confirm against the installed version before upgrading.
model.save('my_digit_recognizer')
new_model = tf.keras.models.load_model('my_digit_recognizer')

# `predict_classes` has been removed from later TensorFlow releases. Taking the
# argmax over the per-class softmax outputs yields the same class indices and
# works on both old and new versions.
class_probabilities = new_model.predict(X_test1)
y_pred = class_probabilities.argmax(axis=-1)
In [9]:
# Build the submission frame from the sample file's ImageId column and the
# predicted labels, write it out, then read it back to inspect the first rows.
# (A stray mid-cell `sample_submission.head()` was dropped: only the last
# expression of a cell is displayed, so its result was silently discarded.)
submission_path = '/kaggle/working/submission.csv'
submission = pd.DataFrame({'ImageId': sample_submission.ImageId, 'Label': y_pred})
submission.to_csv(submission_path, index=False)
check = pd.read_csv(submission_path)
check.head()
Out[9]:
ImageId Label
0 1 2
1 2 0
2 3 9
3 4 9
4 5 3
In [10]:
# Display one test image together with the label predicted for it.
# The index is named once so the plotted image and the printed prediction
# always refer to the same sample.
sample_index = 100
X_test_1 = X_test.reshape(X_test.shape[0], 28, 28)
plt.imshow(X_test_1[sample_index])
plt.show()
print('Prediction: ', y_pred[sample_index])
Prediction: 0