from google.colab import drive
# Mount the user's Google Drive at /content/gdrive so the CSV files and the
# saved model/submission paths used below resolve. Prompts for an OAuth code.
drive.mount('/content/gdrive')
Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline

# Seed NumPy's global RNG for repeatability.
# NOTE(review): this does not seed TensorFlow; weight initialisation and
# shuffling inside Keras are not fixed by this call.
np.random.seed(2)

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools

import os

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Dropout, Flatten, Conv2D, MaxPool2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Global seaborn theme for every plot in this notebook.
sns.set(style='white', context='notebook', palette='deep')
/usr/local/lib/python3.6/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.
  import pandas.util.testing as tm

Load the data

# Read the labelled training set and the unlabelled test set from the
# mounted Drive folder.
train = pd.read_csv('/content/gdrive/My Drive/digit-recognizer/train.csv')
test = pd.read_csv('/content/gdrive/My Drive/digit-recognizer/test.csv')

# Report (rows, columns) of each frame, training set first.
for frame in (train, test):
    print(frame.shape)
(42000, 785)
(28000, 784)
# Separate the pixel columns (features) from the 'label' column (target).
X_train = train.drop(columns=['label'])
Y_train = train['label']

Checking the distribution of the train data

# Bar chart of how many training samples each digit class has.
# The Series is passed as `x=` explicitly: passing it positionally is
# deprecated in seaborn 0.11 and is interpreted as `data` in 0.12+.
sns.countplot(x=Y_train)
<matplotlib.axes._subplots.AxesSubplot at 0x7fdff922d828>

Testing for Null and Missing values

# Per-column flag: True if that pixel column contains any missing value.
X_train.isna().any()
pixel0      False
pixel1      False
pixel2      False
pixel3      False
pixel4      False
            ...  
pixel779    False
pixel780    False
pixel781    False
pixel782    False
pixel783    False
Length: 784, dtype: bool
# Same missing-value check for the test set's pixel columns.
test.isna().any()
pixel0      False
pixel1      False
pixel2      False
pixel3      False
pixel4      False
            ...  
pixel779    False
pixel780    False
pixel781    False
pixel782    False
pixel783    False
Length: 784, dtype: bool

Normalize the Data

# Scale pixel intensities from [0, 255] to [0, 1].
# The result must be assigned back to `X_train` (capital X): the previous
# `x_train = ...` created an unused variable, so the model was trained on
# unscaled pixels while `test` was scaled, making train and inference inputs
# inconsistent.
X_train = X_train / 255
test = test / 255

Reshape

# Turn each flat row of 784 pixels into a 28x28 single-channel image;
# -1 lets NumPy infer the number of samples.
X_train = X_train.to_numpy().reshape(-1, 28, 28, 1)
test = test.to_numpy().reshape(-1, 28, 28, 1)

Label Encoding

# One-hot encode the digit labels into 10-element vectors.
Y_train = keras.utils.to_categorical(Y_train, num_classes=10)

Split the Training Data into Training/Validation Sets

# Hold out 10% of the labelled data; it is passed to `model.fit` as
# `validation_data` below. `random_state` fixes the shuffle.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_train, Y_train, test_size=0.1, random_state=2
)
# Preview one training image (index 1, its single channel).
g = plt.imshow(X_train[1, :, :, 0])

CNN Model

# Layer stack: two 3x3 convolutions -> 2x2 max-pool -> dropout -> one more
# 3x3 convolution -> 2x2 max-pool -> flatten -> dense(128) -> softmax(10).
cnn_layers = [
    layers.Conv2D(64, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=2, strides=2),
    layers.Dropout(0.25),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=2, strides=2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    # One output unit per digit class.
    layers.Dense(10, activation='softmax'),
]
model = Sequential(cnn_layers)

# Categorical cross-entropy matches the one-hot encoded labels.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 26, 26, 64)        640       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 24, 24, 64)        36928     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 64)        0         
_________________________________________________________________
dropout (Dropout)            (None, 12, 12, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 10, 10, 64)        36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 1600)              0         
_________________________________________________________________
dense (Dense)                (None, 128)               204928    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
=================================================================
Total params: 280,714
Trainable params: 280,714
Non-trainable params: 0
_________________________________________________________________

Without data augmentation, validation accuracy ≈ 99% (0.9907 after the final epoch in the log below)

# Train for 24 epochs in mini-batches of 100, evaluating on the held-out
# split after every epoch; verbose=2 prints one summary line per epoch.
model.fit(
    X_train,
    Y_train,
    batch_size=100,
    epochs=24,
    verbose=2,
    validation_data=(X_test, Y_test),
)
Epoch 1/24
WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0033s vs `on_train_batch_end` time: 0.0072s). Check your callbacks.
378/378 - 3s - loss: 0.6462 - accuracy: 0.9002 - val_loss: 0.0789 - val_accuracy: 0.9743
Epoch 2/24
378/378 - 3s - loss: 0.0802 - accuracy: 0.9747 - val_loss: 0.0541 - val_accuracy: 0.9826
Epoch 3/24
378/378 - 3s - loss: 0.0576 - accuracy: 0.9822 - val_loss: 0.0570 - val_accuracy: 0.9807
Epoch 4/24
378/378 - 3s - loss: 0.0437 - accuracy: 0.9862 - val_loss: 0.0457 - val_accuracy: 0.9850
Epoch 5/24
378/378 - 3s - loss: 0.0382 - accuracy: 0.9885 - val_loss: 0.0463 - val_accuracy: 0.9857
Epoch 6/24
378/378 - 3s - loss: 0.0335 - accuracy: 0.9890 - val_loss: 0.0548 - val_accuracy: 0.9852
Epoch 7/24
378/378 - 3s - loss: 0.0298 - accuracy: 0.9899 - val_loss: 0.0396 - val_accuracy: 0.9888
Epoch 8/24
378/378 - 3s - loss: 0.0269 - accuracy: 0.9911 - val_loss: 0.0469 - val_accuracy: 0.9864
Epoch 9/24
378/378 - 3s - loss: 0.0253 - accuracy: 0.9919 - val_loss: 0.0492 - val_accuracy: 0.9860
Epoch 10/24
378/378 - 3s - loss: 0.0219 - accuracy: 0.9932 - val_loss: 0.0482 - val_accuracy: 0.9871
Epoch 11/24
378/378 - 3s - loss: 0.0202 - accuracy: 0.9933 - val_loss: 0.0354 - val_accuracy: 0.9912
Epoch 12/24
378/378 - 3s - loss: 0.0188 - accuracy: 0.9940 - val_loss: 0.0487 - val_accuracy: 0.9883
Epoch 13/24
378/378 - 3s - loss: 0.0239 - accuracy: 0.9925 - val_loss: 0.0493 - val_accuracy: 0.9881
Epoch 14/24
378/378 - 3s - loss: 0.0182 - accuracy: 0.9939 - val_loss: 0.0456 - val_accuracy: 0.9900
Epoch 15/24
378/378 - 3s - loss: 0.0167 - accuracy: 0.9947 - val_loss: 0.0480 - val_accuracy: 0.9890
Epoch 16/24
378/378 - 3s - loss: 0.0160 - accuracy: 0.9947 - val_loss: 0.0424 - val_accuracy: 0.9900
Epoch 17/24
378/378 - 3s - loss: 0.0172 - accuracy: 0.9946 - val_loss: 0.0487 - val_accuracy: 0.9881
Epoch 18/24
378/378 - 3s - loss: 0.0174 - accuracy: 0.9947 - val_loss: 0.0624 - val_accuracy: 0.9869
Epoch 19/24
378/378 - 3s - loss: 0.0131 - accuracy: 0.9958 - val_loss: 0.0492 - val_accuracy: 0.9905
Epoch 20/24
378/378 - 3s - loss: 0.0137 - accuracy: 0.9954 - val_loss: 0.0533 - val_accuracy: 0.9871
Epoch 21/24
378/378 - 3s - loss: 0.0129 - accuracy: 0.9958 - val_loss: 0.0422 - val_accuracy: 0.9910
Epoch 22/24
378/378 - 3s - loss: 0.0127 - accuracy: 0.9962 - val_loss: 0.0484 - val_accuracy: 0.9900
Epoch 23/24
378/378 - 3s - loss: 0.0119 - accuracy: 0.9963 - val_loss: 0.0489 - val_accuracy: 0.9888
Epoch 24/24
378/378 - 3s - loss: 0.0133 - accuracy: 0.9957 - val_loss: 0.0484 - val_accuracy: 0.9907
<tensorflow.python.keras.callbacks.History at 0x7fdff8f17a90>
# Persist the trained model to Drive in HDF5 (.h5) format.
model.save('/content/gdrive/My Drive/digit-recognizer/digit_recognizer_trial_model.h5')

Confusion Matrix

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Draw a confusion matrix as an annotated heatmap on the current figure.

    Parameters
    ----------
    cm : 2-D array
        Confusion matrix, indexed as ``cm[i, j]``. Rows are drawn along the
        y axis ("True label"), columns along the x axis ("Predicted label").
    classes : sequence
        Tick labels, one per class, used on both axes.
    normalize : bool, default False
        If True, divide every row by its row sum before plotting, so both
        the colours and the annotations show per-row fractions.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap used for the heatmap.
    """
    # Normalise first so the heatmap colours and the cell annotations are
    # derived from the same values.
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Fractions are shortened to two decimals so they fit in a cell;
        # raw values are passed through unchanged.
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

# Predict class probabilities for the held-out validation split
Y_pred = model.predict(X_test)
# Collapse each probability vector to the index of its largest entry (predicted digit)
Y_pred_classes = np.argmax(Y_pred,axis = 1)
# Collapse the one-hot validation labels back to digit indices
Y_true = np.argmax(Y_test,axis = 1)   
# Compute the confusion matrix (true labels first, predictions second)
confusion_mtx = confusion_matrix(Y_true, Y_pred_classes) 
# Plot the confusion matrix with one tick per digit 0-9
plot_confusion_matrix(confusion_mtx, classes = range(10)) 
# Class probabilities for the unlabelled test images.
results = model.predict(test)

# Select the index with the maximum probability as the predicted digit.
results = np.argmax(results, axis=1)

results = pd.Series(results, name="Label")
# ImageId is 1-based. Derive its length from the predictions instead of
# hard-coding 28000 so the two columns always line up.
image_ids = pd.Series(range(1, len(results) + 1), name="ImageId")
submission = pd.concat([image_ids, results], axis=1)

submission.to_csv("/content/gdrive/My Drive/digit-recognizer/cnn_mnist_datagen.csv", index=False)