MNIST Handwritten Digit Classification using a CNN¶
Introduction¶
This notebook will help you understand the following:
- A brief introduction to the MNIST Dataset and its importance in Computer Vision
- Building a baseline classification model using a CNN
- Evaluating the CNN model
- Building an improved CNN
- Finalizing the improved model for prediction on the test dataset
Introduction to the MNIST Dataset¶
MNIST is an acronym for the Modified National Institute of Standards and Technology dataset.
- The dataset consists of small 28x28-pixel grayscale images of handwritten digits from 0 to 9, split into 60,000 training images and 10,000 test images.
- The task is to classify each image of a handwritten digit into the correct number, that is, one of the 10 digits from 0 to 9.
- Since these classes are mutually exclusive and there are more than two of them, this is a multi-class classification problem in supervised learning.
- MNIST has been an important and widely used dataset in machine learning and computer vision. It is sometimes called the "Hello World" of computer vision, and is one of the first image datasets that deep learning based computer vision practitioners learn to work with. A great deal of research has been carried out to outperform the existing state of the art and achieve the best possible accuracy on MNIST, so it serves as a benchmark of sorts for testing the latest deep learning architectures in computer vision.
- More importantly for us, MNIST is a dataset on which Convolutional Neural Network (CNN) based approaches have shown excellent performance, and that is what we will implement as part of this case study.
Importing the Libraries¶
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
import tensorflow as tf
# Keras Sequential Model
from tensorflow.keras.models import Sequential
# Importing all the different layers and optimizers
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization, Activation, LeakyReLU
from tensorflow.keras.optimizers import Adam,SGD
# The below code can be used to ignore the warnings that may occur due to deprecations
import warnings
warnings.filterwarnings("ignore")
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
Loading the Dataset¶
- The MNIST dataset is already present in TensorFlow and Keras, in the form of an N-dimensional Numpy array, so we can directly import the dataset from the package and use it.
- The dataset can be imported as shown below:
from tensorflow.keras.datasets import mnist
mnist.load_data()
- mnist.load_data() returns both the train and test data. The train data consists of 60,000 images in the form of Numpy arrays, while the test data consists of 10,000 images as Numpy arrays.
# Loading the data
from tensorflow.keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz 11490434/11490434 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
X_train.shape
(60000, 28, 28)
Here the data is stored in a 3-dimensional Numpy array.
- The first dimension 60000 denotes the number of images in the training data, and each image is stacked on top of the other, making it a 3-dimensional Numpy array.
- The second and third dimensions 28, 28 denote the number of pixels along the height and width of the 2-dimensional image.
Below is a 2-dimensional Numpy representation (the pixel map) of the first image in the training data. Each image is represented by a 28x28 square of pixel values.
X_train[0]
ndarray (28, 28)
array([[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 18, 18, 18, 126, 136, 175, 26, 166, 255, 247, 127, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 30, 36, 94, 154, 170, 253, 253, 253, 253, 253, 225, 172, 253, 242, 195, 64, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 49, 238, 253, 253, 253, 253, 253, 253, 253, 253, 251, 93, 82, 82, 56, 39, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 18, 219, 253, 253, 253, 253, 253, 198, 182, 247, 241, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 80, 156, 107, 253, 253, 205, 11, 0, 43, 154, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 1, 154, 253, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 139, 253, 190, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 190, 253, 70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 241, 225, 160, 108, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81, 240, 253, 253, 119, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 186, 253, 253, 150, 27, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 93, 252, 253, 187, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 249, 253, 249, 64, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46, 130, 183, 253, 253, 207, 2, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 148, 229, 253, 253, 253, 250, 182, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 114, 221, 253, 253, 253, 253, 201, 78, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 23, 66, 213, 253, 253, 253, 253, 198, 81, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 18, 171, 219, 253, 253, 253, 253, 195, 80, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 55, 172, 226, 253, 253, 253, 253, 244, 133, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 136, 253, 253, 253, 212, 135, 132, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=uint8)
y_train[0]
5
The target labels are numerical digits from 0 to 9.
X_test.shape
(10000, 28, 28)
The test dataset, on the other hand, has 10,000 images. Each image, as before, is a 28x28 square image.
Data Visualization¶
Converting the Numpy arrays to images and visualizing a few random images¶
num_classes=10 # Number of Classes
categories=np.unique(y_train) # Obtaining the unique classes from y_train
rows = 3 # Defining number of rows=3
cols = 4 # Defining number of columns=4
fig = plt.figure(figsize=(10, 8)) # Defining the figure size to 10x8
for i in range(cols):
    for j in range(rows):
        random_index = np.random.randint(0, len(y_train)) # Generating random indices from the data and plotting the images
        ax = fig.add_subplot(rows, cols, i * rows + j + 1) # Adding subplots with 3 rows and 4 columns
        ax.imshow(X_train[random_index, :], cmap=plt.get_cmap('gray')) # Plotting the image using cmap=gray
        ax.set_title(categories[y_train[random_index]])
plt.show()
# Plot distribution of each category
count_plot = sns.countplot(x=y_train, palette='tab10')
We observe that the dataset appears to be quite balanced, with each category having approximately the same number of images.
So accuracy should be a good evaluation metric for the model performance in this case study.
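As a quick numerical check of this balance (a minimal sketch using only the already loaded y_train and NumPy), we can count the images per class:
# Counting the number of images per digit class to confirm the dataset is balanced
classes, counts = np.unique(y_train, return_counts=True)
for digit, count in zip(classes, counts):
    print(f"Digit {digit}: {count} images")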
Data Preparation¶
In the data preparation stage, we generally reshape the dataset to have a single channel and we also normalize the feature inputs.
Normalization is highly recommended as it has the following benefits when training a neural network model:
- Normalization makes training faster and reduces the chances of getting stuck in a local optimum.
- Weight decay and weight estimation can be done more conveniently with normalized inputs.
- In deep neural networks, normalization helps to avoid the vanishing/exploding gradient problem. This problem occurs when very small or very large error gradients accumulate, resulting in either extremely small or extremely large updates to the model weights during training, which makes the model unstable and unable to learn from the training data.
As we already know, the images are in grayscale, so let us reshape the arrays to just have a single channel.
# Reshape dataset to have a single channel
X_train = X_train.reshape((X_train.shape[0], 28, 28, 1))
X_test = X_test.reshape((X_test.shape[0], 28, 28, 1))
Since the image pixel values range from 0 to 255, our method of normalization here will be scaling: we divide all the pixel values by 255 so that the images have values between 0 and 1.
# Normalizing the image pixels
X_train_normalized = X_train.astype('float32')/255.0
X_test_normalized = X_test.astype('float32')/255.0
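As a small sanity check (a sketch using the arrays defined above), we can confirm the pixel value ranges before and after scaling:
# Confirming the pixel value ranges before and after normalization
print("Before scaling:", X_train.min(), "to", X_train.max())                        # expected: 0 to 255
print("After scaling :", X_train_normalized.min(), "to", X_train_normalized.max())  # expected: 0.0 to 1.0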
Since this is a 10-class classification problem, the output layer should have 10 neurons which will provide us with the probabilities of the input image belonging to each of those 10 classes. Therefore, we also need to create a one-hot encoded representation for the target classes.
# Creating one-hot encoded representation of target labels
# We can do this by using this utility function - https://www.tensorflow.org/api_docs/python/tf/keras/utils/to_categorical
# to_categorical() function is also explained in the Neural Networks Module
y_train_encoded = tf.keras.utils.to_categorical(y_train)
y_test_encoded = tf.keras.utils.to_categorical(y_test)
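To see what the encoding looks like (a quick illustrative check), the first training label, 5, becomes a 10-element vector with a 1 at index 5:
# Inspecting the one-hot encoded form of the first training label
print(y_train[0])          # 5
print(y_train_encoded[0])  # [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]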
Model Building - Artificial Neural Network (ANN)¶
Defining an Artificial Neural Network (ANN) Model¶
Let's first create an ANN model as we have done previously.
We will create this ANN sequentially, by adding the layers one after another.
First, let's set the seed for the random number generators in Numpy, Python's Random package, and TensorFlow to ensure we are able to reproduce these results every time we run the code.
# Fixing the seed for random number generators
import random
np.random.seed(42)
random.seed(42)
tf.random.set_seed(42)
Let's now build a sequential ANN model.
# Initializing a sequential model
ann_model = Sequential()
# Flatten the 28x28 input so that dense layers can be added on top of it
ann_model.add(Flatten(input_shape=(28, 28)))
# Adding a hidden dense layer with 100 neurons
ann_model.add(Dense(100, activation='relu'))
# Adding the output layer with 10 neurons and softmax activation since this is a multi-class classification problem
ann_model.add(Dense(10, activation='softmax'))
# Using SGD Optimizer
opt = SGD(learning_rate=0.01, momentum=0.9)
# Compile model
ann_model.compile(optimizer=opt,
loss='categorical_crossentropy',
metrics=['accuracy'])
# Generating the summary of the model
ann_model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ flatten (Flatten) │ (None, 784) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense (Dense) │ (None, 100) │ 78,500 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_1 (Dense) │ (None, 10) │ 1,010 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 79,510 (310.59 KB)
Trainable params: 79,510 (310.59 KB)
Non-trainable params: 0 (0.00 B)
Let us understand the parameters in each layer:
Flatten Layer
- The input shape to the layer is (28, 28), which is flattened into 784 values.
- It does not have any trainable parameters, so the number of parameters is 0.
Fully Connected Dense Layer
- Number of neurons: 100
- Number of parameters: (784x100) + (1x100) = 78400 + 100 = 78500
- (Number of neurons in the current layer x number of neurons in the previous layer) + (1 x number of neurons in the current layer, for the biases)
Output Layer
- Number of neurons: 10
- Number of parameters: (10x100) + (1x10) = 1010
- (Number of neurons in the current layer x number of neurons in the previous layer, here the previous dense layer with 100 neurons) + (1 x number of neurons in the current layer, for the biases)
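To double-check this arithmetic, we can compare the hand-computed counts against Keras's per-layer counts (a minimal sketch using the ann_model defined above and the standard layer.count_params() method):
# Verifying the hand-computed parameter counts against Keras's per-layer counts
for layer in ann_model.layers:
    print(layer.name, layer.count_params())
# Hand computation for the two dense layers
print("dense  :", 784 * 100 + 100)  # weights + biases = 78500
print("dense_1:", 100 * 10 + 10)    # weights + biases = 1010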
Fitting the model on the train data:
history = ann_model.fit(
X_train_normalized, y_train_encoded,
epochs=15,
validation_split=0.1,
shuffle=True,
batch_size=64,
verbose=2
)
Epoch 1/15 844/844 - 3s - 4ms/step - accuracy: 0.8861 - loss: 0.4072 - val_accuracy: 0.9443 - val_loss: 0.2013 Epoch 2/15 844/844 - 2s - 3ms/step - accuracy: 0.9388 - loss: 0.2151 - val_accuracy: 0.9593 - val_loss: 0.1459 Epoch 3/15 844/844 - 3s - 3ms/step - accuracy: 0.9535 - loss: 0.1631 - val_accuracy: 0.9653 - val_loss: 0.1210 Epoch 4/15 844/844 - 3s - 4ms/step - accuracy: 0.9630 - loss: 0.1332 - val_accuracy: 0.9690 - val_loss: 0.1068 Epoch 5/15 844/844 - 4s - 5ms/step - accuracy: 0.9683 - loss: 0.1134 - val_accuracy: 0.9710 - val_loss: 0.0975 Epoch 6/15 844/844 - 3s - 3ms/step - accuracy: 0.9724 - loss: 0.0988 - val_accuracy: 0.9738 - val_loss: 0.0914 Epoch 7/15 844/844 - 2s - 3ms/step - accuracy: 0.9753 - loss: 0.0874 - val_accuracy: 0.9747 - val_loss: 0.0866 Epoch 8/15 844/844 - 3s - 4ms/step - accuracy: 0.9779 - loss: 0.0782 - val_accuracy: 0.9757 - val_loss: 0.0833 Epoch 9/15 844/844 - 3s - 3ms/step - accuracy: 0.9806 - loss: 0.0706 - val_accuracy: 0.9763 - val_loss: 0.0807 Epoch 10/15 844/844 - 2s - 3ms/step - accuracy: 0.9825 - loss: 0.0642 - val_accuracy: 0.9767 - val_loss: 0.0788 Epoch 11/15 844/844 - 3s - 3ms/step - accuracy: 0.9841 - loss: 0.0586 - val_accuracy: 0.9772 - val_loss: 0.0768 Epoch 12/15 844/844 - 2s - 3ms/step - accuracy: 0.9855 - loss: 0.0536 - val_accuracy: 0.9777 - val_loss: 0.0757 Epoch 13/15 844/844 - 3s - 4ms/step - accuracy: 0.9868 - loss: 0.0492 - val_accuracy: 0.9778 - val_loss: 0.0748 Epoch 14/15 844/844 - 3s - 3ms/step - accuracy: 0.9882 - loss: 0.0454 - val_accuracy: 0.9780 - val_loss: 0.0742 Epoch 15/15 844/844 - 5s - 5ms/step - accuracy: 0.9893 - loss: 0.0419 - val_accuracy: 0.9783 - val_loss: 0.0740
Model Evaluation¶
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()
Observations:
- We can see from the above plot that the model has performed well on the train and validation data, with a validation accuracy of ~97%.
Evaluating the model on the test data¶
accuracy = ann_model.evaluate(X_test_normalized, y_test_encoded, verbose=2)
313/313 - 0s - 1ms/step - accuracy: 0.9758 - loss: 0.0800
Generating the Predictions using the test data¶
# Here we get the output as probabilities for each category
y_pred=ann_model.predict(X_test_normalized)
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step
Plotting the Confusion Matrix¶
- The confusion matrix is also available as an inbuilt function in the TensorFlow module, so we can use it to evaluate the classification model.
- The confusion matrix expects categorical (class index) inputs, but y_test_encoded is one-hot encoded and y_pred contains probabilities, so we need to recover the class labels from these values.
- We will use the argmax() function on both y_test_encoded and y_pred to obtain the index of the maximum value across the 10 categories, which gives their respective classes.
# Obtaining the categorical values from y_test_encoded and y_pred
y_pred_arg=np.argmax(y_pred,axis=1)
y_test_arg=np.argmax(y_test_encoded,axis=1)
# Plotting the confusion matrix using the confusion_matrix() function, which is predefined in the TensorFlow module
confusion_matrix = tf.math.confusion_matrix(y_test_arg,y_pred_arg)
f, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
confusion_matrix,
annot=True,
linewidths=.4,
fmt="d",
square=True,
ax=ax
)
plt.show()
We observe that most of the classes are predicted correctly; the misclassifications occur mostly with the digit 4, which is sometimes confused with the digit 9, and the digit 5, which is sometimes confused with the digits 3 and 6.
Model Building - Convolutional Neural Network (CNN)¶
Defining a Convolutional Neural Network (CNN) Model¶
Let's create a CNN model sequentially, where we will be adding the layers one after another.
First, we need to clear the previous model's history from the session.
In Keras, we use a special command to clear a model's state; otherwise, the previous model's history remains in the backend.
Also, let's fix the seeds for the random number generators in Numpy, Python's random module, and TensorFlow again after clearing the backend, so we can reproduce the same results every time we run the code.
# Clearing backend
from tensorflow.keras import backend
backend.clear_session()
# Fixing the seed for random number generators
import random
np.random.seed(42)
random.seed(42)
tf.random.set_seed(42)
Let's build a CNN Model.
The model has 2 main parts:
- The Feature Extraction layers which are comprised of convolutional and pooling layers.
- The Fully Connected classification layers for prediction.
The flow of our model would be as shown below:
- Our model starts with a Conv2D layer with 64 filters of size 3x3 and the ReLU activation function. It takes as input an image of size (28x28x1).
- We also use padding to keep the output shape the same as the input shape, hence the hyperparameter padding='same'. This layer is followed by a Max Pooling layer.
- After this, we have 2 more pairs of Conv2D and Max Pooling layers, each with 32 filters of kernel size 3x3 and a pooling size of (2,2).
- We then flatten the output of the last pooling layer and add a dense layer on top of it.
- This is a dense layer of 100 neurons (the same as in the ANN).
- Finally, we add an output layer with 10 neurons, as we have 10 output classes in this multi-class classification problem.
# Initializing a sequential model
model = Sequential()
# Adding the first conv layer with 64 filters and kernel size 3x3; padding='same' keeps the output size the same as the input size
# input_shape denotes the dimensions of the MNIST input images
model.add(Conv2D(64, (3, 3), activation='relu', padding="same", input_shape=(28, 28, 1)))
# Adding max pooling to reduce the size of output of first conv layer
model.add(MaxPooling2D((2, 2), padding = 'same'))
model.add(Conv2D(32, (3, 3), activation='relu', padding="same"))
model.add(MaxPooling2D((2, 2), padding = 'same'))
model.add(Conv2D(32, (3, 3), activation='relu', padding="same"))
model.add(MaxPooling2D((2, 2), padding = 'same'))
# flattening the output of the conv layer after max pooling to make it ready for creating dense connections
model.add(Flatten())
# Adding a fully connected dense layer with 100 neurons
model.add(Dense(100, activation='relu'))
# Adding the output layer with 10 neurons and softmax activation since this is a multi-class classification problem
model.add(Dense(10, activation='softmax'))
# Using SGD Optimizer
opt = SGD(learning_rate=0.01, momentum=0.9)
# Compile model
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
# Generating the summary of the model
model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ conv2d (Conv2D) │ (None, 28, 28, 64) │ 640 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ max_pooling2d (MaxPooling2D) │ (None, 14, 14, 64) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ conv2d_1 (Conv2D) │ (None, 14, 14, 32) │ 18,464 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ max_pooling2d_1 (MaxPooling2D) │ (None, 7, 7, 32) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ conv2d_2 (Conv2D) │ (None, 7, 7, 32) │ 9,248 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ max_pooling2d_2 (MaxPooling2D) │ (None, 4, 4, 32) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ flatten (Flatten) │ (None, 512) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense (Dense) │ (None, 100) │ 51,300 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_1 (Dense) │ (None, 10) │ 1,010 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 80,662 (315.09 KB)
Trainable params: 80,662 (315.09 KB)
Non-trainable params: 0 (0.00 B)
Let us try to understand the parameters in the CNN layers:
First Conv2D Layer
- The input shape to the Conv2D layer: (28, 28, 1)
- The size of each filter: 3x3
- The number of filters: 64
- The number of parameters: (3x3x1 + 1) x 64 = 640
- (Width of filter x height of filter x number of channels/filters in the previous layer + 1 for the bias) x number of filters in the current layer
Max Pooling Layer
- Pooling has no trainable parameters, so the number of parameters is 0.
Flatten Layer
- It does not have any trainable parameters, so the number of parameters is 0.
Fully Connected Dense Layer
- Number of neurons: 100
- Number of parameters: (100x512) + (1x100) = 51300
- (Number of neurons in the current layer x number of neurons in the previous layer, here the flattened output of the last max pooling layer: 4x4x32 = 512) + (1 x number of neurons in the current layer, for the biases)
Output Layer
- Number of neurons: 10
- Number of parameters: (10x100) + (1x10) = 1010
- (Number of neurons in the current layer x number of neurons in the previous layer, here the previous dense layer with 100 neurons) + (1 x number of neurons in the current layer, for the biases)
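The same arithmetic can be reproduced directly (a small sketch; the expected values match the model summary above):
# Hand-computing the parameter counts of the three Conv2D layers:
# (filter height x filter width x input channels + 1 bias) x number of filters
print("conv2d  :", (3 * 3 * 1 + 1) * 64)   # expected: 640
print("conv2d_1:", (3 * 3 * 64 + 1) * 32)  # expected: 18464
print("conv2d_2:", (3 * 3 * 32 + 1) * 32)  # expected: 9248
# Dense layer on top of the flattened 4x4x32 = 512 features
print("dense   :", 512 * 100 + 100)        # expected: 51300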
Fitting the model on the train data
history_1 = model.fit(
X_train_normalized, y_train_encoded,
epochs=15,
validation_split=0.1,
shuffle=True,
batch_size=64,
verbose=2
)
Epoch 1/15 844/844 - 85s - 101ms/step - accuracy: 0.8903 - loss: 0.3426 - val_accuracy: 0.9748 - val_loss: 0.0769 Epoch 2/15 844/844 - 84s - 99ms/step - accuracy: 0.9776 - loss: 0.0714 - val_accuracy: 0.9840 - val_loss: 0.0517 Epoch 3/15 844/844 - 139s - 165ms/step - accuracy: 0.9841 - loss: 0.0499 - val_accuracy: 0.9877 - val_loss: 0.0427 Epoch 4/15 844/844 - 82s - 97ms/step - accuracy: 0.9875 - loss: 0.0388 - val_accuracy: 0.9872 - val_loss: 0.0445 Epoch 5/15 844/844 - 82s - 97ms/step - accuracy: 0.9901 - loss: 0.0311 - val_accuracy: 0.9870 - val_loss: 0.0484 Epoch 6/15 844/844 - 81s - 96ms/step - accuracy: 0.9922 - loss: 0.0250 - val_accuracy: 0.9852 - val_loss: 0.0537 Epoch 7/15 844/844 - 79s - 93ms/step - accuracy: 0.9935 - loss: 0.0208 - val_accuracy: 0.9847 - val_loss: 0.0561 Epoch 8/15 844/844 - 82s - 97ms/step - accuracy: 0.9946 - loss: 0.0176 - val_accuracy: 0.9843 - val_loss: 0.0601 Epoch 9/15 844/844 - 82s - 97ms/step - accuracy: 0.9952 - loss: 0.0151 - val_accuracy: 0.9835 - val_loss: 0.0593 Epoch 10/15 844/844 - 82s - 97ms/step - accuracy: 0.9957 - loss: 0.0132 - val_accuracy: 0.9843 - val_loss: 0.0656 Epoch 11/15 844/844 - 82s - 97ms/step - accuracy: 0.9964 - loss: 0.0111 - val_accuracy: 0.9875 - val_loss: 0.0552 Epoch 12/15 844/844 - 81s - 96ms/step - accuracy: 0.9971 - loss: 0.0092 - val_accuracy: 0.9895 - val_loss: 0.0489 Epoch 13/15 844/844 - 82s - 97ms/step - accuracy: 0.9974 - loss: 0.0088 - val_accuracy: 0.9895 - val_loss: 0.0513 Epoch 14/15 844/844 - 82s - 97ms/step - accuracy: 0.9977 - loss: 0.0079 - val_accuracy: 0.9893 - val_loss: 0.0478 Epoch 15/15 844/844 - 81s - 96ms/step - accuracy: 0.9974 - loss: 0.0073 - val_accuracy: 0.9907 - val_loss: 0.0443
Model Evaluation¶
plt.plot(history_1.history['accuracy'])
plt.plot(history_1.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()
Observations:
- We can see from the above plot that the model has performed well on the train and validation data, with a validation accuracy of ~99%.
Evaluating the model on test data¶
accuracy = model.evaluate(X_test_normalized, y_test_encoded, verbose=2)
313/313 - 4s - 12ms/step - accuracy: 0.9896 - loss: 0.0412
Generating the predictions using test data¶
# Here we get the output as probabilities for each category
y_pred=model.predict(X_test_normalized)
313/313 ━━━━━━━━━━━━━━━━━━━━ 5s 15ms/step
y_pred
array([[5.1827484e-16, 3.4163555e-12, 1.8032707e-10, ..., 9.9999994e-01, 6.7353641e-14, 9.9369532e-12], [2.3032406e-13, 9.4549890e-10, 9.9999994e-01, ..., 2.5608724e-18, 5.3577439e-15, 1.8851516e-19], [7.7639939e-10, 9.9999863e-01, 5.1731215e-09, ..., 4.7041202e-09, 2.5416526e-07, 8.0008868e-11], ..., [1.7318714e-21, 4.4101029e-15, 1.6639497e-21, ..., 2.7947635e-16, 8.3824451e-13, 4.2346199e-13], [1.9696460e-12, 1.3710701e-19, 1.6568913e-16, ..., 2.0569927e-14, 1.4957119e-07, 1.5942498e-16], [1.7940918e-12, 2.2033888e-16, 2.1275314e-12, ..., 1.1317286e-20, 6.9972561e-12, 1.2541306e-18]], dtype=float32)
Plotting the Confusion Matrix¶
- The confusion matrix is also available as an inbuilt function in the TensorFlow module, so we can use it to evaluate the classification model.
- The confusion matrix expects categorical (class index) inputs, but y_test_encoded is one-hot encoded and y_pred contains probabilities, so we need to recover the class labels from these values.
- We will use the argmax() function on both y_test_encoded and y_pred to obtain the index of the maximum value across the 10 categories, which gives their respective classes.
# Obtaining the categorical values from y_test_encoded and y_pred
y_pred_arg=np.argmax(y_pred,axis=1)
y_test_arg=np.argmax(y_test_encoded,axis=1)
# Plotting the confusion matrix using the confusion_matrix() function, which is predefined in the TensorFlow module
confusion_matrix = tf.math.confusion_matrix(y_test_arg,y_pred_arg)
f, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
confusion_matrix,
annot=True,
linewidths=.4,
fmt="d",
square=True,
ax=ax
)
plt.show()
We observe that most of the classes are predicted correctly.
The misclassifications occur mostly with the digits 4 and 8, which are sometimes confused with the digit 9.
Comparing the number of parameters in ANNs and CNNs¶
- The total number of trainable parameters in the ANN was 79,510.
- The total number of trainable parameters in the CNN is 80,662.
- For the ANN model, we had to flatten the input image of size (28, 28, 1) to a vector of 784 values. This was followed by two dense layers with the same numbers of neurons that we used while building the CNN model.
- Using just 2 dense layers after Flatten() in the ANN model resulted in nearly the same number of parameters as the whole CNN model, even though the CNN additionally has multiple convolutional layers for feature extraction.
This demonstrates why it makes much more sense to use CNNs for image prediction tasks: they exploit the spatial structure of the information in images and, as seen above, achieve noticeably better test accuracy with a comparable number of parameters.
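We can confirm these totals programmatically (a minimal sketch, assuming both ann_model and model are still in memory; count_params() is a standard Keras model method):
# Comparing the total number of trainable parameters in the two models
print("ANN parameters:", ann_model.count_params())  # expected: 79,510
print("CNN parameters:", model.count_params())      # expected: 80,662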
# Convert notebook to html
!jupyter nbconvert --to html "/content/drive/My Drive/Colab Notebooks/Copy of FDS_Project_LearnerNotebook_FullCode.ipynb"