MNIST Digit Visualization¶
Context¶
MNIST stands for the Modified National Institute of Standards and Technology dataset.
The dataset consists of grayscale images of handwritten digits from 0 to 9, each 28x28 pixels (60,000 training images, plus a test set of 10,000).
In this case study, we will work with handwritten-digit image data and visualize the images in two-dimensional space using the two dimensionality reduction techniques learned in the lecture, i.e., PCA and t-SNE.
We will generate scatter plots using our own plotting function and will also use the plot_embedding() function for annotated visualization of the digits, which has been taken from the scikit-learn documentation examples.
Note: We will use the datasets module of the Scikit-learn library to load the data and will only consider 6 classes, i.e., digits from 0 to 5. Scikit-learn's load_digits() provides a smaller 8x8-pixel version of the handwritten-digit data, so each image is a 64-dimensional vector rather than the full 28x28 MNIST format.
Importing the libraries¶
from time import time
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import offsetbox
from sklearn import manifold, datasets, decomposition
import warnings
warnings.filterwarnings("ignore")
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Loading the dataset¶
digits = datasets.load_digits(n_class = 6) # Loading the data using the Scikit-learn library
X = digits.data
y = digits.target
n_samples, n_features = X.shape
n_neighbors = 30 # Kept from the original scikit-learn example; not used below
X.shape # Printing the dimensions of X
(1083, 64)
Observation:
- X contains 1083 images, each represented as a 64-dimensional vector (a flattened 8x8 image).
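As a quick sanity check (this step is not part of the original walkthrough), digits.images stores the unflattened 8x8 version of each row of X, so we can display one of the images directly:

# Display the first sample as an 8x8 image (digits.images holds the
# unflattened counterparts of the rows of X)
plt.figure(figsize = (2, 2))
plt.imshow(digits.images[0], cmap = plt.cm.gray_r)
plt.title("Label: {}".format(y[0]))
plt.axis("off")
plt.show()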
Visualizing the data¶
Now, let's reduce the number of dimensions of the images, from 64 to 2, using PCA and t-SNE, to visualize the distribution of images in 2 dimensions.
# t-SNE embedding of the digits dataset
print("Computing t-SNE embedding")
t0 = time()
tsne = manifold.TSNE(n_components = 2, init = 'pca', random_state = 0)
X_tsne = tsne.fit_transform(X)
t1 = time()
tsne_time = t1-t0
print("t-SNE-Embeddings in time {}".format(tsne_time),"\n", X_tsne)
print("***************************************************")
# Projection on the first 2 principal components using PCA
print("Computing PCA projection")
t0 = time()
X_pca = decomposition.PCA(n_components = 2).fit_transform(X)
t1 = time()
pca_time = t1 - t0
print("PCA projections in time {}".format(pca_time), "\n", X_pca)
print("***************************************************")
Computing t-SNE embedding
t-SNE-Embeddings in time 4.365969896316528 
 [[ 18.388689  -45.670406 ]
 [  7.6534414  34.635826 ]
 [ -6.041101   28.337845 ]
 ...
 [ 31.307928    7.38128  ]
 [ 35.022804    8.89271  ]
 [ 23.916073  -40.07191  ]]
***************************************************
Computing PCA projection
PCA projections in time 0.00977945327758789 
 [[ 10.7625586  -24.73806947]
 [ -0.62690578  26.83050006]
 [  1.68579953  12.29145409]
 ...
 [ 30.84986533   6.30638449]
 [ 32.14391381  10.57477508]
 [ 16.46964915 -21.36013715]]
***************************************************
Observation:
- The time taken to generate t-SNE embeddings (~4.4s) is much longer than the time taken to generate PCA projections (~0.01s).
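One common way to narrow this gap, sketched below (this is not part of the original notebook, and the names X_reduced and X_tsne_fast are ours), is to let PCA compress the data first and then run t-SNE on the compressed representation:

# Optional speed-up sketch: compress to 30 principal components first,
# then run t-SNE on the compressed data
t0 = time()
X_reduced = decomposition.PCA(n_components = 30).fit_transform(X)
X_tsne_fast = manifold.TSNE(n_components = 2, init = 'pca', random_state = 0).fit_transform(X_reduced)
print("PCA + t-SNE time: {:.2f}s".format(time() - t0))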
Let's write a function to create scatter plots for the generated embeddings and projections.
# Function to create scatter plot
def scatter(X, title = None):
    plt.figure(figsize = (10, 10))
    plt.title(title)
    sns.scatterplot(x = X[:, 0], y = X[:, 1]) # First and second embedding dimensions
    plt.show()
scatter(X_tsne, "t-SNE embedding of the digits")
scatter(X_pca, "Principal Components projection of the digits using PCA")
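The scatter plots above draw every point in a single color. As a small extension (our own variant, not part of the original function), passing the labels y as the hue makes the class separation much easier to judge:

# Label-aware variant of the scatter plot: color each point by its digit class
plt.figure(figsize = (10, 10))
plt.title("t-SNE embedding of the digits, colored by class")
sns.scatterplot(x = X_tsne[:, 0], y = X_tsne[:, 1],
                hue = [str(label) for label in y]) # Categorical hue, one color per digit
plt.show()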
Now, let's use the plot_embedding() function, mentioned in the introduction, to produce a more advanced visualization. The clusters will appear in the same places, but the plot will be more informative, with digit labels and image thumbnails as annotations.
Note: The following code, taken from scikit-learn, annotates the embeddings created by PCA and t-SNE to provide a more labeled and informative visualization.
# Scale and visualize the embedding vectors
def plot_embedding(X, title = None): # Passing the embedded array and the title of the graph
    x_min, x_max = np.min(X, 0), np.max(X, 0) # Finding the max and min of the passed array
    X = (X - x_min) / (x_max - x_min) # Scaling the array, new values are between 0 and 1
    plt.figure(figsize = (12, 12)) # Setting the figure size to a sufficiently large value
    ax = plt.subplot(111)
    for i in range(X.shape[0]):
        # Draw each point as its digit label, colored by class
        plt.text(X[i, 0], X[i, 1], str(y[i]),
                 color = plt.cm.Set1(y[i] / 10.),
                 fontdict = {'weight': 'bold', 'size': 9})
    if hasattr(offsetbox, 'AnnotationBbox'):
        # Only print thumbnails with matplotlib > 1.0
        shown_images = np.array([[1., 1.]]) # Just something big
        for i in range(X.shape[0]):
            dist = np.sum((X[i] - shown_images) ** 2, 1)
            if np.min(dist) < 4e-3:
                # Don't show points that are too close
                continue
            shown_images = np.r_[shown_images, [X[i]]]
            imagebox = offsetbox.AnnotationBbox(
                offsetbox.OffsetImage(digits.images[i], cmap = plt.cm.gray_r), X[i])
            ax.add_artist(imagebox)
    plt.xticks([]), plt.yticks([])
    if title is not None:
        plt.title(title)
    plt.show()
# Plotting t-SNE embeddings
plot_embedding(X_tsne,
"t-SNE embedding of the digits (time %.2fs)" %
(tsne_time))
# Plotting PCA projections
plot_embedding(X_pca,
"Principal Components projection of the digits (time %.2fs)" %
(pca_time))
Results and Conclusion¶
We have effectively reduced the dimensionality of the images from 64 to 2 using t-SNE and PCA, and plotted the resulting 2D embeddings and projections.
Out of the two methods used above, t-SNE takes a longer time to generate embeddings but gives better visualizations with well-separated clusters for each handwritten digit.
The annotated plots show that, while PCA recovers roughly the same groupings, its projection looks more like a single blob with overlapping classes and is not as well separated as the t-SNE embedding.
t-SNE is good for visualizing data in lower dimensions, but it is computationally expensive and practical mainly for small datasets, whereas PCA is far more efficient and scales to large datasets as well.
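As a quick follow-up check (a sketch we added; it is not part of the original notebook), PCA can also report how much of the data's variance the two plotted components actually retain, which helps explain why its 2D projection loses separation:

# How much variance do the first two principal components keep?
pca = decomposition.PCA(n_components = 2).fit(X)
print("Explained variance ratio per component:", pca.explained_variance_ratio_)
print("Total variance retained: {:.1%}".format(pca.explained_variance_ratio_.sum()))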
# Convert notebook to html
!jupyter nbconvert --to html "/content/drive/My Drive/Colab Notebooks/Copy of FDS_Project_LearnerNotebook_FullCode.ipynb"