# Identification fields for the notebook; both are empty strings here.
NAME = ""
COLLABORATORS = ""


import nose.tools as test_# For testing
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_olivetti_faces
from sklearn.decomposition import PCA
from sklearn.svm import SVC


# Useful in beautifying numpy arrays.
from IPython.display import HTML, display
import tabulate
def pp(a, show_head=True):
    '''
    Render an array as an HTML table through IPython's display machinery.

    args: a -> array exposing `ndim`; when `ndim` is below 2 it is wrapped
               in a list so that it is rendered as a single row.
          show_head -> bool -> if True only the first 5 rows are rendered.
    return: None
    '''
    rows = [a] if a.ndim < 2 else a
    table_source = rows[:5] if show_head else rows
    display(HTML(tabulate.tabulate(table_source, tablefmt='html')))


def load_data(rns, _shuffle):
    '''
    Load the Olivetti faces dataset through scikit-learn.

    args: rns -> int -> random state forwarded to the loader.
          _shuffle -> bool -> whether to shuffle the dataset.

    return: object of type sklearn.utils.Bunch which is dict like.

    NOTE: the loader is called with its default cache/download settings, so
    the first call may need to fetch the data (see the scikit-learn docs).
    '''
    return fetch_olivetti_faces(shuffle=_shuffle, random_state=rns)


# Shuffled dataset loaded with random state 5; the cells below read from it.
olivetti_data_ = (load_data(5, True))


# The loaded object must expose exactly these keys, in this order.
test_.eq_(list(olivetti_data_.keys()), ['data', 'images', 'target', 'DESCR'])


# Second shuffled load with random state 0; not referenced again in the
# code visible here.
olivetti_data_dict = load_data(0, True)


def extract_images(data, idxs_tuple):
    '''
    Extract the images of `data.images` selected by idxs_tuple.

    args: data -> object of type sklearn.utils.Bunch which is dict like;
                  only its `images` attribute is read.
          idxs_tuple -> of len 1 or 2. -> (a,) or (b, c)
              (a,)   -> an index:  data.images[a]
              (b, c) -> a slice:   data.images[b:c]

    return: ndarray -> for a slice, the selected images stacked along the
            first axis, i.e. shape (n, 64, 64) for 64x64 images, where n is
            the number of images selected; for a single index, that one
            image on its own (one dimension fewer).

    raises: ValueError -> if idxs_tuple is not of length 1 or 2.

    Negative values (e.g. (-1,) for the last image) work as well because the
    values are handed to numpy indexing unchanged.
    '''
    images = data.images
    n_idxs = len(idxs_tuple)
    if n_idxs == 1:
        return images[idxs_tuple[0]]
    if n_idxs == 2:
        start, stop = idxs_tuple
        return images[start:stop]
    raise ValueError(
        'idxs_tuple must be of length 1 or 2, got length %d' % n_idxs)


# A two-element tuple is used as a slice: (5, 10) must yield 5 images.
test_.eq_(len(extract_images(olivetti_data_, (5,10))), 5)
# A one-element tuple is used as an index: the leading `[:1]` part of the
# result for (5,) is compared against the 64 expected values listed below.
test_.ok_ (np.isclose(extract_images(olivetti_data_, (5,))[:1], np.array([[0.3140496 , 0.29752067, 0.2768595 , 0.22727273, 0.19421488,
        0.16115703, 0.13636364, 0.12809917, 0.1694215 , 0.3264463 ,
        0.48347107, 0.5413223 , 0.57024795, 0.58677685, 0.58264464,
        0.57024795, 0.54545456, 0.5206612 , 0.46280992, 0.42975205,
        0.4090909 , 0.41322315, 0.3677686 , 0.33471075, 0.3140496 ,
        0.338843  , 0.36363637, 0.38842976, 0.38842976, 0.3966942 ,
        0.41735536, 0.4090909 , 0.42561984, 0.45867768, 0.47933885,
        0.49586776, 0.5041322 , 0.5       , 0.48347107, 0.47107437,
        0.47933885, 0.4876033 , 0.48347107, 0.49586776, 0.5       ,
        0.5       , 0.5       , 0.5082645 , 0.5289256 , 0.53305787,
        0.5413223 , 0.5371901 , 0.5289256 , 0.5123967 , 0.47933885,
        0.43801653, 0.39256197, 0.34710744, 0.29338843, 0.2768595 ,
        0.24793388, 0.21487603, 0.17768595, 0.16528925]])).all())


# Extract 16 images (slice 10:26). The range itself is arbitrary.
images = extract_images(olivetti_data_, (10,26))
# Bare expression: shows the number of extracted images as the cell output.
len(images)


# Modified Solution provided from test 5.
def display_image(image, _title):
    '''
    Draw one image through pyplot with the axis hidden.
    args: image -> ndarray
          _title -> string -> used as the plot title
    return: whatever plt.imshow returns (collected by 'display_all_images()')
    '''
    plt.axis('off')
    plt.title(_title)
    rendered = plt.imshow(image, aspect='auto')
    return rendered


def display_all_images(images):
    '''
    Display all images in a grid on a new figure.
    args: images -> ndarray -> shape (n, 64, 64) where n is total images to display
          n is normally a square number (4, 9, 16, ...), which gives a
          sqrt(n) x sqrt(n) grid. For any other n the grid is made just large
          enough and its last row is left partly empty.

    return: list -> A list of matplotlib.image.AxesImage objects, one per image
    in input order, each being the return value of `display_image()`.
    An empty input returns an empty list and draws nothing.
    '''
    list_of_Axes_objects = []
    n_images = len(images)
    if n_images == 0:
        return list_of_Axes_objects

    # Smallest near-square grid that holds every image.
    n_cols = int(np.ceil(np.sqrt(n_images)))
    n_rows = int(np.ceil(n_images / n_cols))

    # A fresh figure keeps successive calls from drawing over each other.
    plt.figure(figsize=(2 * n_cols, 2 * n_rows))
    for position, image in enumerate(images):
        # Subplot numbering is 1-based; each title is the image's index in `images`.
        plt.subplot(n_rows, n_cols, position + 1)
        list_of_Axes_objects.append(display_image(image, str(position)))
    return list_of_Axes_objects
        
# Plot the 16 extracted images and keep the returned image objects.
axes = display_all_images(images)


# Same call again (a second notebook cell); `axes` is simply rebound.
axes = display_all_images(images)


# All test cases are hidden


# Helper functions which convert rank3 array to rank2 and vice versa.
def make_rank2(rank3arr):
    '''
    Flatten every image of a rank 3 array into a single row.

    args: rank3arr -> ndarray -> shape (n, h, w)
    return: ndarray -> shape (n, h*w)
    '''
    # Take the row count from the array itself instead of a fixed 400, so
    # that subsets of the dataset also end up with one image per row.
    return rank3arr.reshape(len(rank3arr), -1)


def make_showable_image(rank2arr):
    '''
    Turn flattened 64x64 images back into displayable rank 3 form.

    args: rank2arr -> ndarray -> shape (n, 64*64)
    return: ndarray -> shape (n, 64, 64)
    '''
    return rank2arr.reshape(-1, 64, 64)  # make rank3


# Demonstrate the reshape helper: print the shape of the `data` array, then
# the shape after converting it with make_showable_image.
rank2_data = olivetti_data_.data
print('Shape before (rank 2):')
print(rank2_data.shape)
showable_images = make_showable_image(rank2_data)
print('Shape After calling "make_showable_image" (rank 3):')
print(showable_images.shape)


# Feature matrix and labels used for PCA and for the classifier.
X = olivetti_data_.data
print(X.shape)  # (400, 64*64)
y = olivetti_data_.target


# Hold out 25% of the samples for testing; the split is seeded (random_state=42).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42)


# Number of principal components; compared against in a later test cell.
n_components = 150


def train_pca(_X_train, _n_components, _solver, _whiten):
    '''
    Fit a PCA model on the training data.

    args: _X_train -> ndarray -> shape (n, k)
        _n_components -> int -> number of components to keep
        _solver -> string -> passed as PCA's `svd_solver` (Read documentation of PCA)
        _whiten -> bool -> passed as PCA's `whiten` (Read documentation of PCA)

    return: the fitted object of type sklearn.decomposition._pca.PCA
    '''
    model = PCA(n_components=_n_components, svd_solver=_solver, whiten=_whiten)
    # fit() returns the estimator itself, so the fitted model is handed back.
    return model.fit(_X_train)


# Fit PCA with 150 components and check the component matrix shape:
# one row per component, 4096 values per row.
pca = train_pca(X_train, 150, 'randomized', True)
test_.eq_(pca.components_.shape, (150, 4096))


# Fit again with the same arguments (a second notebook cell); this `pca` is
# the one the cells below use.
pca = train_pca(X_train, 150, 'randomized', True)


def var_explained_amount(_pca):
    '''
    Return the amount of variance explained by each principal component.

    args: _pca -> fitted object of type "sklearn.decomposition._pca.PCA"

    return: ndarray of shape (n_components,) -> the model's
            `explained_variance_` attribute (absolute amounts, not ratios).
    '''
    return _pca.explained_variance_


# One explained-variance value is expected per component.
test_.eq_(len(var_explained_amount(pca)), n_components)


# Show the values as an HTML table (pp renders a 1-D array as a single row).
print('variance explained by each component:')
pp(var_explained_amount(pca))


def get_eigenfaces(_pca):
    '''
    Return the principal axes ("eigenfaces") of a fitted PCA model.

    args: _pca -> fitted object of type "sklearn.decomposition._pca.PCA"

    return: ndarray of shape (n_components, 64*64) -> the model's
            `components_` attribute, one flattened eigenface per row.
    '''
    return _pca.components_


# 150 eigenfaces are expected, each a flattened vector of 4096 values.
test_.eq_(get_eigenfaces(pca).shape, (150, 4096))


def _transform(_pca, data):
    '''
    args: pca -> object of type "sklearn.decomposition._pca.PCA"
    return: ndarray (n, ncomponents) where n is len(data)
    '''
    # YOUR CODE HERE
    raise NotImplementedError()


# Project the training data and check that all 300 training rows are kept.
X_train_pca = _transform(pca, X_train)
test_.eq_(len(X_train_pca), 300)


# Projections of both splits; these feed the classifier further down.
X_train_pca = _transform(pca, X_train)
X_test_pca = _transform(pca, X_test)


def display_eigenfaces(_pca, n):
    '''
    Display/Plot the first n eigenfaces extracted using PCA.

    args: _pca -> fitted object of type "sklearn.decomposition._pca.PCA"
          n -> int -> number of leading eigenfaces to show
    return: Exactly what display_all_images returns i.e. list -> A list
                                of matplotlib.image.AxesImage objects

    '''
    # Each row of components_ is one flattened eigenface; bring the first n
    # back to 64x64 image form so they can be plotted.
    eigenfaces = _pca.components_[:n].reshape(-1, 64, 64)
    return display_all_images(eigenfaces)


# Plot the first 25 eigenfaces.
ax = display_eigenfaces(pca, 25)


# Plot the first 16 eigenfaces and check the result: one image object per
# eigenface, each holding a 64x64 array.
axes = display_eigenfaces(pca, 16)
test_.eq_(len(axes), (16))
test_.eq_((axes[0].get_array().shape), (64, 64))


# RBF-kernel SVM with balanced class weights and fixed C / gamma values.
svc_clf = SVC(kernel='rbf', class_weight='balanced', C=1000, gamma=0.005)
# Train the SVC classifier on the PCA-projected training data.
svc_clf = svc_clf.fit(X_train_pca, y_train)


# Bare expression: shows the fitted classifier as the cell output.
svc_clf


# Bare expression: shows the classifier's score on the projected test split.
svc_clf.score(X_test_pca, y_test)

Test 6

Dimensionality Reduction

Eigenface and PCA

Dataset

Question 1

Question 2

Question 3

Data To Train PCA

Train Test Split

Question 4

Question 5

Question 6

Question 7

Question 8

Expected Eigenfaces you should see for n = 25.