Dimensionality Reduction¶
# Helper to pretty-print numpy arrays as HTML tables.
from IPython.display import HTML, display
import tabulate

def pp(a, show_head=True):
    '''
    args: show_head -> if True, print only the first 5 rows.
    return: None
    '''
    if a.ndim < 2:
        a = [a]
    if show_head:
        display(HTML(tabulate.tabulate(a[:5], tablefmt='html')))
        return
    display(HTML(tabulate.tabulate(a, tablefmt='html')))
Dimensionality reduction is simply reducing the number of dimensions (features) in your data.
Benefits:
- Computationally efficient
- Reduces the complexity of the hypothesis (see Tutorial 3 for more information); consequently, it helps avoid overfitting.
PCA and SVD are two dimensionality-reduction techniques we'll expand on here.
PCA (Principal Component Analysis)¶
PCA tries to identify the k-dimensional subspace in which your data (currently in n dimensions, where n > k) approximately lies; the goal of PCA is to identify that subspace.
Not to Be Confused With:
Recall that we covered a related use case earlier in Tutorial/Test 2, where we selected the k best features; those k features were a subset of the original n features. Here we reduce dimensions by decomposing the dataset with PCA, so the resulting k features are not necessarily a subset of the original n features, i.e. their values can be altogether different. The example below will clarify the point.
Working:
PCA attempts to find the k dimensions (think of dimensions as unit vectors) along which the variance of the data is maximum, i.e. the directions along which the data mostly lies, and to drop the remaining (n - k) dimensions along which the data varies minimally (that residual variation is likely noise). See the notes for a detailed description.
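To make this concrete, here is a minimal sketch (not a cell from the original notebook) of PCA done by hand: center the data, eigendecompose its covariance matrix, and keep the top-k eigenvectors as the principal directions. It reuses the same toy data defined in the next cell, and the recovered direction may differ from sklearn's by a sign flip.
# Sketch: PCA by hand on a small (m x n) data matrix.
import numpy as np

X_toy = np.array([[-1., -1.], [-2., -1.], [-3., -2.],
                  [ 1.,  1.], [ 2.,  1.], [ 3.,  2.]])
X_c = X_toy - X_toy.mean(axis=0)          # PCA works on centered data

cov = np.cov(X_c, rowvar=False)           # n x n covariance matrix
eigvals, eigvecs = np.linalg.eigh(cov)    # eigh returns eigenvalues in ascending order

k = 1
top_k = eigvecs[:, ::-1][:, :k]           # directions of maximum variance (unit vectors)
X_proj = X_c @ top_k                      # data projected onto those k directions
print(top_k.T)                            # compare with _pca.components_ below (up to sign)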
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
# Plot this data
plt.scatter(X[:,0], X[:,1])
<matplotlib.collections.PathCollection at 0x7f8457db67b8>
Notice that this simple data lies mostly along the diagonal, i.e. that direction explains most of the variance in the data.
# Fit on X.
pca = PCA(n_components=1)
_pca = pca.fit(X)
print('Original Features:')
pp(X)  # print only the first 5 rows
Original Features:
-1 | -1 |
-2 | -1 |
-3 | -2 |
1 | 1 |
2 | 1 |
X.mean(axis=0)
array([0., 0.])
x_centered = np.subtract(X, X.mean(axis=0))  # same values as X, since the mean is already zero
x_centered
array([[-1., -1.], [-2., -1.], [-3., -2.], [ 1.,  1.], [ 2.,  1.], [ 3.,  2.]])
Direction of maximum Variance¶
principal_component = _pca.components_
print('Principal Component:')
pp(principal_component)
Principal Component:
-0.838492 | -0.544914 |
Overlay the principal axis on the data.¶
min_vals = X.min(axis=0)
x_min_val = min_vals[0]
y_min_val = min_vals[1]
origin = [x_min_val], [y_min_val] # origin point. Set to min along each dimension.
plt.quiver(*origin, principal_component[:, 0], principal_component[:, 1], color=['r'], scale=21)
plt.scatter(X[:,0], X[:,1])
<matplotlib.collections.PathCollection at 0x7fc83f49f710>
Indeed, most of the variance in the data is explained by the principal component shown above.
Quantify the amount of variance explained by this principal component.¶
pp(_pca.explained_variance_) # See sklearn docs for more info.
7.93954 |
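The value above is the raw variance of the data along the principal axis. If you want it as a fraction of the total variance instead, PCA also exposes explained_variance_ratio_; a quick sketch (output not reproduced here):
# Fraction of the total variance captured by the single principal component.
pp(_pca.explained_variance_ratio_)  # close to 1.0 for this nearly one-dimensional toy data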
Reduced Features in X¶
reduced_features = _pca.transform(X)
print('Reduced Features (to 1 dim):')
pp(reduced_features, show_head=True) # Print features Reduced to 1 dim
Reduced Features (to 1 dim):
1.38341 |
2.2219 |
3.6053 |
-1.38341 |
-2.2219 |
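To connect this back to the picture above: transform() simply centers the data and projects it onto the principal direction. A minimal sketch (not from the original notebook) verifying that:
# transform() = centering followed by a dot product with the principal component.
manual = np.dot(X - X.mean(axis=0), principal_component.T)
print(np.allclose(manual, reduced_features))  # expected: True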
SVD (Singular Value Decomposition)¶
SVD is another dimensionality-reduction technique, like PCA. Contrary to PCA, sklearn's (truncated) SVD estimator does not center the data (i.e. it does not subtract the mean of each feature) before computing the singular value decomposition. This is very useful when the input is a sparse matrix, because centering a sparse matrix would densify it and may blow up memory.
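A rough illustration of that memory point (not from the original notebook; the exact counts will vary): subtracting the column means from a sparse matrix turns almost every stored zero into a non-zero value, so the result is effectively dense.
# Centering a sparse matrix densifies it.
from scipy.sparse import random as sparse_random
import numpy as np

S = sparse_random(1000, 1000, density=0.01, format='csr', random_state=0)
print(S.nnz)                           # roughly 10,000 stored non-zeros

dense = S.toarray()
centered = dense - dense.mean(axis=0)  # the mean of almost every column is non-zero
print(np.count_nonzero(centered))      # close to 1,000,000 non-zeros: a ~100x blow-up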
Not to Be Confused With:
Note that if the data is already centered, then PCA and (truncated) SVD in sklearn behave exactly the same. Our small dataset (X above) is already centered, i.e.:
centered_X = np.subtract(X, X.mean(axis=0))
np.array_equal(X, centered_X)  # True
So, running SVD on X will return exactly the same values as the ones we got with PCA. Try it (Exercise).
More information in Sklearn Docs.
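If you want to sanity-check your answer afterwards, here is one way to do it (a sketch, assuming the toy X and _pca from the PCA cells above are still in scope; TruncatedSVD's sign may be flipped relative to PCA):
# Truncated SVD on already-centered data reproduces the PCA projection (up to sign).
from sklearn.decomposition import TruncatedSVD

svd_check = TruncatedSVD(n_components=1, random_state=42)
svd_reduced = svd_check.fit_transform(X)
print(np.allclose(np.abs(svd_reduced), np.abs(_pca.transform(X))))  # expected: True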
SVD with Sparse Matrices¶
Assuming you've already tried SVD with X, let's now walk through SVD with sparse matrices (an important use case of SVD).
Let's create a synthetic sparse dataset.
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import random as sparse_random

X = sparse_random(100, 100, density=0.01, format='csr',
                  random_state=42)
pp(X.toarray(), show_head=True)
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.097857 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.535371 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.34102 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)
svd.fit(X)
TruncatedSVD(algorithm='randomized', n_components=5, n_iter=7, random_state=42, tol=0.0)
print('5 Components:')
pp(svd.components_)
5 Components:
-6.00779e-07 | -8.83491e-07 | -3.65132e-06 | 3.15929e-06 | -2.2519e-07 | -6.1254e-06 | -3.29022e-20 | 1.23116e-06 | -8.73522e-20 | -6.41882e-07 | -2.37452e-20 | 3.0254e-07 | 1.24859e-21 | -1.60615e-20 | 0 | -1.26827e-08 | 0 | 1.42846e-05 | 0 | 7.93096e-12 | 0.00133738 | 0 | -5.66238e-06 | -1.3481e-12 | 0 | 3.24036e-05 | 0.0355343 | -1.05061e-05 | -3.78555e-07 | 0.155288 | -5.91275e-07 | -3.29048e-05 | 0 | -2.99188e-05 | -0.000101377 | 2.11164e-07 | 0 | 0.522662 | 8.3302e-15 | -7.79873e-07 | 0 | 0 | 0 | 0 | 0 | 2.06722e-05 | 1.49847e-08 | 0.00913598 | -3.87475e-05 | 6.82773e-07 | 0 | 0 | -7.00074e-06 | 0.108023 | 0 | -5.23862e-06 | -7.79137e-06 | 0 | -3.81264e-05 | 0 | -7.49007e-10 | 0 | 7.05256e-06 | 6.58057e-10 | 0 | -7.31666e-10 | 0.0663846 | 0 | 0 | 0 | -4.42296e-06 | 0 | 2.0114e-06 | 0 | 0.40124 | -6.70833e-07 | 0 | 4.60826e-07 | 8.34314e-10 | 0.134636 | 0 | 0.71078 | 3.96812e-07 | -1.47714e-05 | 0 | -4.3789e-13 | 0 | 0 | 1.06576e-06 | 0 | -9.29866e-06 | -1.13194e-05 | 0.00799787 | 9.41237e-06 | -6.38086e-12 | 0 | 7.00827e-12 | -5.95141e-06 | -8.21677e-12 | 0.0299047 |
-6.99759e-06 | -5.12766e-07 | 3.34032e-07 | 3.94799e-06 | -2.62141e-06 | -1.32562e-05 | -4.67027e-19 | 9.01945e-07 | -5.45761e-19 | -2.15999e-05 | -1.68002e-19 | -1.18495e-07 | -4.83603e-20 | -2.25162e-21 | 0 | -3.61267e-08 | 0 | -1.88637e-06 | 0 | 4.92127e-11 | 0.000137543 | 0 | -5.02985e-06 | 1.7385e-11 | 0 | 0.709024 | 1.06702e-07 | 1.68275e-05 | -8.19242e-07 | 1.03141e-07 | -6.4329e-06 | 0.701414 | 0 | 0.000210322 | -5.66643e-06 | 3.07804e-07 | 0 | -1.46783e-05 | 9.09237e-15 | -1.87749e-06 | 0 | 0 | 0 | 0 | 0 | 2.58329e-05 | 6.50745e-08 | 1.11789e-06 | -5.09145e-08 | 2.32057e-06 | 0 | 0 | -0.000109102 | -7.93508e-06 | 0 | -7.30583e-08 | 1.24794e-05 | 0 | -4.65001e-05 | 0 | -5.51238e-10 | 0 | -2.052e-05 | 3.12106e-09 | 0 | 6.51636e-10 | -1.32096e-05 | 0 | 0 | 0 | 4.04623e-07 | 0 | -6.58812e-07 | 0 | 1.42882e-05 | -7.8091e-06 | 0 | 6.71726e-07 | 3.97071e-09 | 2.53107e-05 | 0 | 1.45807e-06 | 1.64951e-06 | 7.26608e-05 | 0 | -1.54518e-13 | 0 | 0 | 3.00208e-06 | 0 | 0.0728241 | -5.17363e-07 | 1.45996e-06 | 4.93236e-05 | -1.44953e-11 | 0 | 2.34972e-12 | 9.53231e-06 | -8.40702e-12 | -7.05856e-06 |
0.556104 | 6.7822e-07 | -1.18453e-06 | 6.8939e-06 | 0.208328 | 1.30707e-05 | -7.20196e-19 | -7.72235e-06 | -9.76687e-19 | 6.34591e-05 | -2.26663e-19 | -1.95627e-07 | -5.99911e-20 | -6.26651e-21 | 0 | -8.59798e-09 | 0 | -3.49253e-05 | 0 | -6.76594e-11 | -8.17845e-06 | 0 | -4.92154e-05 | -6.81233e-12 | 0 | 7.71027e-05 | -2.93205e-07 | 1.29157e-05 | 8.07782e-07 | -1.14571e-06 | 0.512054 | -5.81153e-05 | 0 | 8.66306e-05 | -0.000298636 | 2.91165e-07 | 0 | 1.90149e-05 | 1.16316e-14 | -2.05311e-05 | 0 | 0 | 0 | 0 | 0 | 4.51089e-05 | -6.88198e-08 | -5.05465e-06 | -0.000122629 | 1.47571e-05 | 0 | 0 | -2.26983e-05 | 8.69529e-06 | 0 | -6.70961e-05 | 9.57836e-06 | 0 | 6.74433e-05 | 0 | -9.88462e-10 | 0 | 3.8649e-05 | -3.57902e-09 | 0 | -3.0083e-09 | 7.62179e-06 | 0 | 0 | 0 | -1.43486e-06 | 0 | -3.79668e-06 | 0 | -1.04342e-05 | 0.620603 | 0 | 6.35414e-07 | -4.56661e-09 | -2.44938e-05 | 0 | -4.05561e-06 | -1.66651e-06 | -0.000210675 | 0 | -7.23776e-12 | 0 | 0 | 1.98367e-05 | 0 | -1.82032e-05 | -0.000145724 | -1.34124e-06 | -5.31922e-05 | -3.1797e-12 | 0 | 2.60917e-11 | 7.3164e-06 | 1.18581e-12 | 1.00235e-05 |
3.25923e-06 | -3.52709e-06 | -4.51006e-05 | -2.10442e-05 | 1.22207e-06 | 5.23832e-05 | -1.64501e-18 | -0.000122248 | -2.67745e-18 | 0.000554427 | -9.72026e-19 | -1.86923e-06 | -3.8011e-19 | 7.86476e-20 | 0 | 2.44884e-07 | 0 | -0.000264229 | 0 | -7.72608e-10 | -0.00375126 | 0 | -6.22183e-05 | 5.61405e-11 | 0 | 0.00051815 | 0.0420528 | -5.57764e-05 | 3.23732e-06 | 0.196144 | 3.33337e-06 | -0.000509476 | 0 | -0.000256662 | -0.000783495 | -1.52627e-06 | 0 | -0.560891 | 7.94976e-14 | -0.000286543 | 0 | 0 | 0 | 0 | 0 | -0.000137699 | -9.0548e-07 | -0.0188577 | 0.000194907 | 0.000129808 | 0 | 0 | -0.000392481 | -0.156696 | 0 | -0.000638387 | -4.13641e-05 | 0 | 0.000223046 | 0 | 1.33892e-08 | 0 | 0.000359574 | -4.49938e-08 | 0 | -1.56448e-08 | -0.0731215 | 0 | 0 | 0 | -5.46318e-05 | 0 | -2.73739e-05 | 0 | -0.380313 | 3.64051e-06 | 0 | -3.3308e-06 | -5.73172e-08 | -0.200853 | 0 | 0.653458 | -2.25137e-05 | -0.0016541 | 0 | 6.44629e-11 | 0 | 0 | 0.00015825 | 0 | -0.000145447 | -0.00138282 | -0.0117146 | 0.000139017 | 4.03811e-11 | 0 | 2.61088e-10 | -3.15958e-05 | 1.00317e-10 | -0.0525222 |
8.37097e-07 | -6.82216e-05 | 0.636345 | -0.000346523 | 3.04293e-07 | -0.00277423 | 1.03465e-17 | -0.00149964 | 6.21829e-18 | 0.00247586 | 9.49015e-18 | 1.3807e-05 | 2.40182e-18 | 1.13736e-19 | 0 | 1.79726e-07 | 0 | 0.00407012 | 0 | -4.25882e-10 | 0.000993658 | 0 | 0.000973216 | -4.32072e-10 | 0 | 0.00698468 | 7.39876e-07 | -0.00910518 | -0.00017145 | -2.23455e-05 | -2.05218e-06 | -0.00685393 | 0 | -0.0175061 | -0.01028 | -1.58371e-05 | 0 | 0.000117087 | 2.96954e-14 | -0.00346774 | 0 | 0 | 0 | 0 | 0 | -0.00226741 | -1.40361e-06 | -0.000232494 | 0.00457055 | -0.000284879 | 0 | 0 | -0.0048174 | -9.60408e-06 | 0 | -0.00358012 | -0.00675245 | 0 | -0.00231434 | 0 | -2.54154e-08 | 0 | 0.00441133 | -5.59896e-08 | 0 | 1.45707e-07 | -0.000295829 | 0 | 0 | 0 | 0.770825 | 0 | 0.000621325 | 0 | -2.33809e-06 | 9.0648e-07 | 0 | -3.45615e-05 | -7.06928e-08 | -0.000262628 | 0 | 2.0716e-05 | -3.87514e-05 | 0.00554265 | 0 | 7.02959e-10 | 0 | 0 | -0.000384598 | 0 | -0.00195794 | -0.00760188 | -1.09613e-05 | 0.00226531 | 5.4942e-11 | 0 | -1.73538e-10 | -0.00515783 | -1.67496e-10 | -0.0003727 |
print('Variances explained by each of the 5 components:')
pp(svd.explained_variance_ratio_)
Variances explained by each of the 5 components:
0.0646051 | 0.0633948 | 0.0639441 | 0.053529 | 0.0406268 |
print('Singular Values for each of the 5 components:')
pp(svd.singular_values_)
Singular Values for each of the 5 components:
1.55361 | 1.51214 | 1.51052 | 1.37057 | 1.19917 |
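Finally, as a usage note (a sketch, not a cell from the original notebook): transform() maps each sparse row from its 100 original features down to the 5 dense SVD features.
# Reduce the sparse matrix itself: 100 sparse features -> 5 dense features per row.
X_reduced = svd.transform(X)
print(X_reduced.shape)  # (100, 5)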