# Set up library imports and define a helper function to display numpy arrays.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import nose.tools as test_# For testing

# Useful in beautifying numpy arrays.
from IPython.display import HTML, display
import tabulate
def pp(a, show_head=False):
    '''
    Render a numpy array as an HTML table in the notebook output.

    args: a -> ndarray; an array with fewer than 2 dimensions is wrapped in a
               list so that it is rendered as a single row.
          show_head -> if True print only first 5 rows.
    return: None
    '''
    rows = a if a.ndim >= 2 else [a]
    if show_head:
        rows = rows[:5]
    display(HTML(tabulate.tabulate(rows, tablefmt='html')))


# Setup any library imports here
### BEGIN SOLUTION
from skimage import io
### END SOLUTION


def image_read(image_path):
    '''
    Load the image stored at image_path into an ndarray.

    args: image_path -> string, path of a jpg image
    return: ndarray -> the array returned by io.imread; shape (m,n,c) for the
            colour jpgs used in this notebook
    '''
    ### BEGIN SOLUTION
    return io.imread(image_path)
    ### END SOLUTION


# Visible checks: the small image must load with shape (666, 1000, 3) and its
# top-left pixel must be (240, 240, 240).
image_path = 'image_data/woman_observes_art_small_image.jpg'
test_.ok_ (image_read(image_path).shape == (666, 1000, 3))
test_.ok_ ((np.isclose(image_read(image_path)[0,0,:], np.array([240, 240, 240]))).all())
### BEGIN HIDDEN TESTS
# Hidden checks run against the large image; note that image_path is rebound
# here and that every assertion re-reads the file from disk.
image_path = 'image_data/passengers_ride_large_image.jpg'
test_.eq_ (image_read(image_path).shape, (3602, 5397, 3))
# test an arbitrary pixel
test_.ok_ (image_read(image_path)[5,5,2] == 88 )
test_.ok_ ((image_read(image_path)[5,4:6,:] == np.array([[56, 64, 83],
                                                    [60, 72, 88]])).all())
### END HIDDEN TESTS


# Load both working images once; the rest of the notebook reuses these arrays.
small_image_path = 'image_data/woman_observes_art_small_image.jpg'
small_img = image_read(small_image_path)

large_image_path = 'image_data/passengers_ride_large_image.jpg'
large_img = image_read(large_image_path)


def display_image(image, _title):
    '''
    Display the image through the pyplot interface, with the axis switched
    off and _title as the plot title.
    
    args: image -> ndarray
          _title -> string
    return: None (the function only draws; it has no return statement)
    '''
    ### BEGIN SOLUTION
    # Hide the axis so that only the picture and its title are shown.
    plt.axis('off')
    plt.title(_title)
    plt.imshow(image)
    ### END SOLUTION


# All test cases hidden.
### BEGIN HIDDEN TESTS
# display_image only draws and has no return statement, so the call yields None.
test_.ok_(display_image(small_img, "") is None)
### END HIDDEN TESTS


# The pixel count in each title is rows * cols taken from the image's own
# shape, so the number shown always matches the image being displayed.
display_image(small_img, 'Original Small image with %d colors (pixels)' % (small_img.shape[0] * small_img.shape[1]))


display_image(large_img, 'Original Large image with %d colors (pixels)' % (large_img.shape[0] * large_img.shape[1]))


def preprocess(image_np):
    '''
    Preprocess the input array. Normalize by max value and convert dtype to 
    np.float64
    
    An array whose maximum is 0 (e.g. an all-black image) is returned as
    float64 zeros instead of being divided by zero, and an empty array is
    returned as an empty float64 array.
    
    args: ndarray -> shape (m, n, c)
    return: ndarray -> shape (m, n, c), dtype np.float64
    
    '''
    ### BEGIN SOLUTION
    image_float = image_np.astype(np.float64)
    # max() raises on an empty array, and there is nothing to scale anyway.
    if image_float.size == 0:
        return image_float
    _max_val = image_float.max()
    # Dividing by a zero maximum would fill the result with NaN.
    if _max_val == 0:
        return image_float
    return image_float / _max_val
    ### END SOLUTION


# Visible checks: preprocessing keeps the shape, yields float64, and the
# top-left 3x3 pixel patch of the small image matches the recorded values.
test_.eq_ (preprocess(small_img).shape, small_img.shape)
# check data type
test_.eq_  (preprocess(small_img).dtype, np.float64)
test_.ok_ (np.isclose(preprocess(small_img)[:3,:3,:], np.array([[[0.94117647, 0.94117647, 0.94117647],
        [0.94117647, 0.94117647, 0.94117647],
        [0.94117647, 0.94117647, 0.94117647]],

       [[0.9372549 , 0.9372549 , 0.9372549 ],
        [0.9372549 , 0.9372549 , 0.9372549 ],
        [0.9372549 , 0.9372549 , 0.9372549 ]],

       [[0.93333333, 0.93333333, 0.93333333],
        [0.9372549 , 0.9372549 , 0.9372549 ],
        [0.9372549 , 0.9372549 , 0.9372549 ]]])).all())
### BEGIN HIDDEN TESTS
# Hidden checks repeat the shape/dtype checks on the large image and compare
# two pixels from column 1 against recorded values.
test_.eq_ (preprocess(large_img).shape, large_img.shape)
test_.ok_ (np.isclose(preprocess(large_img)[:2,1,:], np.array([[0.25098039, 0.27058824, 0.34901961],
       [0.24705882, 0.26666667, 0.34117647]])).all())
test_.eq_ (preprocess(large_img).dtype, np.float64)

### END HIDDEN TESTS


# Normalized float64 copies of both images, used by the following steps.
small_image_preprocessed = preprocess(small_img)
large_image_preprocessed = preprocess(large_img)


def make_rank_2(image):
    '''
    Flatten the two leading axes of an image so that every row of the result
    is one pixel.

    arg: ndarray -> shape (m, n, c)
    return: ndarray -> shape (m*n, c)

    '''
    ### BEGIN SOLUTION
    # Unpacking insists on a rank-3 shape; only the channel count is used.
    _rows, _cols, channels = image.shape
    flattened = image.reshape((-1, channels))
    return flattened
    ### END SOLUTION


# Visible checks: the 666 x 1000 small image flattens to 666000 pixel rows and
# the first three rows match the recorded values.
test_.eq_ (make_rank_2(small_image_preprocessed).shape, (666000, 3))

test_.ok_ (np.isclose(make_rank_2(small_image_preprocessed)[:3], np.array([
    [0.94117647, 0.94117647, 0.94117647],
    [0.94117647, 0.94117647, 0.94117647],
    [0.94117647, 0.94117647, 0.94117647]])).all())
### BEGIN HIDDEN TESTS
# Hidden checks: the 3602 x 5397 large image flattens to 19439994 rows, and
# row 4 matches the recorded value.
test_.eq_(make_rank_2(large_image_preprocessed).shape, (19439994, 3))

test_.ok_ (np.isclose(make_rank_2(large_image_preprocessed)[4:5,],
                  np.array([[0.2627451 , 0.28235294, 0.35686275]])).all())

### END HIDDEN TESTS


# Rank-2 (pixels x channels) views of both preprocessed images.
small_image_rank2 = make_rank_2(small_image_preprocessed)
large_image_rank2 = make_rank_2(large_image_preprocessed)


# The training sample holds 1/size_reduce_by of the small image's pixels.
size_reduce_by = 100
n_image_pixels = int(np.prod(small_img.shape[:2]))
reduced_sample_size = n_image_pixels // size_reduce_by


# Setup any library imports here
### BEGIN SOLUTION
from sklearn.utils import shuffle
### END SOLUTION


def get_random_sample(image, sample_size):
    '''
    Pick 'sample_size' pixels from image (ndarray) by shuffling its rows and
    keeping the leading ones. The shuffle is seeded with random_state=0, so
    the same input always produces the same sample.

    args: image -> ndarray -> shape (n*m, c)
          sample_size -> int, number of rows to keep
    return: image -> ndarray -> shape (sample_size, c) where sample_size <= n*m
    '''

    ### BEGIN SOLUTION
    shuffled_pixels = shuffle(image, random_state=0)
    return shuffled_pixels[:sample_size]
    ### END SOLUTION


# The sample of the small image must hold reduced_sample_size (6660) pixels.
test_.eq_ (get_random_sample(small_image_rank2, reduced_sample_size).shape, (6660, 3))
# test randomness
# NOTE(review): this assertion inspects small_image_rank2[0], i.e. the
# unshuffled input, not the output of get_random_sample -- confirm the intent.
test_.ok_ (not (np.isclose(small_image_rank2[0], \
                           [0.13333333, 0.23529412, 0.52941176])).all())


# Both samples use the size derived from the small image.
small_image_sample = get_random_sample(small_image_rank2, reduced_sample_size)
large_image_sample = get_random_sample(large_image_rank2, reduced_sample_size)


# Number of colours (k-means clusters) used for the compressed small image.
n_colors_small_image = 128


def train_kmeans(image, _random_state, n_colors):
        
    ''' Train a kmeans algorithm on given image. Function already imported above.
        Use the provided _random_state and n_colors (which is essentially the number of 
                                                    clusters)
        args: image -> ndarray -> shape (a, c), the pixels to fit on
              _random_state -> int, seed passed to KMeans
              n_colors -> int, number of clusters
        
        
        return: A fitted sklearn.cluster._kmeans.KMeans object 
    '''
    ### BEGIN SOLUTION
    # Fit on the arguments that were passed in, not on notebook globals.
    kmeans = KMeans(n_clusters=n_colors, random_state=_random_state).fit(image)
    return kmeans
    ### END SOLUTION


# The recorded labels below were produced by fitting n_colors_small_image
# clusters with seed 5 on small_image_sample, so those values are passed
# explicitly.
kmeans_test = train_kmeans(small_image_sample, 5, n_colors_small_image)


test_.eq_(kmeans_test.labels_[10], 120)
### BEGIN HIDDEN TESTS
test_.ok_((kmeans_test.labels_[:5] == np.asarray([10,  6, 52, 37, 25])).all())
### END HIDDEN TESTS


# Model used for the rest of the notebook: trained on the reduced sample and
# later applied to every pixel of the small image.
kmeans = train_kmeans(small_image_sample, 5, n_colors_small_image)


def pred_labels(kmeans, image):
    ''' 
    Predict labels (cluster to which each pixel in image belongs).

    args: kmeans -> fitted sklearn.cluster._kmeans.KMeans object
          image -> ndarray -> shape (a, c)

    return: the result of kmeans.predict(image)
    '''
    ### BEGIN SOLUTION
    return kmeans.predict(image)
    ### END SOLUTION


# Visible check: the first five pixels of the small image all fall in cluster 119.
test_.ok_ ((pred_labels(kmeans, small_image_rank2[:5]) == \
            np.asarray([119, 119, 119, 119, 119])).all())
### BEGIN HIDDEN TESTS
test_.ok_ ((pred_labels(kmeans, small_image_rank2[10:15]) == \
            np.asarray([ 5, 119, 119, 119, 119])).all())
### END HIDDEN TESTS


# Cluster label for every pixel of the small image.
predicted_labels = pred_labels(kmeans, small_image_rank2)


# Cluster centres of the fitted model; pp with show_head=True renders only
# the first five rows.
centroids = kmeans.cluster_centers_
pp(centroids, show_head=True)


def get_compressed_image(centroids, labels, m, n):
    '''
    Create a compressed image using centroids as pixel values. Pixel (i, j)
    of the result is the centroid selected by labels[i*n + j].
    
    args: centroids -> ndarray -> shape (k, c) (e.g. (128, 3))
          labels -> ndarray -> shape (m*n,) (e.g. (666000 for small img));
                    only the first m*n labels are used
          m -> int (number of rows of the output image)
          n -> int (number of columns of the output image)
    
    return: image -> ndarray -> shape (m, n, c), dtype np.float64
    raises: IndexError if fewer than m*n labels are supplied
    '''
    ### BEGIN SOLUTION
    c = centroids.shape[1]
    n_pixels = m * n
    labels = np.asarray(labels)
    if len(labels) < n_pixels:
        raise IndexError(
            'expected at least %d labels, got %d' % (n_pixels, len(labels)))
    # Nothing to look up; also avoids indexing with an empty non-integer array.
    if n_pixels == 0:
        return np.zeros((m, n, c))
    # A single fancy-index lookup replaces the per-pixel Python loop.
    pixels = centroids[labels[:n_pixels]]
    return pixels.reshape(m, n, c).astype(np.float64, copy=False)
    ### END SOLUTION


# Output dimensions are taken from the original small image.
m, n, c = tuple(small_img.shape)


new_img = get_compressed_image(centroids, predicted_labels, m, n)

test_.eq_(new_img.shape, (666, 1000, 3))


# Reuse the image built above rather than rebuilding it from the same
# centroids and labels.
compressed_small_img = new_img


# The colour count in the title is the number of centroids actually used.
display_image(compressed_small_img, 'compressed small image with  %d colors' % len(centroids))


def find_distinct_elms(image):
    '''
    Collect the distinct scalar values that occur anywhere in image.

    args: ndarray -> shape (m, n, c)
    return: ndarray -> shape (d,) where d is the total number of unique
            elements, as returned by np.unique
    '''
    ### BEGIN SOLUTION
    distinct_values = np.unique(image)
    return distinct_values
    ### END SOLUTION


# Visible check: the first five distinct values of the compressed image match
# the recorded values.
test_.ok_(np.isclose(find_distinct_elms(compressed_small_img)[:5], np.asarray\
([0.01314879, 0.01349481, 0.01421569, 0.01464821, 0.0172208 ])).all())
### BEGIN HIDDEN TESTS
# Hidden check: the compressed image contains 382 distinct values in total.
test_.eq_(len(find_distinct_elms(compressed_small_img)), 382)
### END HIDDEN TESTS

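# Rendered notebook output captured during export (presumably the first five
# rows of `centroids` shown by pp(centroids, show_head=True) -- confirm):
#   0.563171	0.564649	0.55493
#   0.863332	0.868801	0.865146
#   0.0535241	0.127398	0.350291
#   0.273366	0.349837	0.51683
#   0.68467	0.730481	0.799287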

# Notebook section headings captured during export (anchor marks removed).
#
# Test 5 Solution
#
# Clustering
#
# `For Image Compression`
#
# Introduction
#
# Question 1
#
# Question 2
#
# Question 3
#
# Question 4
#
# Question 5
#
# Question 6
#
# Question 7
#
# Image Compression
#
# Question 8
#
# Compressed Image Display
#
# Question 9
#
# Ungraded Question
#
# Clustering
#
# For Image Compression
#
# Introduction
#
# Question 1
#
# Question 2
#
# Question 3
#
# Question 4
#
# Question 5
#
# Question 6
#
# Question 7
#
# Image Compression
#
# Question 8
#
# Compressed Image Display
#
# Question 9
#
# Ungraded Question
#
# `For Image Compression`