Test 5
Before you turn this problem in, make sure everything runs as expected. First, restart the kernel (in the menubar, select Kernel$\rightarrow$Restart) and then run all cells (in the menubar, select Cell$\rightarrow$Run All).
Make sure you fill in any place that says YOUR CODE HERE
or "YOUR ANSWER HERE", as well as your name and collaborators below:
NAME = ""
COLLABORATORS = ""
# Set up library imports and define a helper function to display numpy arrays.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import nose.tools as test_  # For testing
# Useful in beautifying numpy arrays.
from IPython.display import HTML, display
import tabulate
def pp(a, show_head=False):
    '''
    args: show_head -> if True print only first 5 rows.
    return: None
    '''
    if a.ndim < 2:
        a = [a]
    if show_head:
        display(HTML(tabulate.tabulate(a[:5], tablefmt='html')))
        return
    display(HTML(tabulate.tabulate(a, tablefmt='html')))
Introduction¶
We provide you with two images in the image_data directory. One, woman_observes_art_small_image.jpg, is 223 KB in size; the other, passengers_ride_large_image.jpg, is 4.1 MB. We'll compress these images using the K-means clustering algorithm.
Consider the smaller image of size 223 KB: it is a 666 by 1000 image. This means that, for each of the 666000 pixels in the image, there are three 8-bit numbers (each ranging from 0 to 255) representing the red, green, and blue intensity values of that pixel. The straightforward representation of this image therefore takes about 666000 × 3 = 1998000 bytes (a byte being 8 bits). To compress the image, we will use K-means to reduce it to k = 128 colors. More specifically, each pixel in the image is considered a point in three-dimensional (r, g, b) space. To compress the image, we cluster these points in color space into 128 clusters and replace each pixel with its closest cluster centroid. Likewise for any other image.
We'll use the sklearn implementation of K-means, which you saw in the tutorial.
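As a quick check of the arithmetic above (purely illustrative, not part of the graded work):
# Illustrative arithmetic for the 666 x 1000 small image.
n_pixels = 666 * 1000      # 666000 pixels
raw_bytes = n_pixels * 3   # one byte per channel (R, G, B) -> 1998000 bytes
print(n_pixels, raw_bytes)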
Question 1¶
First of all, write a function that takes as input the path to a color JPEG image and returns the corresponding ndarray of shape (m, n, c), where c is 3, i.e. the number of channels (R, G, B).
# Setup any library imports here
# YOUR CODE HERE
raise NotImplementedError()
def image_read(image_path):
    '''
    Read image into an ndarray.
    args: string -> path of jpg image
    return: ndarray -> shape (m, n, c)
    '''
    # YOUR CODE HERE
    raise NotImplementedError()
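For reference, a minimal sketch of one possible approach (not necessarily the intended solution; the name image_read_sketch and the choice of matplotlib's imread over, say, PIL are assumptions):
# A possible sketch, not the graded solution: plt.imread returns a uint8
# (m, n, 3) array for JPEG files.
def image_read_sketch(image_path):
    return np.asarray(plt.imread(image_path))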
image_path = 'image_data/woman_observes_art_small_image.jpg'
test_.ok_ (image_read(image_path).shape == (666, 1000, 3))
test_.ok_ ((np.isclose(image_read(image_path)[0,0,:], np.array([240, 240, 240]))).all())
small_image_path = 'image_data/woman_observes_art_small_image.jpg'
small_img = image_read(small_image_path)
large_image_path = 'image_data/passengers_ride_large_image.jpg'
large_img = image_read(large_image_path)
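Question 2¶
Next, write a function that displays a given image (ndarray) with the provided title.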
def display_image(image, _title):
    '''
    Display the image.
    args: image -> ndarray
          title -> string
    '''
    # YOUR CODE HERE
    raise NotImplementedError()
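A minimal sketch of one way display_image could work (the name display_image_sketch and the specific matplotlib calls are assumptions; the hidden tests may expect something different):
# A possible sketch, not the graded solution.
def display_image_sketch(image, _title):
    plt.figure()
    plt.imshow(image)    # handles uint8 images or floats in [0, 1]
    plt.title(_title)
    plt.axis('off')
    plt.show()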
# All test cases hidden.
display_image(small_img, 'Original Small image with %d colors (pixels)'%666000)
display_image(large_img, 'Original Large image with %d colors (pixels)'%19439994)
Question 3¶
We've loaded the images as ndarrays. Let's preprocess them. Convert the datatype of each entry of the ndarray to float64 and normalize by dividing each entry by the maximum value a pixel can take (you should find the maximum value in the input array using numpy operations; don't hardcode the value).
def preprocess(image_np):
    '''
    Preprocess the input array. Normalize by max value and convert dtype to
    np.float64
    args: ndarray -> shape (m, n, c)
    return: ndarray -> shape (m, n, c)
    '''
    # YOUR CODE HERE
    raise NotImplementedError()
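One possible way to implement preprocess (a sketch, not necessarily the intended solution; preprocess_sketch is a hypothetical name):
# A possible sketch, not the graded solution.
def preprocess_sketch(image_np):
    image_float = image_np.astype(np.float64)
    return image_float / image_float.max()   # max taken from the data, not hardcoded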
test_.eq_ (preprocess(small_img).shape, small_img.shape)
# check data type
test_.eq_ (preprocess(small_img).dtype, np.float64)
test_.ok_ (np.isclose(preprocess(small_img)[:3,:3,:], np.array([[[0.94117647, 0.94117647, 0.94117647],
[0.94117647, 0.94117647, 0.94117647],
[0.94117647, 0.94117647, 0.94117647]],
[[0.9372549 , 0.9372549 , 0.9372549 ],
[0.9372549 , 0.9372549 , 0.9372549 ],
[0.9372549 , 0.9372549 , 0.9372549 ]],
[[0.93333333, 0.93333333, 0.93333333],
[0.9372549 , 0.9372549 , 0.9372549 ],
[0.9372549 , 0.9372549 , 0.9372549 ]]])).all())
small_image_preprocessed = preprocess(small_img)
large_image_preprocessed = preprocess(large_img)
Question 4¶
At this point, our image is preprocessed and has shape (m, n, c), i.e. it is a rank-3 ndarray. Now we'll reshape it to (m*n, c), a rank-2 ndarray.
def make_rank_2(image):
    '''
    arg: ndarray -> shape (m, n, c)
    return: ndarray -> shape (m*n, c)
    '''
    # YOUR CODE HERE
    raise NotImplementedError()
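A minimal sketch of make_rank_2 (make_rank_2_sketch is a hypothetical name):
# A possible sketch, not the graded solution.
def make_rank_2_sketch(image):
    m, n, c = image.shape
    return image.reshape(m * n, c)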
test_.eq_ (make_rank_2(small_image_preprocessed).shape, (666000, 3))
test_.ok_ (np.isclose(make_rank_2(small_image_preprocessed)[:3], np.array([
[0.94117647, 0.94117647, 0.94117647],
[0.94117647, 0.94117647, 0.94117647],
[0.94117647, 0.94117647, 0.94117647]])).all())
small_image_rank2 = make_rank_2(small_image_preprocessed)
large_image_rank2 = make_rank_2(large_image_preprocessed)
Now our image is ready to be fed into K-means as training data. However, our small image has 666000 pixels and the large image has 19439994 pixels, which would take a long time to cluster. Instead, we will run K-means on a smaller sample, 100 times smaller for the sake of this test.
size_reduce_by = 100
n_image_pixels = small_img.shape[0] * small_img.shape[1]
reduced_sample_size = int(n_image_pixels / size_reduce_by)
Question 5¶
Given a reduced sample size s and an image, you'll return the image with only s pixels randomly picked.
# Setup any library imports here
# YOUR CODE HERE
raise NotImplementedError()
def get_random_sample(image, sample_size):
    '''
    Extract random pixels of size 'sample_size' from image (ndarray).
    args: image -> ndarray -> shape (m*n, c)
          sample_size -> int
    return: image -> ndarray -> shape (sample_size, c) where sample_size <= m*n
    '''
    # YOUR CODE HERE
    raise NotImplementedError()
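One possible sketch of get_random_sample, assuming sampling without replacement is acceptable (get_random_sample_sketch is a hypothetical name):
# A possible sketch, not the graded solution.
def get_random_sample_sketch(image, sample_size):
    idx = np.random.choice(image.shape[0], size=sample_size, replace=False)
    return image[idx]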
test_.eq_ (get_random_sample(small_image_rank2, reduced_sample_size).shape, (6660, 3))
# test randomness
test_.ok_ (not (np.isclose(small_image_rank2[0], \
[0.13333333, 0.23529412, 0.52941176])).all())
small_image_sample = get_random_sample(small_image_rank2, reduced_sample_size)
large_image_sample = get_random_sample(large_image_rank2, reduced_sample_size)
Question 6¶
Finally, train a K-means clustering algorithm. Specifically, given an image sample, the number of colors (i.e. the number of clusters to extract) and a random state, return a fitted sklearn.cluster._kmeans.KMeans object.
n_colors_small_image = 128
def train_kmeans(image, _random_state, n_colors):
    '''
    Train a kmeans algorithm on the given image. KMeans is already imported above.
    Use the provided _random_state and n_colors (which is essentially the number of
    clusters).
    args: image -> ndarray -> shape (a, c)
          _random_state -> int
          n_colors -> int
    return: A fitted sklearn.cluster._kmeans.KMeans object
    '''
    # YOUR CODE HERE
    raise NotImplementedError()
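A minimal sketch of train_kmeans using the KMeans class imported at the top of the notebook (train_kmeans_sketch is a hypothetical name; other KMeans arguments are left at their defaults):
# A possible sketch, not the graded solution.
def train_kmeans_sketch(image, _random_state, n_colors):
    return KMeans(n_clusters=n_colors, random_state=_random_state).fit(image)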
kmeans_test = train_kmeans(small_image_rank2, 20, 6)
test_.eq_(kmeans_test.labels_[10], 120)
kmeans = train_kmeans(small_image_rank2, 5, n_colors_small_image)
Question 7¶
Now, using the trained kmeans algorithm, predict the cluster to which each pixel (training example) of the input image belongs.
def pred_labels(kmeans, image):
    '''
    Predict labels (the cluster to which each pixel in image belongs).
    args: kmeans -> a fitted sklearn.cluster._kmeans.KMeans object
          image -> ndarray -> shape (a, c)
    return: ndarray -> shape (a,)
    '''
    # YOUR CODE HERE
    raise NotImplementedError()
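A minimal sketch of pred_labels (pred_labels_sketch is a hypothetical name):
# A possible sketch, not the graded solution: KMeans.predict returns the index
# of the nearest centroid for each row, shape (a,).
def pred_labels_sketch(kmeans, image):
    return kmeans.predict(image)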
test_.ok_ ((pred_labels(kmeans, small_image_rank2[:5]) == \
np.asarray([119, 119, 119, 119, 119])).all())
predicted_labels = pred_labels(kmeans, small_image_rank2)
Image Compression¶
The first 5 cluster centroids are displayed below:
centroids = kmeans.cluster_centers_
pp(centroids, show_head=True)
There are 128 cluster centroids, and there are 666000 colors (pixels) in our small image. During centroid assignment, each pixel was assigned to one of the centroids. We'll replace those 666000 different color values with these 128 values, such that if pixel i belongs to centroid j, then pixel i is replaced with centroid j. Thereby, we've compressed our image to one which has only 128 different colors instead of up to 666000.
Question 8¶
Implement the function below which, given the centroids, the labels (cluster assignments of the pixels), and the dimensions of the image, returns the compressed image (as explained above).
def get_compressed_image(centroids, labels, m, n):
    '''
    Create a compressed image using centroids as pixel values.
    args: centroids -> ndarray -> shape (c, 3) (e.g. (128, 3))
          labels -> ndarray -> shape (m*n,) (e.g. (666000,) for the small image)
          m -> int (x dim of the output image)
          n -> int (y dim of the output image)
    return: image -> ndarray -> shape (m, n, c)
    '''
    # YOUR CODE HERE
    raise NotImplementedError()
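One possible sketch of get_compressed_image using numpy fancy indexing (get_compressed_image_sketch is a hypothetical name):
# A possible sketch, not the graded solution: centroids[labels] has shape
# (m*n, 3); reshaping restores the (m, n, c) image.
def get_compressed_image_sketch(centroids, labels, m, n):
    return centroids[labels].reshape(m, n, -1)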
m, n, c = tuple(small_img.shape)
new_img = get_compressed_image(centroids, predicted_labels, m, n)
test_.eq_(new_img.shape, (666, 1000, 3))
compressed_small_img = get_compressed_image(centroids, predicted_labels, m, n)
Compressed Image Display¶
display_image(compressed_small_img, 'compressed small image with %d colors'%128)
Question 9¶
As a sanity check, the compressed image should only contain centroid values. Each centroid has 3 coordinates. If there are 128 centroids, then roughly (at most) 128*3 unique elements should exist in compressed_small_img. Implement the function below to find the unique elements.
def find_distinct_elms(image):
    '''
    Return unique elements in image.
    args: ndarray -> shape (m, n, c)
    return: ndarray -> shape (d,) where d is the total number of unique elements
    '''
    # YOUR CODE HERE
    raise NotImplementedError()
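A minimal sketch of find_distinct_elms (find_distinct_elms_sketch is a hypothetical name):
# A possible sketch, not the graded solution: np.unique returns the sorted
# unique scalar values of the flattened array, shape (d,).
def find_distinct_elms_sketch(image):
    return np.unique(image)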
test_.ok_(np.isclose(find_distinct_elms(compressed_small_img)[:5], np.asarray\
([0.01314879, 0.01349481, 0.01421569, 0.01464821, 0.0172208 ])).all())
Ungraded Question¶
Now, compress the larger image. Compare its quality with the smaller image. See if you can improve quality by using more than 128 colors for the larger image.
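One possible sequence of calls for the larger image, reusing the functions defined above (the names n_colors_large_image, kmeans_large, labels_large and compressed_large_img are assumptions, and 128 colors is just a starting point):
# A possible pipeline for the large image (illustrative only).
n_colors_large_image = 128
kmeans_large = train_kmeans(large_image_sample, 5, n_colors_large_image)
labels_large = pred_labels(kmeans_large, large_image_rank2)
m_l, n_l, _ = large_img.shape
compressed_large_img = get_compressed_image(kmeans_large.cluster_centers_,
                                            labels_large, m_l, n_l)
display_image(compressed_large_img,
              'compressed large image with %d colors' % n_colors_large_image)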