Test 5 Solution
# Set up library imports and define a helper function to display numpy arrays.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import nose.tools as test_# For testing
# Useful in beautifying numpy arrays.
from IPython.display import HTML, display
import tabulate
def pp(a, show_head=False):
    '''
    Render an array as an HTML table in the notebook output.
    args: a -> ndarray; an array with fewer than 2 dimensions is wrapped in
               a list so that it is rendered as a single row.
          show_head -> if True display only the first 5 rows.
    return: None
    '''
    rows = a if a.ndim >= 2 else [a]
    if show_head:
        rows = rows[:5]
    display(HTML(tabulate.tabulate(rows, tablefmt='html')))
Introduction¶
We provide you with two images in the image_data directory. One, called woman_observes_art_small_image.jpg, is of size 223KB and the other, called passengers_ride_large_image.jpg, is of size 4.1MB. We'll compress these images using the K-means clustering algorithm.
Consider the smaller image of size 223KB; it is a 666 by 1000 image. This means that, for each of the 666000 pixels in the image, there are three 8-bit numbers (each ranging from 0 to 255) that represent the red, green, and blue intensity values for that pixel. The straightforward representation of this image therefore takes about 666000×3 = 1998000 bytes (a byte being 8 bits). To compress the image, we will use K-means to reduce the image to k = 128 colors. More specifically, each pixel in the image is considered a point in the three-dimensional (r, g, b)-space. To compress the image, we will cluster these points in color-space into 128 clusters, and replace each pixel with the closest cluster centroid. The same procedure applies to any other image.
We'll use sklearn implementation of K-means which you saw in the tutorial.
Question 1¶
First of all write a function which takes as input a path to a colored jpeg image and returns the corresponding ndarray of shape (m,n,c) where c is 3 i.e. number of channels (R,G,B).
# Setup any library imports here
### BEGIN SOLUTION
from skimage import io
### END SOLUTION
def image_read(image_path):
    '''
    Load a jpg image from disk into an ndarray.
    args: image_path -> string, path of the jpg image
    return: ndarray -> shape (m,n,c)
    '''
    ### BEGIN SOLUTION
    return io.imread(image_path)
    ### END SOLUTION
# Visible tests: the small image must load with shape (666, 1000, 3) and its
# top-left pixel must be (240, 240, 240).
image_path = 'image_data/woman_observes_art_small_image.jpg'
test_.ok_ (image_read(image_path).shape == (666, 1000, 3))
test_.ok_ ((np.isclose(image_read(image_path)[0,0,:], np.array([240, 240, 240]))).all())
### BEGIN HIDDEN TESTS
# Hidden tests: the same checks against the large image.
image_path = 'image_data/passengers_ride_large_image.jpg'
test_.eq_ (image_read(image_path).shape, (3602, 5397, 3))
# Test an arbitrary pixel (row 5, column 5, third channel).
test_.ok_ (image_read(image_path)[5,5,2] == 88 )
# Test two neighbouring pixels of row 5 across all channels.
test_.ok_ ((image_read(image_path)[5,4:6,:] == np.array([[56, 64, 83],
[60, 72, 88]])).all())
### END HIDDEN TESTS
# Load both images once; the arrays are reused by the following questions.
small_image_path = 'image_data/woman_observes_art_small_image.jpg'
small_img = image_read(small_image_path)
large_image_path = 'image_data/passengers_ride_large_image.jpg'
large_img = image_read(large_image_path)
def display_image(image, _title):
    '''
    Draw an image with matplotlib, with the axes hidden, under a title.
    args: image  -> ndarray holding the image to draw
          _title -> string used as the plot title
    return: None
    '''
    ### BEGIN SOLUTION
    plt.axis('off')  # hide the axes before drawing
    plt.title(_title)
    plt.imshow(image)
    return None
    ### END SOLUTION
# All test cases hidden.
### BEGIN HIDDEN TESTS
# display_image only draws; it must not return anything.
test_.ok_(display_image(small_img, "") is None)
### END HIDDEN TESTS
# Derive the pixel counts from the arrays themselves so each title matches
# the image actually shown (the large image has far more pixels than the
# small one).
display_image(small_img, 'Original Small image with %d colors (pixels)'
              % (small_img.shape[0] * small_img.shape[1]))
display_image(large_img, 'Original Large image with %d colors (pixels)'
              % (large_img.shape[0] * large_img.shape[1]))
Question 3¶
We've loaded the image as ndarrays. Let's preprocess this image. Convert datatype of each entry of this ndarray to float64 and normalize by dividing each entry by maximum value a pixel can take (you should find the maximum value in the input array using numpy operations. Don't hardcode the value).
def preprocess(image_np):
    '''
    Preprocess the input array. Convert it to np.float64 and normalize it by
    the maximum value found in the array, so the largest entry becomes 1.0.
    If the maximum is 0 (e.g. an all-black image) the float64 copy is
    returned unscaled, since dividing by zero would fill the result with NaNs.
    args: ndarray -> shape (m, n, c)
    return: ndarray -> shape (m, n, c), dtype np.float64
    '''
    ### BEGIN SOLUTION
    as_float = image_np.astype(np.float64)
    _max_val = as_float.max()
    if _max_val == 0:
        # Nothing to scale; avoid 0 / 0 -> NaN.
        return as_float
    return as_float / _max_val
    ### END SOLUTION
# Visible tests: preprocessing must keep the shape of the input.
test_.eq_ (preprocess(small_img).shape, small_img.shape)
# check data type
test_.eq_ (preprocess(small_img).dtype, np.float64)
# Compare the top-left 3x3 patch of the normalized small image with the
# expected values.
test_.ok_ (np.isclose(preprocess(small_img)[:3,:3,:], np.array([[[0.94117647, 0.94117647, 0.94117647],
[0.94117647, 0.94117647, 0.94117647],
[0.94117647, 0.94117647, 0.94117647]],
[[0.9372549 , 0.9372549 , 0.9372549 ],
[0.9372549 , 0.9372549 , 0.9372549 ],
[0.9372549 , 0.9372549 , 0.9372549 ]],
[[0.93333333, 0.93333333, 0.93333333],
[0.9372549 , 0.9372549 , 0.9372549 ],
[0.9372549 , 0.9372549 , 0.9372549 ]]])).all())
### BEGIN HIDDEN TESTS
# Hidden tests: shape, two sample pixels and dtype for the large image.
test_.eq_ (preprocess(large_img).shape, large_img.shape)
test_.ok_ (np.isclose(preprocess(large_img)[:2,1,:], np.array([[0.25098039, 0.27058824, 0.34901961],
[0.24705882, 0.26666667, 0.34117647]])).all())
test_.eq_ (preprocess(large_img).dtype, np.float64)
### END HIDDEN TESTS
# Normalized copies of both images, reused by the following questions.
small_image_preprocessed = preprocess(small_img)
large_image_preprocessed = preprocess(large_img)
Question 4¶
At this point, our image is preprocessed and it has shape (m, n, c)- rank 3 ndarray. Now, we'll reshape it to (m*n, c)- rank 2 ndarray.
def make_rank_2(image):
    '''
    Flatten the two spatial dimensions of an image into one, so that every
    row of the result is a single pixel with its c channel values.
    arg: ndarray -> shape (m, n, c)
    return: ndarray -> shape (m*n, c)
    '''
    ### BEGIN SOLUTION
    # Unpacking three values also rejects inputs that are not rank 3.
    _n_rows, _n_cols, n_channels = image.shape
    flattened = np.reshape(image, (-1, n_channels))
    return flattened
    ### END SOLUTION
# Visible tests: the small image flattens to one row per pixel and the first
# three rows match the expected normalized values.
test_.eq_ (make_rank_2(small_image_preprocessed).shape, (666000, 3))
test_.ok_ (np.isclose(make_rank_2(small_image_preprocessed)[:3], np.array([
[0.94117647, 0.94117647, 0.94117647],
[0.94117647, 0.94117647, 0.94117647],
[0.94117647, 0.94117647, 0.94117647]])).all())
### BEGIN HIDDEN TESTS
# Hidden tests: shape and one sample row of the flattened large image.
test_.eq_(make_rank_2(large_image_preprocessed).shape, (19439994, 3))
test_.ok_ (np.isclose(make_rank_2(large_image_preprocessed)[4:5,],
np.array([[0.2627451 , 0.28235294, 0.35686275]])).all())
### END HIDDEN TESTS
# Rank-2 (pixels x channels) versions of both images for the next questions.
small_image_rank2 = make_rank_2(small_image_preprocessed)
large_image_rank2 = make_rank_2(large_image_preprocessed)
Now, our image is ready to be fed into K-means as training data. However, the small image has 666000 pixels and the large image has 19439994 pixels, so clustering all of them would take a long time. For the sake of this test, we will instead run K-means on a sample that is 100 times smaller than the small image.
# Sample size used for training: the small image's pixel count divided by
# size_reduce_by, rounded down to a whole number of pixels.
size_reduce_by = 100
n_image_pixels = small_img.shape[0] * small_img.shape[1]
reduced_sample_size = n_image_pixels // size_reduce_by
Question 5¶
Given a reduced sample size s and an image, return s pixels picked at random from the image.
# Setup any library imports here
### BEGIN SOLUTION
from sklearn.utils import shuffle
### END SOLUTION
def get_random_sample(image, sample_size):
    '''
    Pick 'sample_size' pixels from image (ndarray) by shuffling its rows with
    random_state=0 and keeping the first 'sample_size' of them.
    args: image -> ndarray -> shape (n*m, c)
          sample_size -> number of pixels to keep
    return: image -> ndarray -> shape (sample_size, c) where sample_size <= n*m
    '''
    ### BEGIN SOLUTION
    shuffled_pixels = shuffle(image, random_state=0)
    return shuffled_pixels[:sample_size]
    ### END SOLUTION
# The sample drawn from the small image must contain reduced_sample_size rows.
test_.eq_ (get_random_sample(small_image_rank2, reduced_sample_size).shape, (6660, 3))
# test randomness
# NOTE(review): this inspects small_image_rank2[0] (the unshuffled input),
# not a row of the sampled output - confirm whether the first row of the
# sample was intended.
test_.ok_ (not (np.isclose(small_image_rank2[0], \
[0.13333333, 0.23529412, 0.52941176])).all())
# Both samples use reduced_sample_size, which is derived from the small image.
small_image_sample = get_random_sample(small_image_rank2, reduced_sample_size)
large_image_sample = get_random_sample(large_image_rank2, reduced_sample_size)
Question 6¶
Finally, train a K-means clustering algorithm. Specifically, given an image sample, number of colors (i.e. number of clusters to extract) and random state, return a fitted sklearn.cluster._kmeans.KMeans
object.
n_colors_small_image = 128


def train_kmeans(image, _random_state, n_colors):
    ''' Train a kmeans algorithm on given image. KMeans is already imported
    above. The model is fitted on the rows of `image`, using the provided
    _random_state as the seed and n_colors as the number of clusters.
    args: image -> ndarray -> shape (a, c)
          _random_state -> int
          n_colors -> int
    return: A fitted sklearn.cluster._kmeans.KMeans object
    '''
    ### BEGIN SOLUTION
    # Use the arguments rather than notebook globals so the function works
    # for any image sample, seed and number of colors.
    kmeans = KMeans(n_clusters=n_colors, random_state=_random_state)
    return kmeans.fit(image)
    ### END SOLUTION


# train_kmeans honours its arguments, so pass the sample, seed and number of
# colors that the hard-coded expected labels below were generated with (the
# earlier implementation always used exactly these values, whatever it was
# given).
kmeans_test = train_kmeans(small_image_sample, 5, n_colors_small_image)
test_.eq_(kmeans_test.labels_[10], 120)
### BEGIN HIDDEN TESTS
test_.ok_((kmeans_test.labels_[:5] == np.asarray([10, 6, 52, 37, 25])).all())
### END HIDDEN TESTS
# Model used by the remaining questions: 128 colors learned from the reduced
# sample of the small image.
kmeans = train_kmeans(small_image_sample, 5, n_colors_small_image)
Question 7¶
Now, using the trained kmeans algorithm, predict the cluster to which each example of the input image belongs.
def pred_labels(kmeans, image):
    '''
    Predict labels (cluster to which each pixel in image belongs).
    args: kmeans -> fitted sklearn.cluster._kmeans.KMeans object
          image -> ndarray -> shape (a, c)
    return: the result of kmeans.predict(image), one label per row of image
    '''
    ### BEGIN SOLUTION
    return kmeans.predict(image)
    ### END SOLUTION
# Visible test: expected cluster labels of the first five pixels.
test_.ok_ ((pred_labels(kmeans, small_image_rank2[:5]) == \
np.asarray([119, 119, 119, 119, 119])).all())
### BEGIN HIDDEN TESTS
# Hidden test: expected cluster labels of pixels 10 to 14.
test_.ok_ ((pred_labels(kmeans, small_image_rank2[10:15]) == \
np.asarray([ 5, 119, 119, 119, 119])).all())
### END HIDDEN TESTS
# Cluster label of every pixel of the full small image.
predicted_labels = pred_labels(kmeans, small_image_rank2)
Image Compression¶
First 5 cluster centroids are displayed below:
# Cluster centers of the fitted model; only the first 5 rows are displayed.
centroids = kmeans.cluster_centers_
pp(centroids, show_head=True)
0.563171 | 0.564649 | 0.55493 |
0.863332 | 0.868801 | 0.865146 |
0.0535241 | 0.127398 | 0.350291 |
0.273366 | 0.349837 | 0.51683 |
0.68467 | 0.730481 | 0.799287 |
There are 128 cluster centroids, and there are 666000 colors (pixels) in our small image. During centroid assignment, each pixel was assigned to one of the centroids. We'll replace those 666000 color values with these 128 values such that if pixel i belongs to centroid j, then pixel i is replaced with centroid j. We have thereby compressed our image to one that has only 128 different colors instead of 666000.
Question 8¶
Implement the function below which given the centroids, labels (cluster assignment of pixels) and dimensions of the image, returns the compressed image (as explained above).
def get_compressed_image(centroids, labels, m, n):
    '''
    Create a compressed image using centroids as pixel values. Pixel (i, j)
    of the output is the centroid selected by labels[i*n + j]; labels beyond
    the first m*n are ignored.
    args: centroids -> ndarray -> shape (k, c) (e.g. (128, 3))
          labels -> ndarray -> shape (m*n,) (e.g. (666000 for small img))
          m -> int (x dim of the output image)
          n -> int (y dim of the output image)
    return: image -> ndarray -> shape (m, n, c), dtype np.float64
    raises: IndexError if fewer than m*n labels are supplied
    '''
    ### BEGIN SOLUTION
    n_pixels = m * n
    labels = np.asarray(labels)
    if labels.shape[0] < n_pixels:
        raise IndexError('need %d labels for a %d x %d image, got %d'
                         % (n_pixels, m, n, labels.shape[0]))
    c = centroids.shape[1]
    # Index the centroid table with the whole label vector at once instead of
    # looping over every pixel in Python, then restore the image layout.
    pixels = centroids[labels[:n_pixels]].astype(np.float64)
    return pixels.reshape(m, n, c)
    ### END SOLUTION
# Rebuild the small image at its original height and width from the
# centroids and the per-pixel labels.
m, n, c = tuple(small_img.shape)
new_img = get_compressed_image(centroids, predicted_labels, m, n)
test_.eq_(new_img.shape, (666, 1000, 3))
# Compressed image kept for display and for the sanity check in Question 9.
compressed_small_img = get_compressed_image(centroids, predicted_labels, m, n)
Compressed Image Display¶
# Show the compressed small image; the title states the number of colors.
display_image(compressed_small_img, 'compressed small image with %d colors'%128)
Question 9¶
As a sanity check, the compressed image should only have centroids. Each centroid has 3 coordinates. If there are 128 centroids, then ~(128*3) unique elements should exist in compressed_small_img. Implement the function below to find the unique elements.
def find_distinct_elms(image):
    '''
    Return the unique elements of image as a sorted, flat array.
    args: ndarray -> shape (m, n, c)
    return: ndarray -> shape (d,) where d is the total number of unique
            elements
    '''
    ### BEGIN SOLUTION
    distinct_values = np.unique(image)
    return distinct_values
    ### END SOLUTION
# Visible test: the five smallest distinct values of the compressed image.
test_.ok_(np.isclose(find_distinct_elms(compressed_small_img)[:5], np.asarray\
([0.01314879, 0.01349481, 0.01421569, 0.01464821, 0.0172208 ])).all())
### BEGIN HIDDEN TESTS
# Hidden test: total number of distinct values in the compressed image.
test_.eq_(len(find_distinct_elms(compressed_small_img)), 382)
### END HIDDEN TESTS
Ungraded Question¶
Now, compress the larger image. Compare its quality with that of the smaller image. See if you can improve the quality by using more than 128 colors for the larger image.