Feel free to go online. In fact, we encourage you to read documentation where needed. However, you may not collaborate with anybody. To certify that you didn't collaborate with anyone, write 'Nobody' in 'collaborators' above.
import numpy as np
from nose.tools import assert_equal
from nose.tools import assert_not_equal
import pandas as pd
This test covers Python, NumPy and Pandas. There are 10 questions. Questions are of varying difficulty and are not ordered by difficulty: you may see a difficult question earlier and an easier one later. Feel free to go online; in fact, you're encouraged to read the official documentation.
Indexing starts at zero throughout, so we say "zeroth row" instead of "first row". Likewise for columns.
In the input string, if you see a letter that is lower case, convert it to upper case. If the letter is upper case, convert it to lower case. Any non-letter character, e.g. $53,*#@!$, should be returned as it is.
Try to solve this in one line using a list comprehension.
# swap cases
def swap_cases(s):
    '''Return a copy of ``s`` with the case of every letter flipped.

    arg: string
    return: string in which each lower-case character is upper-cased and
        every other character is passed through ``str.lower`` (digits and
        punctuation come back unchanged).
    Staff's solution contains one line of code.
    '''
    ### BEGIN SOLUTION
    return "".join([ch.lower() if not ch.islower() else ch.upper() for ch in s])
    ### END SOLUTION
# Public checks: mixed case, digits/punctuation passed through, empty string.
assert swap_cases('ABcDef') == 'abCdEF'
assert swap_cases('hl4434*9889DdfF') == 'HL4434*9889dDFf'
assert swap_cases('') == ''
assert_not_equal(swap_cases('ABC'), 'ABC')
### BEGIN HIDDEN TESTS
# Hidden checks: a single letter, symbols only, and one long repeated string.
assert swap_cases('H') == 'h'
assert swap_cases('***(((())))') == '***(((())))'
assert_not_equal(swap_cases('Hello'), 'Hello')
assert swap_cases('DSFAsflaskdflsakflas***(((())))DSFAsflaskdflsakflas***(((())))DSFAsflaskdflsakflas***(((())))DSFAsflaskdflsakflas***(((())))DSFAsflaskdflsakflas***(((())))DSFAsflaskdflsakflas***(((())))DSFAsflaskdflsakflas***(((())))DSFAsflaskdflsakflas***(((())))DSFAsflaskdflsakflas***(((())))DSFAsflaskdflsakflas***(((())))DSFAsflaskdflsakflas***(((())))') == 'dsfaSFLASKDFLSAKFLAS***(((())))dsfaSFLASKDFLSAKFLAS***(((())))dsfaSFLASKDFLSAKFLAS***(((())))dsfaSFLASKDFLSAKFLAS***(((())))dsfaSFLASKDFLSAKFLAS***(((())))dsfaSFLASKDFLSAKFLAS***(((())))dsfaSFLASKDFLSAKFLAS***(((())))dsfaSFLASKDFLSAKFLAS***(((())))dsfaSFLASKDFLSAKFLAS***(((())))dsfaSFLASKDFLSAKFLAS***(((())))dsfaSFLASKDFLSAKFLAS***(((())))'
### END HIDDEN TESTS
Given a list of numbers, square the positive even numbers and return the sum of those squares.
You should use a list comprehension.
def sum_of_sqs_of_pos_even_elms_of_list(_list):
    '''Sum the squares of the positive even entries of ``_list``.

    args: list of numbers
    return: sum of ``x ** 2`` over every entry x with ``x % 2 == 0`` and
        ``x > 0``; 0 when no entry qualifies (including an empty list).
        Zero, negative numbers and non-integral floats such as 2.4 are
        skipped by those two conditions.
    Staff's solution contains one line of code.
    '''
    ### BEGIN SOLUTION
    return sum([num ** 2 for num in _list if num % 2 == 0 and num > 0])
    ### END SOLUTION
4
# Public checks: only positive even entries contribute (2**2 + 4**2 = 20);
# odd values, negatives and non-integral floats are ignored.
assert sum_of_sqs_of_pos_even_elms_of_list([2,4]) == 20
assert sum_of_sqs_of_pos_even_elms_of_list([2,4,3,6,7,4,3]) == 72
assert sum_of_sqs_of_pos_even_elms_of_list([-6,4,2,2.4,204.2]) == 20
### BEGIN HIDDEN TESTS
# Hidden checks: inputs with no qualifying entry must give 0 (zero is even but not positive).
assert sum_of_sqs_of_pos_even_elms_of_list([5.8]) == 0
assert sum_of_sqs_of_pos_even_elms_of_list([0,0,1,-2]) == 0
assert sum_of_sqs_of_pos_even_elms_of_list([4,5,6,3,2]) == 56
### END HIDDEN TESTS
Extract the 2nd last row of the given numpy array.
def extract_2nd_last_row(arr):
    '''Return the second-to-last row of ``arr``.

    arg: ndarray
    You can assume that ndarray has at least 2 rows.
    return: ndarray holding row -2 of ``arr`` (all of its columns)
    Staff's solution contains one line of code.
    '''
    ### BEGIN SOLUTION
    # Index tuple (-2, :) spelled out explicitly: row -2, every column.
    return arr[(-2, slice(None))]
    ### END SOLUTION
# Three-row input: the second-to-last row is the middle one, [6, 43, 2].
arr2 = np.array([[5,52,2],[6,43,2], [1,2,4]])
assert (extract_2nd_last_row(arr2) == np.array([6,43,2])).all()
Given an ndarray of shape $n \times m$ where $n, m \geq 2$, return an ndarray of shape $(n \cdot m) \times m$ in which every row is either the zeroth or the first row of the input array: if the element at index $(i,j)$ in the input array is an even
number, then row $(i \cdot m + j)$ of the output array is the zeroth row
of the input. Similarly, if the element at index $(i,j)$ in the input array is an odd
number, then row $(i \cdot m + j)$ of the output array is the first row
of the input. See the example below for clarification:
arr2 = np.array([[1,23,4],[4,2,4]]) # input array of shape (2,3) i.e. (n,m)
get_zeroth_and_first_rows(arr2) # Function called. Observe the returned array printed below.
array([[ 4, 2, 4], # output array of shape (6,3) i.e. ((n*m), m) i.e. (2*3,3)
[ 4, 2, 4],
[ 1, 23, 4],
[ 1, 23, 4],
[ 1, 23, 4],
[ 1, 23, 4]])
"The input array 'arr2' contains 1 (an odd number) at index (0,0), so row 0*3+0 = 0 of the output array, [ 4, 2, 4], is the first row of the input array. Similarly, 'arr2' contains 4 (an even number) at index (0,2), so row 0*3+2 = 2 of the output array, [ 1, 23, 4], is the zeroth row of the input array."
def get_zeroth_and_first_rows(a):
    '''
    Build an array whose rows are copies of row 0 or row 1 of ``a``.

    arg: ndarray of shape (n,m) where n and m are at least 2.
    return ndarray of shape ((n*m), m). Walking ``a`` in row-major order,
        each even element contributes row 0 of ``a`` and each odd element
        contributes row 1 of ``a``.
    Staff's solution contains one line of code.
    '''
    ### BEGIN SOLUTION
    # a % 2 is 0 for even and 1 for odd entries; flattened, it is used
    # directly as a list of row indices into ``a``.
    return a[np.ravel(a % 2)]
    ### END SOLUTION
# Test shapes returned
# A (40, 60) input must yield 40*60 = 2400 output rows of width 60.
arr3 = np.ones((40,60), dtype=np.int16)
assert_equal(get_zeroth_and_first_rows(arr3).shape, (2400, 60))
# Test each row.
# Element parities in row-major order are odd x3, even x3, odd x3, so the
# output is row 1 three times, row 0 three times, then row 1 three times.
arr1 = np.array([[1,1,1],[2,2,2],[3,3,3]])
assert (get_zeroth_and_first_rows(arr1) == np.array([[2, 2, 2],
[2, 2, 2],
[2, 2, 2],
[1, 1, 1],
[1, 1, 1],
[1, 1, 1],
[2, 2, 2],
[2, 2, 2],
[2, 2, 2]])).all()
# Only the first two elements (1 and 23) are odd; every other element is even.
arr2 = np.array([[1,23,4],[4,2,4],[40,0,0]])
assert (get_zeroth_and_first_rows(arr2) == np.array([[ 4, 2, 4],
[ 4, 2, 4],
[ 1, 23, 4],
[ 1, 23, 4],
[ 1, 23, 4],
[ 1, 23, 4],
[ 1, 23, 4],
[ 1, 23, 4],
[ 1, 23, 4]])).all()
### BEGIN HIDDEN TESTS
# All-zero (all even) input: every one of the 5*4 = 20 output rows is row 0.
arrh1 = np.zeros((5,4), dtype=np.int16)
assert (get_zeroth_and_first_rows(arrh1) == np.array([[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]])).all()
### END HIDDEN TESTS
Questions 5, 6 and 7 are based on the following information:
There are $n$ students in a university. Each student is taking $m$ courses. At the end of the semester, scores of each student in $m$ final exams are reported in an ndarray of shape $(m,n)$, where $m,n \geq 2$.
Calculate average (mean) and standard deviation of marks of each student in all final exams. You may not use any loops.
def mean_and_dev_students(marks):
    '''
    Per-student mean and standard deviation of the marks.

    args: ndarray
    return: tuple of ndarrays (mean_arr, std_arr), each with one entry per
        row of ``marks`` (the reduction runs along axis=1).
    NOTE(review): the question text gives the shape as (m, n) for n
        students, but this code and its tests treat each ROW as one
        student -- confirm the intended orientation.
    Staff's solution contains one line of code.
    '''
    ### BEGIN SOLUTION
    return tuple(stat(marks, axis=1) for stat in (np.mean, np.std))
    ### END SOLUTION
# Two rows (students) with three marks each; row means are exactly 3 and 16.
arr1 = np.array([[2,3,4],[3,42,3]])
assert (mean_and_dev_students(arr1)[0] == (np.array([ 3., 16.]))).all()
# The standard deviations (about 0.816 and 18.385) are irrational, so they
# are bracketed between a lower and an upper bound instead of compared exactly.
assert (mean_and_dev_students(arr1)[1] >= (np.array([ 0.8, 18.]))).all() # Floating point equality could be problematic. More on that in a computer systems course :)
# Upper bound for the second entry is 18.5 (was 185, a typo that made the check vacuous).
assert (mean_and_dev_students(arr1)[1] <= np.array([ 0.85, 18.5])).all()
### BEGIN HIDDEN TESTS
# Same bracketing approach on a second data set, for both means and deviations.
arr2 = np.array([[24,33,4],[43,442,34]])
assert (mean_and_dev_students(arr2)[0] >= (np.array([ 20., 173.]))).all()
assert (mean_and_dev_students(arr2)[0] <= (np.array([ 20.5, 173.5]))).all()
assert (mean_and_dev_students(arr2)[1] >= (np.array([ 12., 190. ]))).all()
assert (mean_and_dev_students(arr2)[1] <= (np.array([ 12.5, 190.5 ]))).all()
### END HIDDEN TESTS
copied from above
: 'There are $n$ students in a university. Each student is taking $m$ courses. At the end of the semester, scores of each student in $m$ final exams are reported in an ndarray of shape $(m,n)$. where $m,n \geq 2$'
Calculate average (mean) and standard deviation of marks obtained in each final exam.
def mean_and_dev_exams(marks):
    '''
    Per-exam mean and standard deviation of the marks.

    args: ndarray
    return: tuple of ndarrays (mean_arr, std_arr), each with one entry per
        column of ``marks`` (the reduction runs along axis=0).
    NOTE(review): the question text gives the shape as (m, n) for m exams,
        but this code and its tests treat each COLUMN as one exam --
        confirm the intended orientation.
    Staff's solution contains one line of code.
    '''
    ### BEGIN SOLUTION
    return tuple(stat(marks, axis=0) for stat in (np.mean, np.std))
    ### END SOLUTION
# With two rows, each column's mean is the midpoint of its two values and its
# standard deviation is half their difference.
arr1 = np.array([[2,3,4],[3,42,3]])
assert (mean_and_dev_exams(arr1)[0] == (np.array([ 2.5, 22.5, 3.5]))).all()
assert (mean_and_dev_exams(arr1)[1] == (np.array([ 0.5, 19.5, 0.5]))).all()
### BEGIN HIDDEN TESTS
# Same column-wise check on a second data set.
arr2 = np.array([[24,33,4],[43,442,34]])
assert (mean_and_dev_exams(arr2)[0] == (np.array([ 33.5, 237.5, 19.]))).all()
assert (mean_and_dev_exams(arr2)[1] == (np.array([ 9.5, 204.5, 15. ]))).all()
### END HIDDEN TESTS
copied from above
: 'There are $n$ students in a university. Each student is taking $m$ courses. At the end of the semester, scores of each student in $m$ final exams are reported in an ndarray of shape $(m,n)$. where $m,n \geq 2$'
Compute the average and standard deviation of all marks in the semester.
def cumulative_avg(marks):
    '''
    Mean and standard deviation over every mark in the array.

    args: ndarray
    return: tuple of floats (mean, std_dev), both computed over all
        elements of ``marks`` regardless of row or column.
    Staff's solution contains one line of code.
    '''
    ### BEGIN SOLUTION
    # The first entry is the mean of all marks (previously np.sum, which
    # returned the total and contradicted the documented contract).
    return np.mean(marks), np.std(marks)
    ### END SOLUTION
# Six marks summing to 57, so the mean is 57 / 6 = 9.5.
arr1 = np.array([[2,3,4],[3,42,3]])
assert np.allclose(cumulative_avg(arr1), (9.5, 14.545904349105742))
### BEGIN HIDDEN TESTS
# Six marks summing to 580, so the mean is 580 / 6.
arr2 = np.array([[24,33,4],[43,442,34]])
assert np.allclose(cumulative_avg(arr2), (580 / 6, 154.91036834964348))
### END HIDDEN TESTS
Given an ndarray of shape $(1, m)$, essentially a row vector, containing $m$ integer elements $\{a_0, a_1, ..., a_{m-1}\}$, construct an output ndarray such that the $i$th row of the output contains all 0's
except a 1
at index $a_i$, where $i \in \{0,1,2,...,m-1\}$. See the example below for clarification:
arr1 = np.array([1, 3, 2]).reshape(1,-1) # input array of shape (1,3)
encode(arr1) # Function called
array([[0., 1., 0., 0.], # output array.
[0., 0., 0., 1.],
[0., 0., 1., 0.]])
"The input array contains '3' at index 1, hence the 1st row [0., 0., 0., 1.] in \
the output array contains all 0's but 1 at index '3'. Similarly, for others."
def encode(arr):
    '''Encode a row vector of integer labels as rows of 0's with a single 1.

    args: ndarray of shape (1,m)
    returns: ndarray of shape (m, y). Figuring out y is part of this problem.
        (Here y is ``max(arr) + 1``.) Row i is all zeros except for a 1 in
        column ``arr[0, i]``; the result is a float array.
    Staff's solution has 7-8 lines of code.
    '''
    ### BEGIN SOLUTION
    _, num_labels = arr.shape
    # One output row per label; enough columns to hold the largest label.
    one_hot = np.zeros((num_labels, np.max(arr) + 1))
    # Integer-array indexing: pair row i with column arr[0, i] and set it to 1.
    one_hot[np.arange(num_labels), arr] = 1
    return one_hot
    ### END SOLUTION
# Largest label is 4, so the encoding has 5 columns; row i has its 1 in column arr1[0, i].
arr1 = np.array([1, 4, 3, 2]).reshape(1,-1)
assert (encode(arr1) == np.array([[0., 1., 0., 0., 0.],
[0., 0., 0., 0., 1.],
[0., 0., 0., 1., 0.],
[0., 0., 1., 0., 0.]])).all()
# A permutation of 0..7 gives an 8 x 8 result with exactly one 1 per row.
arr2 = np.array([7,4,2,3,1,5,6,0]).reshape(1,-1)
assert (encode(arr2) == np.array([[0., 0., 0., 0., 0., 0., 0., 1.],
[0., 0., 0., 0., 1., 0., 0., 0.],
[0., 0., 1., 0., 0., 0., 0., 0.],
[0., 0., 0., 1., 0., 0., 0., 0.],
[0., 1., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 1., 0., 0.],
[0., 0., 0., 0., 0., 0., 1., 0.],
[1., 0., 0., 0., 0., 0., 0., 0.]])).all()
### BEGIN HIDDEN TESTS
# The column count follows the largest label (10 -> 11 columns), not the number of labels.
arr2 = np.array([10, 4, 3, 2]).reshape(1,-1)
assert (encode(arr2) == np.array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]])).all()
# Shape-only check on a large input.
# NOTE(review): this asks for a 10000 x 10000 result (10**8 elements); with a
# float64 result that is on the order of 800 MB -- confirm the grader can afford it.
arr3 = np.arange(10000).reshape(1,-1)
assert_equal(encode(arr3).shape, (10000,10000))
### END HIDDEN TESTS
Aside
: If you think of the input array as an array of labels (cat (1), dog (0), chair (2) etc.), then the output of the encode()
function you implemented above is a very useful encoding, ubiquitous in machine learning and commonly used to represent categorical variables (it is known as one-hot encoding). More on that in future.
Consider again ndarray of shape $(m,n)$ containing marks of the final exams for all students from Questions 5,6 and 7. Now, we'd like to make that marks matrix more readable/beautiful. Convert that ndarray into a pandas dataframe. Name the columns of the dataframe as follows: $"Final \ exam \ x" where \ x \in \{1,2,..,n\}$ Actually, it's possible to again convert this df back to numpy ndarray. Do so as well and return a tuple of dataframe and converted ndarray.
def to_df(arr):
    '''
    Wrap a marks array in a labelled DataFrame and convert it back again.

    arg: ndarray of shape (m,n)
    return: tuple (df, ndarray). ``df`` has one column per column of
        ``arr``, named "Final exam 1", "Final exam 2", ...; the ndarray is
        ``df.values``, i.e. the frame converted back to NumPy.
    Staff's solution contains 4 lines of code.
    '''
    ### BEGIN SOLUTION
    # Column labels are 1-based, hence the "+ 1" on the 0-based position.
    labels = ["Final exam {}".format(pos + 1) for pos in range(arr.shape[1])]
    frame = pd.DataFrame(arr, columns=labels)
    return frame, frame.values
    ### END SOLUTION
# One row, four columns: expect four 1-based "Final exam" labels and a lossless round trip.
arr1 = np.array([50, 40, 43, 20]).reshape(1,-1)
assert list(to_df(arr1)[0].columns) == ['Final exam 1', 'Final exam 2', 'Final exam 3', 'Final exam 4']
assert (to_df(arr1)[1] == arr1).all()
### BEGIN HIDDEN TESTS
# The number of labels follows the column count (2), not the row count (5).
arr2 = np.array([[1,1],[2,2],[3,2],[4,2],[5,2]])
assert list(to_df(arr2)[0].columns) == ['Final exam 1', 'Final exam 2']
assert (to_df(arr2)[1] == arr2).all()
# Twenty columns: labels must run from 1 through 20 in order.
arr3 = np.ones((10,20))
assert (to_df(arr3)[0].columns == ['Final exam 1', 'Final exam 2', 'Final exam 3', 'Final exam 4',
'Final exam 5', 'Final exam 6', 'Final exam 7', 'Final exam 8',
'Final exam 9', 'Final exam 10', 'Final exam 11', 'Final exam 12',
'Final exam 13', 'Final exam 14', 'Final exam 15', 'Final exam 16',
'Final exam 17', 'Final exam 18', 'Final exam 19', 'Final exam 20']).all()
### END HIDDEN TESTS
Return the number of rows in a dataframe.
def count_rows(df):
    '''
    Count the rows of a dataframe.

    args: dataframe
    return: int, the number of rows (0 for an empty dataframe)
    Staff's solution contains one line of code.
    '''
    ### BEGIN SOLUTION
    # The row index has exactly one entry per row.
    return len(df.index)
    ### END SOLUTION
# Frames are built with to_df from arrays of ones; only the row count matters here.
df1 = to_df(np.ones((4,3)))[0]
assert count_rows(df1) == 4
### BEGIN HIDDEN TESTS
df1 = to_df(np.ones((1114,3)))[0]
assert count_rows(df1) == 1114
# NOTE(review): an 11414 x 4343 array of float ones is roughly 400 MB -- confirm
# the grading environment has enough memory for this fixture.
df1 = to_df(np.ones((11414,4343)))[0]
assert count_rows(df1) == 11414
### END HIDDEN TESTS