Abstract:Numpy是Python做数据分析所必须要掌握的基础库之一。这篇练习通过89道题目带你快速玩转Numpy。
Numpy是Python做数据分析所必须要掌握的基础库之一。这篇练习通过89道题目带你快速玩转Numpy。
# coding: utf-8
# NumPy exercises 1-13: array creation basics.
# The statements are restored to one per line and ported to Python 3
# (print is a function).
import numpy as np
import pandas as pd

# 1. Print the numpy version and the configuration
print(np.__version__)
np.show_config()  # prints the configuration itself; its return value is None

# 2. Create a null vector of size 10
Z = np.zeros(10)
print(Z)

# 3. Create a null vector of size 10 but the fifth value which is 1
A = np.zeros(10)
A[4] = 1
print(A)

# 4. Create a vector with values ranging from 10 to 49
A = np.arange(10, 50)  # the stop value is exclusive, so 50 yields 10..49
print(A)

# 5. Reverse a vector (first element becomes last)
A = np.arange(50)
A = A[::-1]
print(A)

# 6. Create a 3x3 matrix with values ranging from 0 to 8
Z = np.arange(9).reshape(3, 3)
print(Z)

# 7. Find indices of non-zero elements from [1,2,0,0,4,0]
nz = np.nonzero([1, 2, 0, 0, 4, 0])
print(nz)

# 8. Create a 3x3 identity matrix
A = np.eye(3)
print(A)

# 9. Create a 3x3x3 array with random values
A = np.random.random((3, 3, 3))
print(A)  # print the array created here, not the Z left over from exercise 6

# 10. Create a 10x10 array with random values and find the minimum and maximum values
Z = np.random.random((10, 10))
Zmin, Zmax = Z.min(), Z.max()
print(Zmin, Zmax)

# 11. Create a random vector of size 30 and find the mean value
A = np.random.random(30)
m = A.mean()
print(m)

# 12. Create a 2d array with 1 on the border and 0 inside
Z = np.ones((10, 10))
Z[1:-1, 1:-1] = 0  # the slice excludes the first/last row and column
print(Z)

# 13. How to add a border (filled with 0's) around an existing array?
A = np.ones((5, 5))  # ones(): new array of the given shape filled with ones
# pad(): surround the array with one layer of the constant value
A = np.pad(A, pad_width=1, mode='constant', constant_values=0)
print(A)

# 14. What is the result of the following expression?
# (exercise 14, continued) nan propagates through arithmetic and never
# compares equal to itself; 3 * 0.1 is not exactly 0.3 in binary floating point.
print(0 * np.nan)        # nan
print(np.nan == np.nan)  # False
print(np.nan - np.nan)   # nan
print(0.3 == 3 * 0.1)    # False

# 15. Create a 5x5 matrix with values 1,2,3,4 just below the diagonal
A = np.diag(1 + np.arange(4), k=-1)
print(A)

# 16. Create a 8x8 matrix and fill it with a checkerboard pattern
A = np.zeros((8, 8), dtype=int)
A[1::2, ::2] = 1
A[::2, 1::2] = 1
print(A)

# 17. Consider a (6,7,8) shape array, what is the index (x,y,z) of the 100th element
# unravel_index(): convert a flat index into a tuple of coordinates.
# Indices are zero-based, so the 100th element has flat index 99.
print(np.unravel_index(99, (6, 7, 8)))

# 18. Create a checkerboard 8x8 matrix using the tile function
Z = np.tile(np.array([[0, 1], [1, 0]]), (4, 4))
print(Z)

# 19. Normalize a 5x5 random matrix
Z = np.random.random((5, 5))
Zmax, Zmin = Z.max(), Z.min()
Z = (Z - Zmin) / (Zmax - Zmin)
print(Z)

# 20. Create a custom dtype that describes a color as four unsigned bytes (RGBA)
# Plain (name, type) pairs: the (name, type, 1) spelling is deprecated.
color = np.dtype([("r", np.ubyte),
                  ("g", np.ubyte),
                  ("b", np.ubyte),
                  ("a", np.ubyte)])

# 21. Multiply a 5x3 matrix by a 3x2 matrix (real matrix product)
A = np.dot(np.ones((5, 3)), np.ones((3, 2)))
print(A)
B = np.dot(np.ones((1, 2)), np.ones((2, 2)))
print(B)

# 22. Given a 1D array, negate all elements which are between 3 and 8, in place.
A = np.arange(11)
A[(3 < A) & (A < 8)] *= -1  # both bounds exclusive
print(A)

# 23. What is the output of the following script?
# This is the builtin sum, so -1 is the start value and the result is 9.
print(sum(range(5), -1))

# 24. What are the result of the following expressions?
# The operations below are expected to trigger floating-point warnings;
# they are silenced locally so only the results are shown.
with np.errstate(divide="ignore", invalid="ignore"):
    print(np.array(0) / np.array(0))   # nan
    print(np.array(0) // np.array(0))  # 0
    print(np.array([np.nan]).astype(int).astype(float))

# 25. How to round away from zero a float array ?
Z = np.random.uniform(-10, +10, 10)
print(np.copysign(np.ceil(np.abs(Z)), Z))

# 26. How to find common values between two arrays?
A1 = np.random.randint(0, 10, 10)
A2 = np.random.randint(0, 10, 10)
print(A1, A2)
print(np.intersect1d(A1, A2))

# 27. How to ignore all numpy warnings (not recommended)?
# Global switch: remember the previous settings and restore them afterwards.
defaults = np.seterr(all="ignore")
Z = np.ones(1) / 0
_ = np.seterr(**defaults)
# Preferred: a context manager that only affects the enclosed block.
with np.errstate(divide='ignore'):
    Z = np.ones(1) / 0

# 28. Is the following expressions true?
# (exercise 28, continued)
# numpy.sqrt(): element-wise square root; numpy.emath.sqrt() is the variant
# that is compared against it here for a negative input.
with np.errstate(invalid="ignore"):  # sqrt(-1) on reals warns; expected here
    print(np.sqrt(-1) == np.emath.sqrt(-1))

# 29. How to get the dates of yesterday, today and tomorrow?
# Read the clock once so the three dates are always consistent.
today = np.datetime64('today', 'D')
yesterday = today - np.timedelta64(1, 'D')
tomorrow = today + np.timedelta64(1, 'D')
print(yesterday, today, tomorrow)

# 30. How to get all the dates corresponding to the month of July 2016?
A = np.arange('2016-07', '2016-08', dtype='datetime64[D]')
print(A)
# The same idiom over a two-month range.
A = np.arange('2019-03', '2019-05', dtype='datetime64[D]')
print(A)

# 31. How to compute ((A+B)*(-A/2)) in place (without copy)?
A = np.ones(3) * 1
B = np.ones(3) * 2
print(np.add(A, B, out=B))       # B <- A + B
print(np.divide(A, 2, out=A))    # A <- A / 2
print(np.negative(A, out=A))     # A <- -A
print(np.multiply(A, B, out=A))  # A <- A * B

# 32. Extract the integer part of a random array using 5 different methods
# uniform(): draw samples from a uniform distribution
# floor():   element-wise floor
# ceil():    element-wise ceiling
# astype():  copy of the array cast to the given type
# trunc():   element-wise truncation
Z = np.random.uniform(0, 10, 10)
print(Z)
print(Z - Z % 1)
print(np.floor(Z))
print(np.ceil(Z) - 1)
print(Z.astype(int))
print(np.trunc(Z))

# 33. Create a 5x5 matrix with row values ranging from 0 to 4
A = np.zeros((5, 5))
A += np.arange(5)  # the length-5 vector is broadcast over every row
print(A)


# 34. Consider a generator function that generates 10 integers and use it to build an array
# numpy.fromiter(): create a new 1-D array from an iterable object
def generate():
    """Yield the integers 0 through 9."""
    for x in range(10):
        yield x


Z = np.fromiter(generate(), dtype=float, count=-1)
print(Z)

# 35. Create a vector of size 10 with values ranging from 0 to 1, both excluded
# linspace(): evenly spaced numbers over an interval; dropping the first of
# 11 points (endpoint excluded) leaves 10 values strictly inside (0, 1).
Z = np.linspace(0, 1, 11, endpoint=False)[1:]
print(Z)

# 36. Create a random vector of size 10 and sort it
Z = np.random.random(10)
Z.sort()
print(Z)

# 37. How to sum a small array faster than np.sum?
# (exercise 37, continued)
# add.reduce(): apply the element-wise add ufunc along the array
Z = np.arange(10)
print(np.add.reduce(Z))

# 38. Consider two random array A and B, check if they are equal
# allclose():    True if the arrays are element-wise equal within a tolerance
# array_equal(): True if the arrays have the same shape and elements
A = np.random.randint(0, 2, 5)
B = np.random.randint(0, 2, 5)
equal = np.allclose(A, B)
print(equal)
equal = np.array_equal(A, B)
print(equal)
C = np.random.randint(0, 10, 10)
D = np.random.randint(0, 10, 10)
print(C, D)
print(np.allclose(C, D))
print(np.array_equal(C, D))

# 39. Make an array immutable (read-only)
Z = np.zeros(10)
Z.flags.writeable = False

# 40. Consider a random 10x2 matrix representing cartesian coordinates, convert them to polar coordinates
A = np.random.random((10, 2))
X, Y = A[:, 0], A[:, 1]
R = np.sqrt(X**2 + Y**2)
T = np.arctan2(Y, X)
print(R)
print(T)

# 41. Create random vector of size 10 and replace the maximum value by 0
# argmax(): index of the maximum value
Z = np.random.random(10)
Z[Z.argmax()] = 0
print(Z)

# 42. Create a structured array with x and y coordinates covering the [0,1]x[0,1] area
# meshgrid(): coordinate matrices from coordinate vectors
Z = np.zeros((5, 5), [('x', float), ('y', float)])
Z['x'], Z['y'] = np.meshgrid(np.linspace(0, 1, 5),
                             np.linspace(0, 1, 5))
print(Z)

# 43. Given two arrays, X and Y, construct the Cauchy matrix C (Cij =1/(xi - yj))
# subtract.outer(): all pairwise differences x_i - y_j
# numpy.linalg.det(): determinant of an array
X = np.arange(8)
Y = X + 0.5
C = 1.0 / np.subtract.outer(X, Y)
print(np.linalg.det(C))

# 44. Print the minimum and maximum representable value for each numpy scalar type
for dtype in [np.int8, np.int32, np.int64]:
    print(np.iinfo(dtype).min)
    print(np.iinfo(dtype).max)
for dtype in [np.float32, np.float64]:
    print(np.finfo(dtype).min)
    print(np.finfo(dtype).max)
    print(np.finfo(dtype).eps)

# 45. How to print all the values of an array?
# NaN is not accepted as a threshold; an infinite threshold disables
# summarization. The context manager restores the previous print options
# on exit so later output is not affected.
Z = np.zeros((8, 8))
with np.printoptions(threshold=float("inf")):
    print(Z)

# 46. How to find the closest value (to a given scalar) in a vector?
# (exercise 46, continued)
A = np.arange(100)
v = np.random.uniform(0, 100)
index = (np.abs(A - v)).argmin()
print(A[index])

# 47. Create a structured array representing a position (x,y) and a color (r,g,b)
# Plain (name, type) fields: the (name, type, 1) spelling is deprecated.
Z = np.zeros(10, [('position', [('x', float),
                                ('y', float)]),
                  ('color', [('r', float),
                             ('g', float),
                             ('b', float)])])
print(Z)

# 48. Consider a random vector with shape (10,2) representing coordinates,
#     find point by point distances
Z = np.random.random((10, 2))
X, Y = np.atleast_2d(Z[:, 0], Z[:, 1])
D = np.sqrt((X - X.T)**2 + (Y - Y.T)**2)
print(D)

# 49. How to convert a float (32 bits) array into an integer (32 bits) array in place?
A = (np.random.rand(10) * 100).astype(np.float32)
A_int = A.view(np.int32)  # same buffer, reinterpreted as int32
A_int[:] = A              # convert the values while reusing the memory
print(A_int)

# 50. What is the equivalent of enumerate for numpy arrays?
# ndenumerate(): multidimensional index iterator yielding (index, value) pairs
Z = np.arange(9).reshape(3, 3)
print(Z)
for index, value in np.ndenumerate(Z):
    print(index, value)
for index in np.ndindex(Z.shape):
    print(index, Z[index])

# 51. Generate a generic 2D Gaussian-like array
X, Y = np.meshgrid(np.linspace(-1, 1, 10), np.linspace(-1, 1, 10))
D = np.sqrt(X * X + Y * Y)
sigma, mu = 1.0, 0.0
G = np.exp(-((D - mu)**2 / (2.0 * sigma**2)))
print(G)

# 52. How to randomly place p elements in a 2D array?
# numpy.random.choice(): random sample from a given 1-D array
# numpy.put(): replace the specified (flat) elements with the given values
n = 10
p = 4
A = np.zeros((n, n))
np.put(A, np.random.choice(range(n * n), p, replace=False), 1)
print(A)

# 53. Subtract the mean of each row of a matrix
X = np.random.rand(5, 10)
Y = X - X.mean(axis=1, keepdims=True)
print(Y)
Y = X - X.mean(axis=1).reshape(-1, 1)
print(Y)

# 54. How to sort an array by the nth column?
Z = np.random.randint(0, 10, (3, 3))
print(Z)
print(Z[Z[:, 1].argsort()])

# 55. How to tell if a given 2D array has null columns?
Z = np.random.randint(0, 3, (3, 10))
# Negate the per-column "any non-zero" mask first, then ask whether any
# column is entirely zero.
print((~Z.any(axis=0)).any())

# 56. Considering two arrays with shape (1,3) and (3,1), how to compute their sum using an iterator?
A = np.arange(3).reshape(3, 1)
B = np.arange(3).reshape(1, 3)
it = np.nditer([A, B, None])  # None asks nditer to allocate the output operand
for x, y, z in it:
    z[...] = x + y
print(it.operands[2])


# 57. Create an array class that has a name attribute
class NamedArray(np.ndarray):
    """ndarray subclass carrying a ``name`` attribute."""

    def __new__(cls, array, name="no name"):
        obj = np.asarray(array).view(cls)
        obj.name = name
        return obj

    def __array_finalize__(self, obj):
        # Called for views and slices as well; inherit the name from the
        # source object when it has one.
        if obj is None:
            return
        self.name = getattr(obj, 'name', "no name")


Z = NamedArray(np.arange(10), "range_10")
print(Z.name)

# 58. Consider a given vector, how to add 1 to each element indexed by a second vector
#     (be careful with repeated indices)?
Z = np.ones(10)
I = np.random.randint(0, len(Z), 20)
Z += np.bincount(I, minlength=len(Z))
print(Z)

# 59. How to accumulate elements of a vector (X) to an array (F) based on an index list (I)?
# numpy.bincount(): count occurrences of each value in an array of
# non-negative ints; with weights it sums the weights per value instead.
X = [1, 2, 3, 4, 5, 6]
I = [1, 3, 5, 7, 9, 4]
F = np.bincount(I, X)
print(F)

# 60. Considering a (w,h,3) image of (dtype=ubyte), compute the number of unique colors
w, h = 16, 16
I = np.random.randint(0, 2, (h, w, 3)).astype(np.ubyte)
# Widen before packing the three channels into one integer: the factors
# 256 and 65536 do not fit in an unsigned byte.
I32 = I.astype(np.uint32)
F = I32[..., 0] * (256 * 256) + I32[..., 1] * 256 + I32[..., 2]
n = len(np.unique(F))
print(n)

# 61. Considering a four dimensions array, how to get sum over the last two axis at once?
# randint(): random integers from low (inclusive) to high (exclusive)
A = np.random.randint(0, 10, (3, 4, 3, 4))
print(A)
sums = A.sum(axis=(-2, -1))  # named "sums" to avoid shadowing the builtin sum
print(sums)

# 62. Considering a one-dimensional vector D, how to compute means of subsets of D
#     using a vector S of same size describing subset indices?
D = np.random.uniform(0, 1, 100)
S = np.random.randint(0, 10, 100)
D_sums = np.bincount(S, weights=D)  # per-subset sum of D
D_counts = np.bincount(S)           # per-subset element count
D_means = D_sums / D_counts
print(D_means)
print(pd.Series(D).groupby(S).mean())

# 63. How to get the diagonal of a dot product?
A = np.random.uniform(0, 1, (5, 5))
B = np.random.uniform(0, 1, (5, 5))
# np.sum
print(np.sum(A * B.T, axis=1))
# np.diag (computes the full product first)
print(np.diag(np.dot(A, B)))
# np.einsum
print(np.einsum("ij,ji->i", A, B))

# 64. Consider the vector [1, 2, 3, 4, 5], how to build a new vector with
#     3 consecutive zeros interleaved between each value?
Z = np.array([1, 2, 3, 4, 5])
nz = 3
Z0 = np.zeros(len(Z) + (len(Z) - 1) * (nz))
Z0[::nz + 1] = Z
print(Z0)

# 65. Consider an array of dimension (5,5,3), how to multiply it by an array with dimensions (5,5)?
# (exercise 65, continued) Append a trailing axis to the (5,5) array so it
# broadcasts against the (5,5,3) array.
A = np.ones((5, 5, 3))
B = 2 * np.ones((5, 5))
print(A * B[:, :, None])

# Bonus (the snippet originally placed here): swap the first two rows of an
# array. Fancy indexing with lists swaps whole rows.
A = np.arange(25).reshape(5, 5)
A[[0, 1]] = A[[1, 0]]
print(A)

# 66. Consider a set of 10 triplets describing 10 triangles (with shared vertices),
#     find the set of unique line segments composing all the triangles
faces = np.random.randint(0, 100, (10, 3))
# (a, b, c) -> (a, a, b, b, c, c) -> rolled to (a, b, b, c, c, a)
F = np.roll(faces.repeat(2, axis=1), -1, axis=1)
F = F.reshape(len(F) * 3, 2)  # one row per segment
F = np.sort(F, axis=1)        # order the two endpoints of every segment
G = F.view(dtype=[('p0', F.dtype), ('p1', F.dtype)])
G = np.unique(G)
print(G)

# 67. Given an array C that is a bincount, how to produce an array A such that np.bincount(A) == C?
# np.repeat
C = np.bincount([1, 1, 2, 3, 4, 4, 6])
A = np.repeat(np.arange(len(C)), C)
print(A)


# 68. How to compute averages using a sliding window over an array?
# np.cumsum
def moving_average(a, n=3):
    """Return the means of every length-``n`` window of ``a``."""
    ret = np.cumsum(a, dtype=float)
    # The difference of cumulative sums n apart is the sum of one window.
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n - 1:] / n


Z = np.arange(20)
print(moving_average(Z, n=3))

# 69. Consider a one-dimensional array Z, build a two-dimensional array whose first row is
#     (Z[0],Z[1],Z[2]) and each subsequent row is shifted by 1
#     (last row should be (Z[-3],Z[-2],Z[-1])
from numpy.lib import stride_tricks


def rolling(a, window):
    """Return a 2-D strided view of 1-D ``a`` whose rows are successive windows.

    Row ``i`` holds ``a[i:i + window]``.
    """
    shape = (a.size - window + 1, window)
    # Use the array's own stride rather than its itemsize so that
    # non-contiguous input (e.g. a[::2]) is handled as well.
    step = a.strides[0]
    return stride_tricks.as_strided(a, shape=shape, strides=(step, step))


Z = rolling(np.arange(10), 3)
print(Z)

# 70. How to negate a boolean, or to change the sign of a float inplace?
# np.logical_not, np.negative
Z = np.random.randint(0, 2, 100)
print(np.logical_not(Z, out=Z))
Z = np.random.uniform(-1.0, 1.0, 100)
print(np.negative(Z, out=Z))


# 71. Consider 2 sets of points P0,P1 describing lines (2d) and a point p,
#     how to compute distance from p to each line i (P0[i],P1[i])?
def distance(P0, P1, p):
    """Return the distance from point ``p`` to each line ``(P0[i], P1[i])``.

    ``P0`` and ``P1`` are indexed as ``[:, 0]`` / ``[:, 1]``; ``p`` is indexed
    as ``[..., 0]`` / ``[..., 1]``.
    """
    T = P1 - P0                # direction of every line
    L = (T**2).sum(axis=1)     # squared length of every direction
    # Parameter of the projection of p onto each line.
    U = -((P0[:, 0] - p[..., 0]) * T[:, 0] + (P0[:, 1] - p[..., 1]) * T[:, 1]) / L
    U = U.reshape(len(U), 1)
    D = P0 + U * T - p         # vector from p to its projection
    return np.sqrt((D**2).sum(axis=1))


P0 = np.random.uniform(-10, 10, (10, 2))
P1 = np.random.uniform(-10, 10, (10, 2))
p = np.random.uniform(-10, 10, (1, 2))
print(distance(P0, P1, p))

# 72. Consider 2 sets of points P0,P1 describing lines (2d) and a set of points P,
#     how to compute distance from each point j (P[j]) to each line i (P0[i],P1[i])?
# (exercise 72, continued) Reuses distance() from exercise 71, once per point.
P0 = np.random.uniform(-10, 10, (10, 2))
P1 = np.random.uniform(-10, 10, (10, 2))
p = np.random.uniform(-10, 10, (10, 2))
print(np.array([distance(P0, P1, p_i) for p_i in p]))

# 73. Consider an array Z = [1,2,3,4,5,6,7,8,9,10,11,12,13,14], how to generate an array
#     R = [[1,2,3,4], [2,3,4,5], [3,4,5,6], ..., [11,12,13,14]]?
# stride_tricks.as_strided (stride_tricks is imported in exercise 69)
Z = np.arange(1, 15, dtype=np.uint32)
# Both strides equal one element, taken from the array instead of a literal 4.
R = stride_tricks.as_strided(Z, (11, 4), (Z.itemsize, Z.itemsize))
print(R)

# 74. Compute a matrix rank
# np.linalg.svd
Z = np.random.uniform(0, 1, (10, 10))
U, S, V = np.linalg.svd(Z)
rank = np.sum(S > 1e-10)  # count the singular values above the tolerance
print(rank)

# 75. How to find the most frequent value in an array?
# np.bincount, argmax
Z = np.random.randint(0, 10, 50)
print(np.bincount(Z).argmax())

# 76. Extract all the contiguous 3x3 blocks from a random 10x10 matrix
# stride_tricks.as_strided
Z = np.random.randint(0, 5, (10, 10))
n = 3
i = 1 + (Z.shape[0] - n)
j = 1 + (Z.shape[1] - n)
C = stride_tricks.as_strided(Z, shape=(i, j, n, n), strides=Z.strides + Z.strides)
print(C)


# 77. Create a 2D array subclass such that Z[i,j] == Z[j,i]
class Symetric(np.ndarray):
    """ndarray subclass that mirrors every item assignment across the diagonal."""

    def __setitem__(self, index, value):
        i, j = index
        super().__setitem__((i, j), value)
        super().__setitem__((j, i), value)


def symetric(Z):
    """Return ``Z + Z.T`` minus the doubled diagonal, viewed as ``Symetric``."""
    return np.asarray(Z + Z.T - np.diag(Z.diagonal())).view(Symetric)


S = symetric(np.random.randint(0, 10, (5, 5)))
S[2, 3] = 42
print(S)

# 78. Consider a set of p matrices with shape (n,n) and a set of p vectors with shape (n,1).
#     How to compute the sum of the p matrix products at once? (result has shape (n,1))
# np.tensordot
p, n = 10, 20
M = np.ones((p, n, n))
V = np.ones((p, n, 1))
S = np.tensordot(M, V, axes=[[0, 2], [0, 1]])
print(S)

# 79. Consider a 16x16 array, how to get the block-sum (block size is 4x4)?
# np.add.reduceat
Z = np.ones((16, 16))
k = 4
S = np.add.reduceat(np.add.reduceat(Z, np.arange(0, Z.shape[0], k), axis=0),
                    np.arange(0, Z.shape[1], k), axis=1)
print(S)


# 80. How to implement the Game of Life using numpy arrays?
def iterate(Z):
    """Advance the board ``Z`` by one generation, in place, and return it.

    Only the interior cells are updated; every border cell is set to 0.
    """
    # Count neighbours
    N = (Z[0:-2, 0:-2] + Z[0:-2, 1:-1] + Z[0:-2, 2:] +
         Z[1:-1, 0:-2]                 + Z[1:-1, 2:] +
         Z[2:, 0:-2]   + Z[2:, 1:-1]   + Z[2:, 2:])

    # Apply rules
    birth = (N == 3) & (Z[1:-1, 1:-1] == 0)
    survive = ((N == 2) | (N == 3)) & (Z[1:-1, 1:-1] == 1)
    Z[...] = 0
    Z[1:-1, 1:-1][birth | survive] = 1
    return Z


Z = np.random.randint(0, 2, (50, 50))
for i in range(100):
    Z = iterate(Z)
print(Z)

# 81. How to get the n largest values of an array?
# np.argsort
# numpy.random.shuffle(): modify a sequence in place by shuffling its contents
Z = np.arange(10000)
np.random.shuffle(Z)
n = 5
print(Z[np.argsort(Z)[-n:]])


# 82. Given an arbitrary number of vectors, build the cartesian product
#     (every combination of every item)
# np.indices
def cartesian(arrays):
    """Return the cartesian product of ``arrays`` as rows of an integer array."""
    arrays = [np.asarray(a) for a in arrays]
    shape = tuple(len(x) for x in arrays)

    # One row of indices per combination, one column per input vector.
    ix = np.indices(shape, dtype=int)
    ix = ix.reshape(len(arrays), -1).T

    # Replace the indices by the values they select.
    for col, arr in enumerate(arrays):
        ix[:, col] = arr[ix[:, col]]

    return ix


print(cartesian(([1, 2, 3], [4, 5], [6, 7])))

# 82b. How to create a record array from a regular array?
# np.rec.fromarrays
Z = np.array([("Hello", 2.5, 3),
              ("World", 3.6, 2)])
R = np.rec.fromarrays(Z.T,
                      names='col1, col2, col3',
                      formats='S8, f8, i8')
print(R)

# 83. Consider a large vector Z, compute Z to the power of 3 using 3 different methods
x = np.random.rand(100000)
cube_power = np.power(x, 3)                   # np.power: element-wise exponentiation
cube_mul = x * x * x                          # repeated multiplication
cube_einsum = np.einsum('i,i,i->i', x, x, x)  # einsum element-wise product
print(cube_power)
print(np.allclose(cube_power, cube_mul), np.allclose(cube_power, cube_einsum))

# 84. Considering a 10x3 matrix, extract rows with unequal values (e.g. [2,2,3])
Z = np.random.randint(0, 5, (10, 3))
print(Z)
E = np.all(Z[:, 1:] == Z[:, :-1], axis=1)  # True where all values in a row are equal
U = Z[~E]
print(U)
U = Z[Z.max(axis=1) != Z.min(axis=1), :]
print(U)

# 85. Convert a vector of ints into a matrix binary representation
# The input is defined here as a 1-D uint8 vector, which np.unpackbits requires.
I = np.array([0, 1, 2, 3, 15, 16, 32, 64, 128], dtype=np.uint8)
print(np.unpackbits(I[:, np.newaxis], axis=1))

# 86. Given a two dimensional array, how to extract unique rows?
# np.ascontiguousarray
Z = np.random.randint(0, 2, (6, 3))
# View every row as one opaque item so np.unique compares whole rows.
T = np.ascontiguousarray(Z).view(np.dtype((np.void, Z.dtype.itemsize * Z.shape[1])))
_, idx = np.unique(T, return_index=True)
uZ = Z[idx]
print(uZ)

# 87. Considering 2 vectors A & B, write the einsum equivalent of inner, outer, sum, and mul function
# np.einsum
A = np.random.uniform(0, 1, 10)
B = np.random.uniform(0, 1, 10)
print('sum')
print(np.einsum('i->', A))          # np.sum(A)
print('A * B')
print(np.einsum('i,i->i', A, B))    # A * B
print('inner')
print(np.einsum('i,i', A, B))       # np.inner(A, B)
print('outer')
print(np.einsum('i,j->ij', A, B))   # np.outer(A, B)

# 88. Considering a path described by two vectors (X,Y), how to sample it using equidistant samples?
# (exercise 88, continued) Considering a path described by two vectors (X,Y),
# how to sample it using equidistant samples (np.cumsum, np.interp)
phi = np.arange(0, 10 * np.pi, 0.1)
a = 1
x = a * phi * np.cos(phi)
y = a * phi * np.sin(phi)

dr = (np.diff(x)**2 + np.diff(y)**2)**.5  # segment lengths
r = np.zeros_like(x)
r[1:] = np.cumsum(dr)                     # cumulative length along the path
r_int = np.linspace(0, r.max(), 200)      # regularly spaced positions on the path
x_int = np.interp(r_int, r, x)            # interpolate x at those positions
y_int = np.interp(r_int, r, y)            # interpolate y at those positions

# 89. Compute bootstrapped 95% confidence intervals for the mean of a 1D array X
#     (i.e. resample the elements of the array with replacement N times, compute
#     the mean of each sample, and then compute percentiles over the means)
# np.percentile
X = np.random.randn(100)  # random 1D array
N = 1000                  # number of bootstrap samples
idx = np.random.randint(0, X.size, (N, X.size))
means = X[idx].mean(axis=1)
confint = np.percentile(means, [2.5, 97.5])
print(confint)
这100道练习,带你玩转Numpy
100 numpy exercises
btw:欢迎关注 ~
Github: https://github.com/ScarlettYellow
个人博客:https://scarletthuang.cn/