此段代码来自于B站up技术蛋老师(https://space.bilibili.com/327247876)的视频 — Python Numpy入门精华(https://www.bilibili.com/video/BV1xK411X7ZQ/)
这个老师的视频做得很良心,推荐去看
import numpy as np
1.列表与Numpy数组的区别
# With plain Python lists, `+` concatenates; it does not add element by element.
python_list1 = [1, 2, 3, 4]
python_list2 = [10, 20, 30, 40]
concatenated = python_list1 + python_list2
print(concatenated)
[1, 2, 3, 4, 10, 20, 30, 40]
# Element-wise addition of two lists needs an explicit loop. zip() pairs the
# items up: list(zip(python_list1, python_list2)) is
# [(1, 10), (2, 20), (3, 30), (4, 40)].
# More on zip(): https://www.runoob.com/python3/python3-func-zip.html
python_list3 = [sum(pair) for pair in zip(python_list1, python_list2)]
print(python_list3)
[11, 22, 33, 44]
# Turn the lists into NumPy arrays: `+` now adds element by element.
numpy_array1, numpy_array2 = np.array([1, 2, 3, 4]), np.array([10, 20, 30, 40])
elementwise_sum = numpy_array1 + numpy_array2
print(elementwise_sum)
[11 22 33 44]
list_long = list(range(100000))
%timeit [(item * 2) for item in list_long]
array_long = np.array(list_long)
%timeit array_long * 2
# %timeit is an IPython magic command, so this cell only runs under IPython/Jupyter.
# See "50 benefits of using IPython" (in Chinese): https://www.zhihu.com/question/51467397/answer/1098115714
7.48 ms ± 36.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
59.8 µs ± 202 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
2.维度和形状
1D = 0D + 0D + 0D + …
2D = 1D + 1D + 1D + …
3D = 2D + 2D + 2D + …
…
# Build one array of each rank from 0 to 3, then print them in order with a
# dashed rule in front of every array after the first.
array_0D = np.array(1)
array_1D = np.array([1, 2, 3])
array_2D = np.array([[1, 2, 3], [4, 5, 6]])
array_3D = np.array([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]])

print(array_0D)
for array_nD in (array_1D, array_2D, array_3D):
    print('-'*10, '\n', array_nD)
1
----------
[1 2 3]
----------
[[1 2 3]
[4 5 6]]
----------
[[[1 2 3]
[4 5 6]]
[[1 2 3]
[4 5 6]]]
# ndim is the number of axes; shape is the length along each axis.
for arr_nd in (array_0D, array_1D, array_2D):
    print(arr_nd.ndim)
for arr_nd in (array_0D, array_1D, array_2D):
    print(arr_nd.shape)
0
1
2
()
(3,)
(2, 3)
3.序列与重塑
# arange covers the half-open interval [start, stop) and accepts a step.
step = 2
arr = np.arange(0, 100, step)
print(arr, arr.dtype, sep='\n')
[ 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40 42 44 46
48 50 52 54 56 58 60 62 64 66 68 70 72 74 76 78 80 82 84 86 88 90 92 94
96 98]
int32
# A fractional step makes arange produce a floating-point array.
arr = np.arange(0, 100, 2.5)
print(arr, arr.dtype, sep='\n')
[ 0. 2.5 5. 7.5 10. 12.5 15. 17.5 20. 22.5 25. 27.5 30. 32.5
35. 37.5 40. 42.5 45. 47.5 50. 52.5 55. 57.5 60. 62.5 65. 67.5
70. 72.5 75. 77.5 80. 82.5 85. 87.5 90. 92.5 95. 97.5]
float64
# size is the total number of elements. Check it before reshaping: the target
# shape must multiply out to exactly this count.
print(arr.size)
arr_2D = arr.reshape((5, 8))
print(arr_2D)
arr_3D = arr.reshape((2, 4, 5))
print(arr_3D)
40
[[ 0. 2.5 5. 7.5 10. 12.5 15. 17.5]
[20. 22.5 25. 27.5 30. 32.5 35. 37.5]
[40. 42.5 45. 47.5 50. 52.5 55. 57.5]
[60. 62.5 65. 67.5 70. 72.5 75. 77.5]
[80. 82.5 85. 87.5 90. 92.5 95. 97.5]]
[[[ 0. 2.5 5. 7.5 10. ]
[12.5 15. 17.5 20. 22.5]
[25. 27.5 30. 32.5 35. ]
[37.5 40. 42.5 45. 47.5]]
[[50. 52.5 55. 57.5 60. ]
[62.5 65. 67.5 70. 72.5]
[75. 77.5 80. 82.5 85. ]
[87.5 90. 92.5 95. 97.5]]]
4.设置固定值
# A 0-D array of zeros with an explicit integer dtype.
zeros_0D = np.zeros(shape=(), dtype=np.int32)
# A 1-D array with one element; np.zeros(1) is an equivalent spelling.
zeros_1D = np.zeros((1,))
print(zeros_0D, zeros_1D, sep='\n')
0
[0.]
# dtype may be given as a string; 'int16' has the same effect as np.int16.
ones = np.ones(shape=(2, 3), dtype='int16')
print(ones)
[[1 1 1]
[1 1 1]]
# np.full fills an array of the given shape with one constant value.
nines = np.full(shape=(3, 3), fill_value=9)
print(nines)
[[9 9 9]
[9 9 9]
[9 9 9]]
5.数据类型
# Mind the value range of each dtype, otherwise values can overflow.
# With no dtype given, NumPy uses its default integer type, which holds 32767.
arr = np.array([0, 1, 32767])
print(arr)
print(arr.dtype)
# int16 covers -32768..32767, so 32767 + 1 does not fit and wraps to -32768.
# The cast goes through astype() because NumPy >= 2.0 raises OverflowError when
# np.array() receives an out-of-range Python int together with dtype='int16'
# (NumPy 1.24+ already warned). astype() keeps the wrap-around demo working.
arr2 = np.array([0, 1, 32767 + 1]).astype('int16')
print(arr2)
print(arr2.dtype)
[ 0 1 32767]
int32
[ 0 1 -32768]
int16
6.随机数
# Floats drawn from the half-open interval [0.0, 1.0).
random_arr1 = np.random.random(size=(3, 4, 5))
print(random_arr1)
[[[0.4886442 0.72728616 0.87807748 0.44123081 0.48527117]
[0.88290388 0.62528213 0.28645992 0.0739232 0.45536353]
[0.69724842 0.60985454 0.18877139 0.78754942 0.29011158]
[0.86860128 0.40265775 0.58179793 0.30110881 0.46906258]]
[[0.19105051 0.9495605 0.38240658 0.3390702 0.84465178]
[0.62668747 0.96483084 0.57429467 0.49413892 0.08760963]
[0.99841977 0.22288066 0.66710384 0.49018811 0.54936051]
[0.64283131 0.45061287 0.3389911 0.76284574 0.29129994]]
[[0.87851221 0.45601815 0.41775379 0.94792158 0.13049978]
[0.59642401 0.23806206 0.57647557 0.79502845 0.87306712]
[0.77632754 0.8392334 0.55298989 0.26138086 0.97001368]
[0.52822754 0.57392785 0.27968511 0.76556942 0.42062802]]]
# Samples from the standard normal distribution; randn takes the dimensions as
# separate positional arguments, so the shape tuple is unpacked.
normal_shape = (3, 4, 5)
random_arr2 = np.random.randn(*normal_shape)
print(random_arr2)
[[[ 0.16582414 -0.5015488 -1.70928753 0.9464795 0.73221356]
[ 0.33959013 -0.17694127 1.50668233 1.3588292 0.50581724]
[-0.35953226 0.5068349 -2.00201338 -0.80015586 1.2165169 ]
[ 0.02186408 0.27449598 0.61429625 -0.78573229 -0.86533094]]
[[ 0.04900541 -0.1592978 0.82502207 1.16772055 -0.71490851]
[ 0.82113987 -0.06749592 1.13770099 -0.18931726 -0.71091965]
[-1.81822172 -0.82666248 1.59172575 0.55207281 0.3434944 ]
[ 1.12813987 -0.31580281 -0.37460455 0.63562808 -0.97394781]]
[[ 0.877994 -1.18584087 0.21646484 -0.70181109 0.97325045]
[-0.88755061 -0.65336685 0.09534154 1.23654304 0.19405336]
[ 2.61249664 -1.52579744 1.61827559 0.74583308 0.80894465]
[-0.97862698 -0.58153689 -0.87124485 -1.03345774 0.16262545]]]
# Random integers from the half-open interval [low, high), in the given shape.
random_arr3 = np.random.randint(low=2, high=4, size=(3, 4, 5))
print(random_arr3)
[[[2 2 3 3 2]
[2 3 2 3 3]
[2 3 2 3 2]
[3 3 3 3 2]]
[[3 3 2 3 2]
[3 2 2 3 3]
[2 2 2 3 3]
[3 2 3 3 3]]
[[3 3 2 2 2]
[3 2 3 3 2]
[2 2 2 3 3]
[3 2 2 3 2]]]
7.索引
# One row per attribute (student id, then one row per subject) and one column
# per student.
row_Students = [1, 2, 3, 4, 5, 6]
row_Math = [66, 34, 88, 96, 57, 70]
row_English = [58, 22, 100, 49, 65, 34]
row_Sports = [99, 84, 48, 38, 25, 56]
score_rows = (row_Students, row_Math, row_English, row_Sports)
students = np.array(score_rows)
print(students, students.shape, sep='\n')
[[ 1 2 3 4 5 6]
[ 66 34 88 96 57 70]
[ 58 22 100 49 65 34]
[ 99 84 48 38 25 56]]
(4, 6)
# All scores of student 4: column index 3, rows 1 onward (row 0 holds the ids)
print( students[1:, 3] )
# Same as students[1:4, 3]. Note that students[1:4][3] is NOT equivalent: it asks for row 3 of a 3-row slice and raises IndexError
# Students 1, 3 and 5 together with all of their scores
print( students[:, ::2] )
# Same as students[0:4, 0::2]
# ::2 means start at index 0 and take every second column
[96 49 38]
[[ 1 3 5]
[ 66 88 57]
[ 58 100 65]
[ 99 48 25]]
# An element-wise comparison yields a boolean array of the same shape.
print(students > 60)
# Index mask: True where 60 <= value < 90.
mask = (students < 90) & (students >= 60)
print(mask)
# Boolean indexing returns the matching values as a flat 1-D array.
print(students[mask])
# np.where keeps the positions: matching entries stay, and every other
# position is replaced by the fill value (NaN here).
print(np.where(mask, students, np.nan))
# np.argwhere returns the index of every True entry in the mask.
print(np.argwhere(mask))
[[False False False False False False]
[ True False True True False True]
[False False True False True False]
[ True True False False False False]]
[[False False False False False False]
[ True False True False False True]
[False False False False True False]
[False True False False False False]]
[66 88 70 65 84]
[[nan nan nan nan nan nan]
[66. nan 88. nan nan 70.]
[nan nan nan nan 65. nan]
[nan 84. nan nan nan nan]]
[[1 0]
[1 2]
[1 5]
[2 4]
[3 1]]
8.广播
# 5x5 identity matrix: ones on the main diagonal, zeros elsewhere.
eye = np.eye(N=5)
print(eye)
[[1. 0. 0. 0. 0.]
[0. 1. 0. 0. 0.]
[0. 0. 1. 0. 0.]
[0. 0. 0. 1. 0.]
[0. 0. 0. 0. 1.]]
# Add 0.1, 0.2, 0.3, 0.4, 0.5 to the elements of every row in turn.
# Shapes (5, 5) + (5,): the 1-D operand is broadcast across the rows.
row_offsets = [0.1, 0.2, 0.3, 0.4, 0.5]
print(eye + row_offsets)
[[1.1 0.2 0.3 0.4 0.5]
[0.1 1.2 0.3 0.4 0.5]
[0.1 0.2 1.3 0.4 0.5]
[0.1 0.2 0.3 1.4 0.5]
[0.1 0.2 0.3 0.4 1.5]]
# Add 10, 20, 30, 40, 50 to whole rows, one value per row.
# Shapes (5, 5) + (5, 1): the column operand is broadcast across the columns.
column_offsets = [[10], [20], [30], [40], [50]]
print(eye + column_offsets)
[[11. 10. 10. 10. 10.]
[20. 21. 20. 20. 20.]
[30. 30. 31. 30. 30.]
[40. 40. 40. 41. 40.]
[50. 50. 50. 50. 51.]]
# A 5x4 table of random integers drawn from [1, 100).
arr = np.random.randint(low=1, high=100, size=(5, 4))
print(arr)
[[70 69 65 48]
[ 2 46 54 48]
[ 2 3 59 71]
[70 2 38 68]
[67 4 79 54]]
# Sum over every element of the array.
total = arr.sum()
print(total)
919
9.轴向
# Reducing along axis 0 collapses the rows: result shape (4,).
print(np.max(arr, axis=0))
# Reducing along axis 1 collapses the columns: result shape (5,).
print(np.max(arr, axis=1))
[70 69 79 71]
[70 54 71 70 79]
# The three-dimensional case: random integers from [1, 10) in shape (3, 2, 3).
arr = np.random.randint(low=1, high=10, size=(3, 2, 3))
print(arr)
[[[6 4 4]
[9 2 1]]
[[2 2 1]
[5 2 5]]
[[3 4 7]
[1 6 2]]]
# arr has shape (3, 2, 3); reducing along an axis removes that axis:
#   axis=0 -> (2, 3), axis=1 -> (3, 3), axis=2 -> (3, 2)
for axis in range(3):
    print(arr.max(axis=axis))
[[6 4 7]
[9 6 5]]
[[9 4 4]
[5 2 5]
[3 6 7]]
[[6 9]
[2 5]
[7 6]]
10.拷贝
# Ten evenly spaced values from 0 to 5, both endpoints included.
arr = np.linspace(start=0, stop=5, num=10)
print(arr)
[0. 0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
3.33333333 3.88888889 4.44444444 5. ]
# "Copying" with view(): the result is a new array object over arr's data.
arr_view = arr.view()
print(arr_view)
[0. 0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
3.33333333 3.88888889 4.44444444 5. ]
# A view shares its data with the original, so a write through either one is
# visible through the other.
arr_view[0] = np.nan
print(arr, arr_view, sep='\n')
[ nan 0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
3.33333333 3.88888889 4.44444444 5. ]
[ nan 0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
3.33333333 3.88888889 4.44444444 5. ]
# Copying with copy(): the result is an independent array.
arr_copy = arr.copy()
print(arr_copy)
[ nan 0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
3.33333333 3.88888889 4.44444444 5. ]
# With copy() the two arrays do not affect each other: writing to the copy
# leaves the original untouched.
arr_copy[0] = 0.
print(arr, arr_copy, sep='\n')
[ nan 0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
3.33333333 3.88888889 4.44444444 5. ]
[0. 0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
3.33333333 3.88888889 4.44444444 5. ]
11.更新与删除
# Arrays are not limited to numbers; strings work too.
egg_names = ['鸡蛋', '鸭蛋', '龟蛋', '鸵鸟蛋', '鸭蛋', '龟蛋', '鸵鸟蛋']
arr = np.array(egg_names)
# np.unique removes duplicates and returns the remaining values sorted.
arr2 = np.unique(arr)
print(arr2)
['鸡蛋' '鸭蛋' '鸵鸟蛋' '龟蛋']
arr3 = np.array(['好蛋', '坏蛋'])
# Join the two arrays end to end.
print(np.concatenate((arr2, arr3)))
# Insert at a position: arr3's elements go into arr2 starting at index 1,
# i.e. the second position.
print(np.insert(arr2, 1, arr3))
# np.delete works on a copy, so its return value has to be captured;
# arr3 itself is left unchanged.
arr4 = np.delete(arr3, 0)
print(arr3, arr4, sep='\n')
['鸡蛋' '鸭蛋' '鸵鸟蛋' '龟蛋' '好蛋' '坏蛋']
['鸡蛋' '好蛋' '坏蛋' '鸭蛋' '鸵鸟蛋' '龟蛋']
['好蛋' '坏蛋']
['坏蛋']