Numpy

此段代码来自于B站up技术蛋老师(https://space.bilibili.com/327247876)的视频 — Python Numpy入门精华(https://www.bilibili.com/video/BV1xK411X7ZQ/)
这位老师的视频做得很良心,推荐去看

import numpy as np

1.列表与Numpy数组的区别

# Two plain Python lists: `+` joins them end to end instead of adding
# them element by element.
python_list1 = [1, 2, 3, 4]
python_list2 = [10, 20, 30, 40]
print([*python_list1, *python_list2])
[1, 2, 3, 4, 10, 20, 30, 40]
# Element-wise addition of two lists needs an explicit loop. zip() pairs the
# items positionally, e.g.
#   list(zip(python_list1, python_list2))  # [(1, 10), (2, 20), (3, 30), (4, 40)]
# More on zip(): https://www.runoob.com/python3/python3-func-zip.html
python_list3 = [sum(pair) for pair in zip(python_list1, python_list2)]
print(python_list3)
[11, 22, 33, 44]
# Turn the lists into NumPy arrays; addition is then applied element by element.
numpy_array1 = np.array([1, 2, 3, 4])
numpy_array2 = np.array([10, 20, 30, 40])
print(np.add(numpy_array1, numpy_array2))
[11 22 33 44]
list_long = list(range(100000))
%timeit [(item * 2) for item in list_long]

array_long = np.array(list_long)
%timeit array_long * 2

# %timeit 是IPython的特殊命令
# 详见 这里总结了50个使用IPython的好处,大大提升开发效率! https://www.zhihu.com/question/51467397/answer/1098115714
7.48 ms ± 36.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
59.8 µs ± 202 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)

2.维度和形状
1D = 0D + 0D + 0D + …
2D = 1D + 1D + 1D + …
3D = 2D + 2D + 2D + …

# Build one array per rank (0-D up to 3-D) first, then print them in order,
# separating each with a dashed line.
array_0D = np.array(1)
array_1D = np.array([1, 2, 3])
array_2D = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
array_3D = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[1, 2, 3], [4, 5, 6]],
])

print(array_0D)
print('-' * 10, '\n', array_1D)
print('-' * 10, '\n', array_2D)
print('-' * 10, '\n', array_3D)

1
---------- 
 [1 2 3]
---------- 
 [[1 2 3]
 [4 5 6]]
---------- 
 [[[1 2 3]
  [4 5 6]]

 [[1 2 3]
  [4 5 6]]]
# Number of axes (ndim) of each array, one value per line.
print(array_0D.ndim, array_1D.ndim, array_2D.ndim, sep='\n')
# Shape tuple of each array, one value per line.
print(array_0D.shape, array_1D.shape, array_2D.shape, sep='\n')
0
1
2
()
(3,)
(2, 3)

3.序列与重塑

# arange covers the half-open interval [start, stop); the step is optional.
arr = np.arange(0, 100, step=2)
print(arr)
print(arr.dtype)

[ 0  2  4  6  8 10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40 42 44 46
 48 50 52 54 56 58 60 62 64 66 68 70 72 74 76 78 80 82 84 86 88 90 92 94
 96 98]
int32
# A fractional step produces a floating-point array.
arr = np.arange(0, 100, step=2.5)
print(arr)
print(arr.dtype)
[ 0.   2.5  5.   7.5 10.  12.5 15.  17.5 20.  22.5 25.  27.5 30.  32.5
 35.  37.5 40.  42.5 45.  47.5 50.  52.5 55.  57.5 60.  62.5 65.  67.5
 70.  72.5 75.  77.5 80.  82.5 85.  87.5 90.  92.5 95.  97.5]
float64
# size is the total number of elements. Check it before reshaping, because
# the product of the new shape has to match it.
print(arr.size)
arr_2D = np.reshape(arr, (5, 8))
arr_3D = np.reshape(arr, (2, 4, 5))
print(arr_2D)
print(arr_3D)
40
[[ 0.   2.5  5.   7.5 10.  12.5 15.  17.5]
 [20.  22.5 25.  27.5 30.  32.5 35.  37.5]
 [40.  42.5 45.  47.5 50.  52.5 55.  57.5]
 [60.  62.5 65.  67.5 70.  72.5 75.  77.5]
 [80.  82.5 85.  87.5 90.  92.5 95.  97.5]]
[[[ 0.   2.5  5.   7.5 10. ]
  [12.5 15.  17.5 20.  22.5]
  [25.  27.5 30.  32.5 35. ]
  [37.5 40.  42.5 45.  47.5]]

 [[50.  52.5 55.  57.5 60. ]
  [62.5 65.  67.5 70.  72.5]
  [75.  77.5 80.  82.5 85. ]
  [87.5 90.  92.5 95.  97.5]]]

4.设置固定值

# An empty shape tuple gives a 0-D array; a length-1 shape gives a 1-D array
# with a single element.
zeros_0D = np.zeros(shape=(), dtype=np.int32)
zeros_1D = np.zeros((1,))
# zeros_1D = np.zeros(1)  # equivalent spelling of the line above
print(zeros_0D)
print(zeros_1D)

0
[0.]
# dtype may be given as a string or as the NumPy type object (np.int16);
# both spellings produce the same array.
ones = np.ones(shape=(2, 3), dtype='int16')
print(ones)
[[1 1 1]
 [1 1 1]]
# Fill a 3x3 array with a constant value.
nines = np.full(shape=(3, 3), fill_value=9)
print(nines)
[[9 9 9]
 [9 9 9]
 [9 9 9]]

5.数据类型

# Mind the value range of each dtype: values outside it overflow.
arr = np.array([0, 1, 32767])
print( arr )
print( arr.dtype )

# 32767 + 1 does not fit into int16 (maximum 32767). Passing it directly to
# np.array(..., dtype='int16') wraps silently only on older NumPy releases and
# raises OverflowError on NumPy >= 2.0. Building the array with the default
# integer dtype and casting with astype() shows the same wraparound
# (32768 -> -32768) on every NumPy version.
arr2 = np.array([0, 1, 32767 + 1]).astype('int16')
print( arr2 )
print( arr2.dtype )
[    0     1 32767]
int32
[     0      1 -32768]
int16

6.随机数

# Uniform floats drawn from the half-open interval [0.0, 1.0).
random_arr1 = np.random.random(size=(3, 4, 5))
print(random_arr1)
[[[0.4886442  0.72728616 0.87807748 0.44123081 0.48527117]
  [0.88290388 0.62528213 0.28645992 0.0739232  0.45536353]
  [0.69724842 0.60985454 0.18877139 0.78754942 0.29011158]
  [0.86860128 0.40265775 0.58179793 0.30110881 0.46906258]]

 [[0.19105051 0.9495605  0.38240658 0.3390702  0.84465178]
  [0.62668747 0.96483084 0.57429467 0.49413892 0.08760963]
  [0.99841977 0.22288066 0.66710384 0.49018811 0.54936051]
  [0.64283131 0.45061287 0.3389911  0.76284574 0.29129994]]

 [[0.87851221 0.45601815 0.41775379 0.94792158 0.13049978]
  [0.59642401 0.23806206 0.57647557 0.79502845 0.87306712]
  [0.77632754 0.8392334  0.55298989 0.26138086 0.97001368]
  [0.52822754 0.57392785 0.27968511 0.76556942 0.42062802]]]
# Samples from the standard normal distribution (mean 0, variance 1).
# randn(3, 4, 5) is the convenience spelling of standard_normal(size=(3, 4, 5)).
random_arr2 = np.random.standard_normal(size=(3, 4, 5))
print(random_arr2)
[[[ 0.16582414 -0.5015488  -1.70928753  0.9464795   0.73221356]
  [ 0.33959013 -0.17694127  1.50668233  1.3588292   0.50581724]
  [-0.35953226  0.5068349  -2.00201338 -0.80015586  1.2165169 ]
  [ 0.02186408  0.27449598  0.61429625 -0.78573229 -0.86533094]]

 [[ 0.04900541 -0.1592978   0.82502207  1.16772055 -0.71490851]
  [ 0.82113987 -0.06749592  1.13770099 -0.18931726 -0.71091965]
  [-1.81822172 -0.82666248  1.59172575  0.55207281  0.3434944 ]
  [ 1.12813987 -0.31580281 -0.37460455  0.63562808 -0.97394781]]

 [[ 0.877994   -1.18584087  0.21646484 -0.70181109  0.97325045]
  [-0.88755061 -0.65336685  0.09534154  1.23654304  0.19405336]
  [ 2.61249664 -1.52579744  1.61827559  0.74583308  0.80894465]
  [-0.97862698 -0.58153689 -0.87124485 -1.03345774  0.16262545]]]
# Random integers from the half-open interval [low, high) with the given shape.
random_arr3 = np.random.randint(low=2, high=4, size=(3, 4, 5))
print(random_arr3)
[[[2 2 3 3 2]
  [2 3 2 3 3]
  [2 3 2 3 2]
  [3 3 3 3 2]]

 [[3 3 2 3 2]
  [3 2 2 3 3]
  [2 2 2 3 3]
  [3 2 3 3 3]]

 [[3 3 2 2 2]
  [3 2 3 3 2]
  [2 2 2 3 3]
  [3 2 2 3 2]]]

7.索引

# One row per attribute (student ID, then one row per subject) and one column
# per student.
row_Students = [1, 2, 3, 4, 5, 6]
row_Math = [66, 34, 88, 96, 57, 70]
row_English = [58, 22, 100, 49, 65, 34]
row_Sports = [99, 84, 48, 38, 25, 56]

students = np.array((row_Students, row_Math, row_English, row_Sports))
print(students, students.shape, sep='\n')
[[  1   2   3   4   5   6]
 [ 66  34  88  96  57  70]
 [ 58  22 100  49  65  34]
 [ 99  84  48  38  25  56]]
(4, 6)
# All grades of student 4 (column index 3), skipping the ID row (row 0)
print( students[1:, 3] )
# Equivalent to print( students[1:4, 3] ). Note that students[1:4][3] is NOT
# equivalent: it asks for row 3 of a 3-row slice and raises IndexError.

# Students 1, 3 and 5 together with all of their rows (ID row included)
print( students[:, ::2] )
# Same as print( students[0:4, 0::2] )
# ::2 means: start at index 0 and take every second column
[96 49 38]
[[  1   3   5]
 [ 66  88  57]
 [ 58 100  65]
 [ 99  48  25]]
# An element-wise comparison yields a boolean array of the same shape
print( students > 60 )

# Boolean mask: True where 60 <= value < 90
mask = ( students >= 60 ) & ( students < 90 ) 
print( mask )

# Boolean indexing returns the matching values as a flat 1-D array
# (a NumPy array, not a Python list)
print(students[mask])

# To keep the positions, use np.where: entries where the mask is False are
# replaced by the given fill value (np.nan here)
print( np.where(mask, students, np.nan) )

# np.argwhere returns the [row, column] index of every True entry of the mask
print( np.argwhere(mask) )
[[False False False False False False]
 [ True False  True  True False  True]
 [False False  True False  True False]
 [ True  True False False False False]]
[[False False False False False False]
 [ True False  True False False  True]
 [False False False False  True False]
 [False  True False False False False]]
[66 88 70 65 84]
[[nan nan nan nan nan nan]
 [66. nan 88. nan nan 70.]
 [nan nan nan nan 65. nan]
 [nan 84. nan nan nan nan]]
[[1 0]
 [1 2]
 [1 5]
 [2 4]
 [3 1]]

8.广播

# 5x5 identity matrix: ones on the main diagonal, zeros elsewhere.
eye = np.eye(N=5)
print(eye)
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]
# Add 0.1, 0.2, 0.3, 0.4, 0.5 to the successive elements of every row.
# Shapes: (5, 5) + (5,)
print(eye + np.array([0.1, 0.2, 0.3, 0.4, 0.5]))
[[1.1 0.2 0.3 0.4 0.5]
 [0.1 1.2 0.3 0.4 0.5]
 [0.1 0.2 1.3 0.4 0.5]
 [0.1 0.2 0.3 1.4 0.5]
 [0.1 0.2 0.3 0.4 1.5]]
# Add 10, 20, 30, 40, 50 to rows 0..4 respectively (one constant per row).
# Shapes: (5, 5) + (5, 1)
print(eye + np.array([[10], [20], [30], [40], [50]]))
[[11. 10. 10. 10. 10.]
 [20. 21. 20. 20. 20.]
 [30. 30. 31. 30. 30.]
 [40. 40. 40. 41. 40.]
 [50. 50. 50. 50. 51.]]
# 5x4 array of random integers drawn from [1, 100).
arr = np.random.randint(low=1, high=100, size=(5, 4))
print(arr)

[[70 69 65 48]
 [ 2 46 54 48]
 [ 2  3 59 71]
 [70  2 38 68]
 [67  4 79 54]]
# Sum over every element of the array.
print(np.sum(arr))
919

9.轴向

# Maximum along axis 0 (down each column): result shape (4,)
print(np.max(arr, axis=0))
# Maximum along axis 1 (across each row): result shape (5,)
print(np.max(arr, axis=1))
[70 69 79 71]
[70 54 71 70 79]
# Three-dimensional case: random integers from [1, 10) with shape (3, 2, 3).
arr = np.random.randint(low=1, high=10, size=(3, 2, 3))
print(arr)

[[[6 4 4]
  [9 2 1]]

 [[2 2 1]
  [5 2 5]]

 [[3 4 7]
  [1 6 2]]]
# arr.shape is (3, 2, 3); reducing along an axis removes that axis.

# axis=0 -> result shape (2, 3)
print(np.max(arr, axis=0))

# axis=1 -> result shape (3, 3)
print(np.max(arr, axis=1))

# axis=2 -> result shape (3, 2)
print(np.max(arr, axis=2))
[[6 4 7]
 [9 6 5]]
[[9 4 4]
 [5 2 5]
 [3 6 7]]
[[6 9]
 [2 5]
 [7 6]]

10.拷贝

# Ten evenly spaced values from 0 to 5, both endpoints included.
arr = np.linspace(start=0, stop=5, num=10)
print(arr)
[0.         0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
 3.33333333 3.88888889 4.44444444 5.        ]
# view() creates a new array object that shares the original's data
# (the data itself is not copied)
arr_view = arr.view()
print( arr_view )
[0.         0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
 3.33333333 3.88888889 4.44444444 5.        ]
# Because a view shares its data with the original, writing through the view
# changes the original array, and changes to the original show up in the view
arr_view[0] = np.nan
print( arr )
print( arr_view)
[       nan 0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
 3.33333333 3.88888889 4.44444444 5.        ]
[       nan 0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
 3.33333333 3.88888889 4.44444444 5.        ]
# A real copy: the new array owns its own data.
arr_copy = np.copy(arr)
print(arr_copy)
[       nan 0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
 3.33333333 3.88888889 4.44444444 5.        ]
# The copy and the original are independent: writing to the copy leaves the
# original untouched.
arr_copy[0] = 0.0
print(arr, arr_copy, sep='\n')
[       nan 0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
 3.33333333 3.88888889 4.44444444 5.        ]
[0.         0.55555556 1.11111111 1.66666667 2.22222222 2.77777778
 3.33333333 3.88888889 4.44444444 5.        ]

11.更新与删除

# Arrays are not limited to numbers; this one holds strings.
arr = np.array([
    '鸡蛋', '鸭蛋', '龟蛋', '鸵鸟蛋',
    '鸭蛋', '龟蛋', '鸵鸟蛋',
])

# Drop duplicates (the result comes back sorted).
arr2 = np.unique(arr)
print(arr2)

['鸡蛋' '鸭蛋' '鸵鸟蛋' '龟蛋']
arr3 = np.array(['好蛋', '坏蛋'])

# Join two arrays end to end.
print(np.concatenate((arr2, arr3)))

# Insert at a given position: the elements of arr3 go in before index 1 of
# arr2, i.e. they start at the second position.
print(np.insert(arr2, 1, arr3))

# Delete an element. np.delete works on a copy, so the result must be bound
# to a name; arr3 itself is unchanged.
arr4 = np.delete(arr3, 0)
print(arr3, arr4, sep='\n')

['鸡蛋' '鸭蛋' '鸵鸟蛋' '龟蛋' '好蛋' '坏蛋']
['鸡蛋' '好蛋' '坏蛋' '鸭蛋' '鸵鸟蛋' '龟蛋']
['好蛋' '坏蛋']
['坏蛋']

你可能感兴趣的:(numpy,python,数据分析)