# 查看版本 .__version__
import numpy as np
np.__version__
>>>
'1.16.2'
NumPy 包的核心是 ndarray 对象(n 维数组),电脑上所有数据都是以数字形式保存的。
Numpy 中文手册
np.array()
由 python list 创建
l = [1,2,3,4,5]
print(type(l))
nd = np.array(l)
print(type(nd))
>>>
<class 'list'>
<class 'numpy.ndarray'>
l2 = [[1,3,5,7],[2,4,6,8]]
print(l2)
nd = np.array(l2)
print(nd)
>>>
[[1, 3, 5, 7], [2, 4, 6, 8]]
[[1 3 5 7]
[2 4 6 8]]
np.array
和 list
的异同:
%timeit nd.sum()
>>>
2.13 µs ± 44.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
def sum2(x):
    """Add up the items of ``x`` one at a time with a plain Python loop.

    This is the hand-written counterpart to ``nd.sum()`` used in the
    ``%timeit`` comparison.

    NOTE: iterating a multi-dimensional ndarray yields sub-arrays along the
    first axis, so for such input the result is an array of per-column
    totals rather than the single scalar that ``nd.sum()`` returns.

    Args:
        x: Any iterable whose items can be added to ``0`` and to each other.

    Returns:
        The accumulated total, or ``0`` when ``x`` is empty.
    """
    ret = 0
    for i in x:
        ret += i
    return ret
%timeit sum2(nd)
>>>
2.78 µs ± 81.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
1、np.ones(shape,dtype=None,order='C')
np.ones((5,5),np.int8)
>>>
array([[1, 1, 1, 1, 1],
[1, 1, 1, 1, 1],
[1, 1, 1, 1, 1],
[1, 1, 1, 1, 1],
[1, 1, 1, 1, 1]], dtype=int8)
2、np.zeros()
np.zeros((2,3,4),np.float16)
>>>
array([[[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]],
[[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]]], dtype=float16)
3、np.full(shape, fill_value, dtype=None,order='C')
np.full((3,5),3.14)
>>>
array([[3.14, 3.14, 3.14, 3.14, 3.14],
[3.14, 3.14, 3.14, 3.14, 3.14],
[3.14, 3.14, 3.14, 3.14, 3.14]])
4、np.eye(N, M=None, k=0, dtype = float)
np.eye(5)
>>>
array([[1., 0., 0., 0., 0.],
[0., 1., 0., 0., 0.],
[0., 0., 1., 0., 0.],
[0., 0., 0., 1., 0.],
[0., 0., 0., 0., 1.]])
5、np.linspace(start, stop, num=50, endpoint=True,retstep=False,dtype=None)
# 也叫等差数列
np.linspace(0,100,num=101)
>>>
array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.,
11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21.,
22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
33., 34., 35., 36., 37., 38., 39., 40., 41., 42., 43.,
44., 45., 46., 47., 48., 49., 50., 51., 52., 53., 54.,
55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
66., 67., 68., 69., 70., 71., 72., 73., 74., 75., 76.,
77., 78., 79., 80., 81., 82., 83., 84., 85., 86., 87.,
88., 89., 90., 91., 92., 93., 94., 95., 96., 97., 98.,
99., 100.])
6、np.arange(start, stop, step, dtype=None)
# 等间隔数列
np.arange(1,10,2)
>>>
array([1, 3, 5, 7, 9])
7、np.random.randint(low,high=None,size=None,dtype='l')
# 一定范围内的随机数
np.random.randint(0,100,(2,4))
>>>
array([[98, 63, 18, 11],
[40, 58, 62, 12]])
8、np.random.randn(d0, d1,...,dn)
# 正态分布,均值为 0,方差为 1
np.random.randn(4,5)
array([[ 0.41147432, -0.56841502, -0.64467238, -0.88004511, -0.22976166],
[-0.14085248, -0.47433223, -0.20435651, 0.42996496, 1.0578385 ],
[-0.60056564, 1.10390688, -0.66069103, 0.3525401 , -0.75591328],
[-1.22475853, -0.18057379, 0.56044618, 2.00066107, 1.47393763]])
9、np.random.normal(loc=0.0, scale = 1.0, size = None)
# 自定义正态分布
# loc 均值,scale 标准差,size 尺寸
np.random.normal(1.75,0.5,10)
>>>
array([1.36952483, 2.34225265, 1.09145538, 2.71158254, 0.96843264,
1.79764286, 1.7570981 , 1.8556677 , 2.7517673 , 2.18922818])
# .round(n) 小数点后保留n位
np.random.normal(1.75,0.5,10).round(2)
>>>
array([1.42, 2.12, 0.84, 1.72, 1.52, 1.17, 1.61, 2.05, 2.68, 1.7 ])
10、np.random.random(size=None)
# 生成 0~1 的随机数,左闭右开
np.random.random(10)
>>>
array([0.62466642, 0.71381154, 0.63322465, 0.36875306, 0.41764157,
0.11489776, 0.53775854, 0.56280265, 0.24020451, 0.37022401])
四个必记的属性:
nd = np.random.normal(1.75,10,10).round(2)
print(nd)
>>>
[ 6.7 12.78 -10.42 -3.69 8.15 -0.28 13.65 -0.48 17.71 4.11]
nd.shape
nd.size
nd.ndim
nd.dtype
>>>
(10,)
10
1
dtype('float64')
1、索引
ndarray 索引和列表完全一致,多维索引和一维、二维索引规律完全相同
nd2 = np.random.randint(0,150,(4,5))
print(nd2)
>>>
[[ 91 115 78 49 145]
[141 78 117 21 18]
[ 53 93 110 26 128]
[ 38 93 105 27 65]]
nd2[1,1]
nd2[2]
>>>
78
array([ 53, 93, 110, 26, 128])
2、切片
nd2[0:2]
>>>
array([[ 91, 115, 78, 49, 145],
[141, 78, 117, 21, 18]])
nd2[-2:]
>>>
array([[ 53, 93, 110, 26, 128],
[ 38, 93, 105, 27, 65]])
nd2[0:3,0:3]
>>>
array([[ 91, 115, 78],
[141, 78, 117],
[ 53, 93, 110]])
数据翻转:
print(nd)
>>>
[ 6.7 12.78 -10.42 -3.69 8.15 -0.28 13.65 -0.48 17.71 4.11]
nd[::-1]
>>>
array([ 4.11, 17.71, -0.48, 13.65, -0.28, 8.15, -3.69, -10.42,
12.78, 6.7 ])
nd[::2]
>>>
array([ 6.7 , -10.42, 8.15, 13.65, 17.71])
练习:对图像操作
from PIL import Image
img = Image.open('resnet18.jpg')
img_data = np.array(img)
img_data.shape
img_data.dtype
>>>
# 行 列 页
(3016, 4032, 3)
# 图像是 uint8 类型,2^8=256,可以表示 256 个值(0~255)
dtype('uint8')
int8:8位有符号整型(-128~127)
uint8:8位无符号整型(0~255)
img_data.max()
>>>
226
img_data.min()
>>>
0
3、变形
# reshape
nd
>>>
array([11. , 2.41, 1.38, 13.22, 4.9 , -2.03, 8.64, -9.92, -1.88, -4.34])
# reshape
nd2 = nd.reshape(2,5)
print(nd2)
>>>
[[11. , 2.41, 1.38, 13.22, 4.9 ],
[-2.03, 8.64, -9.92, -1.88, -4.34]]
4、转置
# np.transpose
nd2.transpose()
>>>
array([[11. , -2.03],
[ 2.41, 8.64],
[ 1.38, -9.92],
[13.22, -1.88],
[ 4.9 , -4.34]])
5、级联
# np.concatenate()
np.concatenate([nd2,nd2])
>>>
array([[11. , 2.41, 1.38, 13.22, 4.9 ],
[-2.03, 8.64, -9.92, -1.88, -4.34],
[11. , 2.41, 1.38, 13.22, 4.9 ],
[-2.03, 8.64, -9.92, -1.88, -4.34]])
6、水平级联+竖直级联
# np.hstack 水平方向,axis = 1
nd3 = np.hstack([nd2,nd2])
print(nd3)
>>>
[[11. 2.41 1.38 13.22 4.9 11. 2.41 1.38 13.22 4.9 ]
[-2.03 8.64 -9.92 -1.88 -4.34 -2.03 8.64 -9.92 -1.88 -4.34]]
# np.vstack 竖直方向,axis=0
nd4 = np.vstack([nd2,nd2])
print(nd4)
>>>
[[11. 2.41 1.38 13.22 4.9 ]
[-2.03 8.64 -9.92 -1.88 -4.34]
[11. 2.41 1.38 13.22 4.9 ]
[-2.03 8.64 -9.92 -1.88 -4.34]]
7、切分
# np.split(x,N) 平均分成 N 份
print(nd4)
nd5 = np.split(nd4, 4)
print(nd5)
>>>
[[11. 2.41 1.38 13.22 4.9 ]
[-2.03 8.64 -9.92 -1.88 -4.34]
[11. 2.41 1.38 13.22 4.9 ]
[-2.03 8.64 -9.92 -1.88 -4.34]]
[array([[11. , 2.41, 1.38, 13.22, 4.9 ]]),
array([[-2.03, 8.64, -9.92, -1.88, -4.34]]),
array([[11. , 2.41, 1.38, 13.22, 4.9 ]]),
array([[-2.03, 8.64, -9.92, -1.88, -4.34]])]
# np.split(x,[N1,N2]) 从 N1 行和 N2 行分别切开
nd6 = np.split(nd4,[1,3])
>>>
[array([[11. , 2.41, 1.38, 13.22, 4.9 ]]),
array([[-2.03, 8.64, -9.92, -1.88, -4.34],
[11. , 2.41, 1.38, 13.22, 4.9 ]]),
array([[-2.03, 8.64, -9.92, -1.88, -4.34]])]
# np.vsplit
np.vsplit(nd4,4)
>>>
[array([[11. , 2.41, 1.38, 13.22, 4.9 ]]),
array([[-2.03, 8.64, -9.92, -1.88, -4.34]]),
array([[11. , 2.41, 1.38, 13.22, 4.9 ]]),
array([[-2.03, 8.64, -9.92, -1.88, -4.34]])]
# np.hsplit
nd7 = np.hsplit(nd4,5)
[array([[11. ],
[-2.03],
[11. ],
[-2.03]]), array([[2.41],
[8.64],
[2.41],
[8.64]]), array([[ 1.38],
[-9.92],
[ 1.38],
[-9.92]]), array([[13.22],
[-1.88],
[13.22],
[-1.88]]), array([[ 4.9 ],
[-4.34],
[ 4.9 ],
[-4.34]])]
8、副本
nd7 = nd5
id(nd7)
id(nd5)
>>>
2179900151752
2179900151752
# Use copy() so that nd7 is a new object instead of a second name for nd5;
# plain assignment (nd7 = nd5) only aliases, which is why the ids matched above.
# nd5 is the list returned by np.split, so this is a shallow copy: a new list
# object that still holds the same arrays.
nd7 = nd5.copy()
id(nd7)
id(nd5)
>>>
2179871747976
2179900151752
9、numpy 的统计学:
平均值、中位数、方差、标准差、协方差、关联
# 直方图
import numpy as np
nd = np.random.randint(0,1000,100)
np.histogram(nd, bins=10)
>>>
# 第一个 array 是每个 bin 里边的数值个数
# 第二个 array 是 bin 的取值范围
(array([13, 10, 7, 12, 8, 5, 11, 11, 5, 18], dtype=int64),
array([ 10. , 108.8, 207.6, 306.4, 405.2, 504. , 602.8, 701.6, 800.4, 899.2, 998. ]))
10、操作文件
# 保存 npy 文件,np.save,npy 文件是二进制文件
np.save('./data.npy',nd)
# load 文件
np.load('./data.npy')
# 保存 txt 文件
np.savetxt('./data.txt',nd)
# 加载 txt 文件
np.loadtxt('./data.txt')
1、基本矩阵操作
1)算数运算符:
2)矩阵积
a = np.random.randint(0,100,(4,5))
b = np.random.randint(0,100,(5,4))
np.dot(a,b)
>>>
array([[16215, 14663, 15557, 21490],
[ 9840, 9357, 9573, 16708],
[14834, 14976, 16061, 20842],
[15684, 16925, 17148, 15196]])
2、广播机制
m = np.ones((2,3))
print('m:',m)
a = np.arange(3)
print('a:',a)
m+a
>>>
m: [[1. 1. 1.]
[1. 1. 1.]]
a: [0 1 2]
array([[1., 2., 3.],
[1., 2., 3.]])
3、排序
1、排序
# np.sort(nd)
# nd.sort
2、部分排序
# np.partition(x, kth=N):把排序后应位于下标 N(从 0 开始)的元素放到该位置,比它小的都在左边、不小于它的都在右边(两侧内部不保证有序)
# np.partition(x, kth=-N):最大的 N 个元素被移到末尾(内部同样不保证有序)
nd2 = np.random.randint(2,30,10)
print(nd2)
np.partition(nd2,kth = 2) # 只排前两个最小的
>>>
[10 4 22 13 24 16 28 22 22 22]
array([ 4, 10, 13, 22, 24, 16, 28, 22, 22, 22])