np.genfromtxt(data, ...) 链接
np.genfromtxt(data, ...)
>>> data = BytesIO("So it goes\n#a b c\n1 2 3\n 4 5 6")
>>> np.genfromtxt(data, skip_header=1, names=True)
array([(1.0, 2.0, 3.0), (4.0, 5.0, 6.0)],
dtype=[('a', '<f8'), ('b', '<f8'), ('c', '<f8')])
>>> data = "N/A, 2, 3\n4, ,???"
>>> kwargs = dict(delimiter=",",
... dtype=int,
... names="a,b,c",
... missing_values={
0:"N/A", 'b':" ", 2:"???"},
... filling_values={
0:0, 'b':0, 2:-999})
>>> np.genfromtxt(BytesIO(data), **kwargs)
array([(0, 2, 3), (4, 0, -999)],
dtype=[('a', '<i8'), ('b', '<i8'), ('c', '<i8')])
k维的数组,索引时需要k个索引值,索引值可以是常数、切片、数组索引(整数、布尔)
a = array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
# a.shape = (3, 4)
# 整数及切片索引
# Slice用的是view的方式,而index用的是copy方式
a[1,2] # 6
a[0, [1,2]] # [1, 2]
a[0:2, 2:4] # [[2, 3], [6, 7]]
a[:, 2:4] # [[2, 3], [6, 7], [10, 11]]
a[0:1, ..., 2:4, 0] # 省略号(...)语法示例,仅适用于3维及以上数组;对上面的二维 a 会因索引过多抛 IndexError
# 整数数组索引
i=array([[0, 1], [1, 2]])
j=array([[2, 1], [3, 3]])
a[i, j] == array([[ 2, 5],
[ 7, 11]])
# a[i, j].shape == a[0, j].shape == a[i, 0].shape == (2, 2)
a[:, j] == array([[[ 2, 1],
[ 3, 3]],
[[ 6, 5],
[ 7, 7]],
[[10, 9],
[11, 11]]])
# a[:, j].shape == (3, 2, 2)
a[i, :] == array([[[ 0, 1, 2, 3],
[ 4, 5, 6, 7]],
[[ 4, 5, 6, 7],
[ 8, 9, 10, 11]]])
# a[i, :].shape == (2, 2, 4)
# 布尔数组索引
b = a > 4
b == array([[False, False, False, False],
[False, True, True, True],
[ True, True, True, True]], dtype=bool)
a[b] == array([ 5, 6, 7, 8, 9, 10, 11])
a[b] = 0 # 布尔索引赋值会原地修改 a(注意:取值时 a[b] 返回的是拷贝,不是视图)
a == array([[0, 1, 2, 3],
[4, 0, 0, 0],
[0, 0, 0, 0]])
a[a[:,0]>2, a[0,:]>1] # a中第一列大于2的行且第一行大于1的列的元素
# ix_函数可以产生多元组合
a = array([2,3,4,5])
b = array([8,5,4])
c = array([5,4,6,8,3])
ax,bx,cx = ix_(a,b,c)
# ax.shape, bx.shape, cx.shape == ((4, 1, 1), (1, 3, 1), (1, 1, 5))
result = ax+bx*cx #利用广播机制,产生不同维度的所有组合
result[3,2,4] == a[3]+b[2]*c[4] == 17
# 笛卡尔积
numpy.transpose([numpy.tile(x, len(y)), numpy.repeat(y, len(x))])
array([[1, 4],
[2, 4],
[3, 4],
[1, 5],
[2, 5],
[3, 5]])
[[x0, y0] for x0 in x for y0 in y] # 纯 Python 写法,组合相同但顺序不同:[1,4],[1,5],[2,4],...(y 变化最快)
# 查找元素
np.where(condition, [x, y]) #[x,y]为可选项,满足条件返回x,不满足条件返回y
>>> np.where(a < 4, a, -1) # -1 is broadcast
array([[ 0, 1, 2],
[ 0, 2, -1],
[ 0, 3, -1]])
np.argwhere() #返回满足括号内条件的元素的索引,数组形式
>>> np.argwhere(a>5)
array([[0, 2],
[1, 0],
[1, 3],
[2, 3]], dtype=int64)
# 获取索引迭代器
>>> for index in np.ndindex(3, 2, 1):
...     print(index)
(0, 0, 0)
(0, 1, 0)
(1, 0, 0)
(1, 1, 0)
(2, 0, 0)
(2, 1, 0)
# 获取索引+元素迭代器
a = np.random.randint(0,5, (3,2))
for i,x in np.ndenumerate(a):
    print(i, x)
(0, 0) 4
(0, 1) 0
(1, 0) 3
(1, 1) 3
(2, 0) 3
(2, 1) 4
np.nditer(a, order='K')
默认按内存顺序迭代访问,可通过 order='C' 指定C顺序或 order='F' 指定Fortran顺序
# 迭代读取
>>> a = np.arange(6).reshape(2,3)
>>> for x in np.nditer(a, order='F'):
...     print(x, end=' ')
0 3 1 4 2 5
# 迭代修改,需要指定 op_flags 参数。with 上下文管理和 itr.close() 只在1.16及之后版本支持。
>>> a
array([[0, 1, 2],
[3, 4, 5]])
>>> with np.nditer(a, op_flags=['readwrite']) as it:
...     for x in it:
...         x[...] = 2 * x
>>> a
array([[ 0, 2, 4],
[ 6, 8, 10]])
# 更高效的 External Loop,不能和 c_index 或 multi_index 同时使用
>>> for x in np.nditer(a, flags=['external_loop']):
...     print(x, end=' ')
[0 1 2 3 4 5]
# 同时获取索引
>>> it = np.nditer(a, flags=['multi_index'])
>>> for x in it:
...     print("%d <%s>" % (x, it.multi_index), end=' ')
0 <(0, 0)> 1 <(0, 1)> 2 <(0, 2)> 3 <(1, 0)> 4 <(1, 1)> 5 <(1, 2)>
# 形状改变
>>> a.ravel() # 共用存储, a.flatten() 产生1个新数据
array([ 2., 8., 0., 6., 4., 5., 1., 1., 8., 9., 3., 6.])
>>> a.reshape(6,2) # 共用存储
array([[ 2., 8.],
[ 0., 6.],
[ 4., 5.],
[ 1., 1.],
[ 8., 9.],
[ 3., 6.]])
>>> a.shape
(3, 4)
>>> a.T # 共用存储
array([[ 2., 4., 8.],
[ 8., 5., 9.],
[ 0., 1., 3.],
[ 6., 1., 6.]])
>>> a.T.shape
(4, 3)
# 数组转置
transpose(a[, axes]) # 共用存储
Reverse or permute the axes of an array; returns view of the array.
swapaxes(a, axis1, axis2) # 共用存储
Interchange two axes of an array.
# 扩维缩维
expand_dims(a, axis) # 共用存储
Expand the shape of an array.
squeeze(a[, axis]) # 共用存储
Remove axes of length one from a.
# 数组连接
concatenate((a1, a2, ...)[, axis, out])
Join a sequence of arrays along an existing axis.
stack(arrays[, axis, out])
Join a sequence of arrays along a new axis.
#数组pad
>>> a = np.ones((2,2))
>>> np.pad(a, mode='constant', pad_width=(1,2), constant_values=0)
array([[0., 0., 0., 0., 0.],
[0., 1., 1., 0., 0.],
[0., 1., 1., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.]])
# 数组拆分
split(ary, indices_or_sections[, axis])
Split an array into multiple sub-arrays as views into ary.
不能完全等分时,会抛 ValueError 异常,array_split 不抛异常。
>>> x = np.arange(9.0)
>>> np.split(x, 3)
[array([0., 1., 2.]), array([3., 4., 5.]), array([6., 7., 8.])]
>>> x = np.arange(8.0)
>>> np.split(x, [3, 5, 6, 10])
[array([0., 1., 2.]),
array([3., 4.]),
array([5.]),
array([6., 7.]),
array([], dtype=float64)]
# 数组重复
tile(A, reps)
Construct an array by repeating A the number of times given by reps.
A和 reps 会通过广播机制 broadcast 到相同的 ndim
>>> b = np.array([[1, 2], [3, 4]])
>>> np.tile(b, 2) # reps broadcast 到 (1,2),第一维重复1次,第二维重复2次
array([[1, 2, 1, 2],
[3, 4, 3, 4]])
>>> np.tile(b, (2, 1))
array([[1, 2],
[3, 4],
[1, 2],
[3, 4]])
>>> c = np.array([1,2,3,4])
>>> np.tile(c,(4,1)) # c broadcast 到 (1,4),第一维重复4次,第二维重复1次
array([[1, 2, 3, 4],
[1, 2, 3, 4],
[1, 2, 3, 4],
[1, 2, 3, 4]])
repeat(a, repeats[, axis])
Repeat elements of an array. 不指定 axis 就展成一维repeat
>>> np.repeat(3, 4)
array([3, 3, 3, 3])
>>> x = np.array([[1,2],[3,4]])
>>> np.repeat(x, 2)
array([1, 1, 2, 2, 3, 3, 4, 4])
>>> np.repeat(x, 3, axis=1) # 沿着axis=1,每个元素重复3次
array([[1, 1, 1, 2, 2, 2],
[3, 3, 3, 4, 4, 4]])
>>> np.repeat(x, [1, 2], axis=0) #沿着axis=0,第一行重复1次,第二行重复2次
array([[1, 2],
[3, 4],
[3, 4]])
# 元素换位
flip(m[, axis])
Reverse the order of elements in an array along the given axis.
返回1个改变了元素顺序的 m 的视图,数组 shape 不变。
flip(m, 0) is equivalent to flipud(m).
flip(m, 1) is equivalent to fliplr(m).
flip(m, n) corresponds to m[...,::-1,...] with ::-1 at position n.
flip(m) corresponds to m[::-1,::-1,...,::-1] with ::-1 at all positions.
flip(m, (0, 1)) corresponds to m[::-1,::-1,...] with ::-1 at position 0 and position 1.
roll(a, shift[, axis])
Roll array elements along a given axis.
Elements that roll beyond the last position are re-introduced at the first.
>>> x = np.arange(10)
>>> x2 = np.reshape(x, (2,5))
>>> x2
array([[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9]])
>>> np.roll(x2, 1)
array([[9, 0, 1, 2, 3],
[4, 5, 6, 7, 8]])
>>> np.roll(x2, -1)
array([[1, 2, 3, 4, 5],
[6, 7, 8, 9, 0]])
>>> np.roll(x2, 1, axis=0)
array([[5, 6, 7, 8, 9],
[0, 1, 2, 3, 4]])
>>> np.roll(x2, -1, axis=0)
array([[5, 6, 7, 8, 9],
[0, 1, 2, 3, 4]])
>>> np.roll(x2, 1, axis=1)
array([[4, 0, 1, 2, 3],
[9, 5, 6, 7, 8]])
>>> np.roll(x2, -1, axis=1)
array([[1, 2, 3, 4, 0],
[6, 7, 8, 9, 5]])
# 数组增删
delete(arr, obj[, axis]) # 返回全新数组
Return a new array with sub-arrays along an axis deleted.
>>> np.delete(arr, [1,3,5], axis=None)
array([ 1, 3, 5, 7, 8, 9, 10, 11, 12])
>>> arr = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])
array([[ 1, 2, 3, 4],
[ 5, 6, 7, 8],
[ 9, 10, 11, 12]])
>>> np.delete(arr, 1, axis=0)
array([[ 1, 2, 3, 4],
[ 9, 10, 11, 12]])
insert(arr, obj, values[, axis]) # 返回全新数组
Insert values along the given axis before the given indices.
>>> a = np.array([[1, 1], [2, 2], [3, 3]])
array([[1, 1],
[2, 2],
[3, 3]])
>>> np.insert(a, 1, 5)
array([1, 5, 1, ..., 2, 3, 3])
>>> np.insert(a, 1, 5, axis=1)
array([[1, 5, 1],
[2, 5, 2],
[3, 5, 3]])
append(arr, values[, axis]) # 返回全新数组
Append values to the end of an array.
>>> np.append([1, 2, 3], [[4, 5, 6], [7, 8, 9]]) # 不指定 axis,默认 flatten append
array([1, 2, 3, ..., 7, 8, 9])
>>> np.append([[1, 2, 3], [4, 5, 6]], [[7, 8, 9]], axis=0) # 指定 axis,shape 需匹配
array([[1, 2, 3],
[4, 5, 6],
[7, 8, 9]])
>>> np.append([[1, 2, 3], [4, 5, 6]], [7, 8, 9], axis=0)
Traceback (most recent call last):
...
# 数组排序
sample59 = np.random.randint(0, 9, (3, 5))
print(sample59)
print(np.sort(sample59, axis=0)) # 全部都排序
print(np.argsort(sample59, axis=0)) # 给出的是位置
print(sample59[sample59[:, 1].argsort()]) # 按第二列排序
位运算
Elementwise bit operations
bitwise_and(x1, x2, /[, out, where, …])
Compute the bit-wise AND of two arrays element-wise.
bitwise_or(x1, x2, /[, out, where, casting, …])
Compute the bit-wise OR of two arrays element-wise.
bitwise_xor(x1, x2, /[, out, where, …])
Compute the bit-wise XOR of two arrays element-wise.
invert(x, /[, out, where, casting, order, …])
Compute bit-wise inversion, or bit-wise NOT, element-wise.
left_shift(x1, x2, /[, out, where, casting, …])
Shift the bits of an integer to the left.
right_shift(x1, x2, /[, out, where, …])
Shift the bits of an integer to the right.
Bit packing
packbits(a[, axis, bitorder])
Packs the elements of a binary-valued array into bits in a uint8 array.
unpackbits(a[, axis, count, bitorder])
Unpacks elements of a uint8 array into a binary-valued output array.
Output formatting
binary_repr(num[, width])
Return the binary representation of the input number as a string.
逻辑运算
Logical operations
logical_and(x1, x2, /[, out, where, …])
Compute the truth value of x1 AND x2 element-wise.
logical_or(x1, x2, /[, out, where, casting, …])
Compute the truth value of x1 OR x2 element-wise.
logical_not(x, /[, out, where, casting, …])
Compute the truth value of NOT x element-wise.
logical_xor(x1, x2, /[, out, where, …])
Compute the truth value of x1 XOR x2, element-wise.
Comparison
allclose(a, b[, rtol, atol, equal_nan])
Returns True if two arrays are element-wise equal within a tolerance.
isclose(a, b[, rtol, atol, equal_nan])
Returns a boolean array where two arrays are element-wise equal within a tolerance.
array_equal(a1, a2[, equal_nan])
True if two arrays have the same shape and elements, False otherwise.
array_equiv(a1, a2)
Returns True if input arrays are shape consistent and all elements equal.
greater(x1, x2, /[, out, where, casting, …])
Return the truth value of (x1 > x2) element-wise.
greater_equal(x1, x2, /[, out, where, …])
Return the truth value of (x1 >= x2) element-wise.
less(x1, x2, /[, out, where, casting, …])
Return the truth value of (x1 < x2) element-wise.
less_equal(x1, x2, /[, out, where, casting, …])
Return the truth value of (x1 <= x2) element-wise.
equal(x1, x2, /[, out, where, casting, …])
Return (x1 == x2) element-wise.
not_equal(x1, x2, /[, out, where, casting, …])
Return (x1 != x2) element-wise.
随机数(新版api)
# Do this (new version)
from numpy.random import default_rng
rng = default_rng()
vals = rng.standard_normal(10)
more_vals = rng.standard_normal(10)
# instead of this (legacy version)
from numpy import random
vals = random.standard_normal(10)
more_vals = random.standard_normal(10)
数学运算
a = np.array([-22.76721327, 10.48268006, -0.4032699, 2.90810892, -4.09077903])
print(np.floor(a)) #地板 [-23. 10. -1. 2. -5.]
print(np.ceil(a)) #天花板 [-22. 11. -0. 3. -4.]
print(np.rint(a)) #到最近的整数 [-23. 10. -0. 3. -4.]
print(np.round(a)) #到最近的整数 [-23. 10. -0. 3. -4.]
print(np.trunc(a)) #只保留整数部分 [-22. 10. -0. 2. -4.]
统计运算
# 求极值
maximum(x1, x2, out=None), minimum 同理;遇到 nan 会传播 nan,如需忽略 nan 请使用 fmax / fmin
Element-wise maximum of array elements. 要求 x1 和 x2 shape 相同或可以broadcast到相同。
max(a, axis), min 同理,不能处理 nan,需要使用 nanmax
返回数组最大值 或 沿某一维的最大值。结果的 shape 比输入小一维。
>>> np.max([5], initial=6) == 6 # 与 python 默认的 default 逻辑不同
>>> max([5], default=6) == 5
argmax(a, axis), argmin 同理,不能处理 nan,需要使用 nanargmax
返回数组最大值对应的索引值,或沿某一维的最大值的索引值。结果的 shape 比输入小一维。
ptp(a[, axis, out, keepdims])
Range of values (maximum - minimum) along an axis.
# 极值裁剪
clip(a, a_min, a_max, out=None, **kwargs)
Clip (limit) the values in an array. 默认返回一个新数组,out=a 时会替换原数组。
# 求分位数
percentile(a, q[, axis, out, …])
Compute the q-th percentile of the data along the specified axis.
quantile(a, q[, axis, out, overwrite_input, …])
Compute the q-th quantile of the data along the specified axis.
>>> a
array([[10, 7, 4],
[ 3, 2, 1]])
>>> np.quantile(a, 0.5)
3.5
>>> np.quantile(a, 0.5, axis=0)
array([6.5, 4.5, 2.5])
>>> np.quantile(a, 0.5, axis=1)
array([7., 2.])
>>> np.quantile(a, 0.5, axis=1, keepdims=True)
array([[7.],
[2.]])
# 加权平均
average(a, axis=None, weights=None, returned=False)
Compute the weighted average along the specified axis.
>>> data
array([[0, 1],
[2, 3],
[4, 5]])
>>> np.average(data, axis=1, weights=[1./4, 3./4])
array([0.75, 2.75, 4.75])
# 差分运算
diff(a, n=1, axis=-1[, prepend, append])
Calculate the n-th discrete difference along the given axis.
结果在axis维上的长度为 shape-n, 在其它维上的长度与a相同。
>>> x = np.array([1, 2, 4, 7, 0])
>>> np.diff(x)
array([ 1, 2, 3, -7])
>>> np.diff(x, n=2)
array([ 1, 1, -10])
>>> x = np.array([[1, 3, 6, 10], [0, 5, 6, 8]])
>>> np.diff(x)
array([[2, 3, 4],
[5, 1, 2]])
>>> np.diff(x, axis=0)
array([[-1, 2, 0, -2]])
>>> np.all(np.diff(xp) > 0) # 判断xp是否是递增的
# 插值运算
interp(x, xp, fp, left=None, right=None, period=None)
One-dimensional linear interpolation. Returns the one-dimensional piecewise
linear interpolant to a function(xp, fp), evaluated at x.
对一元线性函数 fp = f(xp), xp 需要单调递增,返回 x 所在位置的线性插值。
x 超过 xp 的范围时,返回边界值,除非指定 left 和 right 参数。
# 相关系数
corrcoef(x, y=None, rowvar=True[, bias, ddof])
Return Pearson correlation coefficients = covariance(x, y)/(std(x)*std(y)).
默认是 row wise 计算相关系数,rowvar=False 时按column wise 计算。
x 只支持 1d 或 2d 数组,y 的 shape 与 x 相同。
>>> xarr
array([[0.77395605, 0.43887844, 0.85859792],
[0.69736803, 0.09417735, 0.97562235],
[0.7611397 , 0.78606431, 0.12811363]])
>>> R1 = np.corrcoef(xarr)
array([[ 1. , 0.99256089, -0.68080986],
[ 0.99256089, 1. , -0.76492172],
[-0.68080986, -0.76492172, 1. ]])
>>> yarr
array([[0.45038594, 0.37079802, 0.92676499],
[0.64386512, 0.82276161, 0.4434142 ],
[0.22723872, 0.55458479, 0.06381726]])
>>> R2 = np.corrcoef(xarr, yarr)
array([[ 1. , 0.99256089, -0.68080986, 0.75008178, -0.934284, -0.99004057],
[ 0.99256089, 1. , -0.76492172, 0.82502011, -0.97074098, -0.99981569],
[-0.68080986, -0.76492172, 1. , -0.99507202, 0.89721355, 0.77714685],
[ 0.75008178, 0.82502011, -0.99507202, 1. , -0.93657855, -0.83571711],
[-0.934284 , -0.97074098, 0.89721355, -0.93657855, 1. , 0.97517215],
[-0.99004057, -0.99981569, 0.77714685, -0.83571711, 0.97517215, 1. ]])
# 内积、点积、数量积
np.dot(a, b)
对应元素相乘求和,几何意义是向量a 在 b上的投影长度。对1维数组是向量点积,2维数组是矩阵乘法。
N维情况下比较特殊,是 a 的最后一维与 b 的倒数第二维的点积。
a.shape=(2,3,2) b.shape=(3,2,4) 则 np.dot(a,b).shape=(2,3,3,4)
np.inner(a, b)
对1维数组是向量点积,对多维数组则是 最后一维的点积
# 外积、叉积、向量积
np.cross(a, b)
几何意义是向量 a 和 b 的法向量,垂直于 a 和 b 所张成的平面
# 直方图
(cnts, bins) = histogram(data, bins=10, range=None, normed=None, weights=None, density=None)
bins: 等宽区间数(scalar),或区间边界列表(sequence)
range: 区间上下界 (float, float),默认(data.min, data.max)
normed: deprecated
weights: 数据权重,array like data
density: 是否归一化为概率密度,True 时返回值 cnts 中是概率密度值(对区间宽度积分为1;仅当区间宽度为1时才等于概率)
cnts: 个数或概率数组
bins: 区间边界列表
>>> np.histogram([1, 2, 1], bins=[0, 1, 2, 3])
(array([0, 2, 1]), array([0, 1, 2, 3]))
>>> np.histogram(np.arange(4), bins=np.arange(5), density=True)
(array([0.25, 0.25, 0.25, 0.25]), array([0, 1, 2, 3, 4]))
结构数组
>>> a = np.zeros(3, dtype=[('foo', np.int32), ('bar', np.float16)])
array([(0, 0.), (0, 0.), (0, 0.)],
dtype=[('foo', '<i4'), ('bar', '<f2')])
>>> a['bar'] *= a['foo']
矩阵运算
Matrix library (numpy.matlib)
线性代数
Linear algebra (numpy.linalg)
字符串运算(链接)
numpy.char 提供字符串相关操作
日期运算
yesterday = np.datetime64('today', 'D') - np.timedelta64(1, 'D')
today = np.datetime64('today', 'D')
tomorrow = np.datetime64('today', 'D') + np.timedelta64(1, 'D')
print(yesterday,today,tomorrow)
#2020-08-25 2020-08-26 2020-08-27
Z = np.arange('2016-07-01', '2016-07-04', dtype='datetime64[D]')
print(Z)
#['2016-07-01' '2016-07-02' '2016-07-03']
Datetime Support Functions
datetime_as_string(arr[, unit, timezone, …])
Convert an array of datetimes into an array of strings.
datetime_data(dtype, /)
Get information about the step size of a date or time type.
Business Day Functions
busdaycalendar([weekmask, holidays])
A business day calendar object that efficiently stores information defining valid days for the busday family of functions.
is_busday(dates[, weekmask, holidays, …])
Calculates which of the given dates are valid days, and which are not.
busday_offset(dates, offsets[, roll, …])
First adjusts the date to fall on a valid day according to the roll rule, then applies offsets to the given dates counted in valid days.
busday_count(begindates, enddates[, …])
Counts the number of valid days between begindates and enddates, not including the day of enddates.
全局函数
a = np.arange(5)
print(np.add.reduce(a)) # 返回scalar
print(np.add.accumulate(a)) # 返回array
print(np.add.reduceat(a, [0,3,1,4,2])) # 返回array, ufunc.reduce(a[indices[i]:indices[i+1]])
print(np.add.outer(a, a)) # 返回ndarray
10
[ 0 1 3 6 10]
[3 3 6 4 9]
[[0 1 2 3 4]
[1 2 3 4 5]
[2 3 4 5 6]
[3 4 5 6 7]
[4 5 6 7 8]]