现有一组数据 data,每一个数据可以看成一个二维点 $(x_1, x_2)$:
# Sample points: each inner list is one two-component record.
data = [
    [2, 3],
    [5, 4],
    [9, 6],
    [4, 7],
    [8, 1],
    [7, 2],
]
现需要按照 $x_1$ 从小到大对数组排序。
1.1. 如果 data 是 Python 列表(list),排序时直接交换两个下标处的元素即可。
# Exchange sort on a plain Python list, ordering rows by their first element.
# After the inner loop finishes for a given i, data[i] holds the row with the
# smallest first element among data[i:].
for i in range(len(data)):
    for j in range(i + 1, len(data)):
        if data[i][0] > data[j][0]:
            # List slots hold references, so tuple assignment swaps the rows safely.
            data[i], data[j] = data[j], data[i]
1.2. 如果 data 是 ndarray,data[i] 返回的是视图(view)而不是副本,因此交换两行时需要先用 copy 保存其中一行。
# Exchange sort ordering rows by their first element.
# Assumes data is a 2-D numpy array defined outside this snippet — confirm.
for i in range(len(data)):
    for j in range(i + 1, len(data)):
        if data[i][0] > data[j][0]:
            # Indexing an ndarray row yields a view, so a plain `temp = data[i]`
            # would be overwritten by the next assignment; copy the row first.
            temp = np.copy(data[i])
            data[i] = data[j]
            data[j] = temp
或者使用花式索引(fancy indexing)一次性交换两行:
# Exchange sort ordering rows by their first element.
# Assumes data is a 2-D numpy array defined outside this snippet — confirm.
for i in range(len(data)):
    for j in range(i + 1, len(data)):
        if data[i][0] > data[j][0]:
            # Indexing with a list of row numbers copies the selected rows on
            # the right-hand side, so both rows can be swapped in one assignment.
            data[[i, j], :] = data[[j, i], :]
# 100 evenly spaced samples from 0 to 10, both end points included.
x = np.linspace(start=0, stop=10, num=100)
print(x)
linspace参数详解:
param | desc |
---|---|
start | 取值范围的起始值 |
stop | 取值范围的结束值(默认包含在结果中) |
num | 取值范围内取值个数 |
numpy.c_[] 和 numpy.r_[] 分别为添加列和添加行。
a、numpy.r_[](按行拼接,添加行)
# Append a (1, 3) row of zeros below a (2, 3) matrix with np.r_.
a = np.arange(1, 7).reshape(2, 3)  # shape (2, 3)
b = np.zeros((1, 3), dtype=int)  # shape (1, 3)
c = np.r_[a, b]
print(c)
"""
[[1 2 3]
[4 5 6]
[0 0 0]]
"""
b、numpy.c_[](按列拼接,添加列)
# Append a (3, 1) column to the right of a (3, 2) matrix with np.c_.
d = np.arange(1, 7).reshape(3, 2)  # shape (3, 2)
e = np.arange(1, 4).reshape(3, 1)  # shape (3, 1)
f = np.c_[d, e]
print(f)
"""
[[1 2 1]
[3 4 2]
[5 6 3]]
"""
numpy.ravel 和 ndarray.flatten 两个函数均是将数组扁平化(展平为一维数组)。
# A 3 x 4 matrix holding 0..11, used to compare the two flattening calls.
a = np.arange(12).reshape(3, -1)
print(a)
"""
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
"""
# Flatten through the module-level function ...
flat_by_ravel = np.ravel(a)
print(flat_by_ravel)
"""
[ 0 1 2 3 4 5 6 7 8 9 10 11]
"""
# ... and through the ndarray method.
flat_by_flatten = a.flatten()
print(flat_by_flatten)
"""
[ 0 1 2 3 4 5 6 7 8 9 10 11]
"""
区别:ravel 返回的是原数组的视图(view),修改返回值会同时改变原数组的元素,慎用;flatten 返回的是副本(copy),修改它不会影响原数组。
# Create an array b with the same contents as a.
# NOTE(review): a is not defined in this snippet; presumably the 3 x 4 array
# from the preceding example — confirm.
b = a.copy()
c = a.ravel()
print(c)
# [ 0 1 2 3 4 5 6 7 8 9 10 11]
d = b.flatten()
c[1] = 99  # writing through c is reflected in a (see the printed a below)
print(a)
"""
[[ 0 99 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
"""
print(d)
# [ 0 1 2 3 4 5 6 7 8 9 10 11]
d[1] = 99  # writing to d leaves b untouched (see the printed b below)
print(b)
"""
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
"""
arange参数详解:
param | desc |
---|---|
start | 起始值(包含在结果中) |
stop | 终止值(不包含在结果中) |
step | 步长 |
dtype | 类型 |
import numpy as np
# arange(1, 10, 1) produces 1..9; the stop value itself is not included.
data = np.arange(1, 10, 1)
print(data)  # [1 2 3 4 5 6 7 8 9]
# Build a coordinate grid from a 9-point x axis and a 7-point y axis.
x_axis = np.arange(1, 10, 1)
y_axis = np.arange(1, 8, 1)
xx, yy = np.meshgrid(x_axis, y_axis)
print(xx.shape)  # (7, 9)
xx_f = xx.flatten()
print(xx_f.shape)  # (63,)
# Pair the flattened coordinates: one row per grid point, columns are (x, y).
yy_f = yy.flatten()
data = np.c_[xx_f, yy_f]
print(data.shape)  # (63, 2)
# Three small integer matrices used by the stacking examples that follow.
a = np.arange(8).reshape(-1, 2)  # shape (4, 2)
print(a)
"""
[[0 1]
[2 3]
[4 5]
[6 7]]
"""
b = np.arange(8, 12).reshape(2, -1)  # shape (2, 2)
print(b)
"""
[[ 8 9]
[10 11]]
"""
c = np.arange(8).reshape(2, -1)  # shape (2, 4)
print(c)
"""
[[0 1 2 3]
[4 5 6 7]]
"""
"""
np.r_ 将数组的行合并,要求axis=1 的维度值一致
参照第3节
"""
# np.r_ stacks a on top of b; both have two columns.
stacked_rows = np.r_[a, b]
print(stacked_rows)
"""
[[ 0 1]
[ 2 3]
[ 4 5]
[ 6 7]
[ 8 9]
[10 11]]
"""
"""
np.concatenate 将矩阵合并,将指定的axis合并,axis=0,按行合并
axis=1,按列合并
"""
# axis=0 is the default for np.concatenate: rows of b are appended below a.
joined_rows = np.concatenate((a, b), axis=0)
print(joined_rows)
"""
[[ 0 1]
[ 2 3]
[ 4 5]
[ 6 7]
[ 8 9]
[10 11]]
"""
# axis=1 appends columns; c and b both have two rows.
joined_cols = np.concatenate([c, b], axis=1)
print(joined_cols)
"""
[[ 0 1 2 3 8 9]
[ 4 5 6 7 10 11]]
"""
numpy.var 求矩阵或者向量的方差,axis 表示计算方差的方向。
numpy.cov 求矩阵的协方差矩阵,或者 2 个向量的协方差矩阵。
"""
1、两个向量的方差
ddof=1 表示1/n-1
默认ddof=0 表示1/n
"""
# Two sample vectors; only the variance of a is printed in this step.
a = list(range(1, 6))
b = [1, *range(3, 7)]
var_a = np.var(a, ddof=1)  # ddof=1 divides by n - 1
print(var_a)  # 2.5
"""
2、矩阵的方差
"""
# Variance along axis=1, i.e. one value per row of the 2 x 5 matrix.
c = [a, b]
row_vars = np.var(c, axis=1, ddof=1)
print(row_vars)  # [2.5 3.7]
"""
3、两个向量的协方差
"""
# Passing two vectors gives their 2 x 2 covariance matrix.
cov_ab = np.cov(a, b)
print(cov_ab)
"""
[[2.5 3. ]
[3. 3.7]]
"""
"""
4、矩阵的协方差
"""
# Passing the stacked matrix gives the same result: each row is one variable.
c = [a, b]
cov_c = np.cov(c)
print(cov_c)
"""
[[2.5 3. ]
[3. 3.7]]
"""
"""
5、归一化的m个n维向量,协方差和数据(n,m)之间的关系
cov = 1/(m-1) xx.T
该协方差的维度(n,n)
"""
# Three rows of five values each.
a = [1, 2, 3, 4, 5]
b = [4, 5, 6, 7, 8]
c = [7, 8, 9, 10, 11]
d = np.array([a, b, c])  # shape = (3, 5)
# Centre every row by subtracting that row's mean.
mean = d.mean(axis=1)
d = d - mean[:, np.newaxis]
print(d)
"""
[[-2. -1. 0. 1. 2.]
[-2. -1. 0. 1. 2.]
[-2. -1. 0. 1. 2.]]
"""
# Covariance of the centred rows: cov = 1/(m-1) * x x^T
cov_d = np.cov(d)
print(cov_d)
"""
[[2.5 2.5 2.5]
[2.5 2.5 2.5]
[2.5 2.5 2.5]]
"""
# Trace of cov * (m - 1), with m = 5 columns.
scatter = cov_d * (5 - 1)
print(np.trace(scatter))  # 30.0
# Summing the squared length of every centred row gives the same number.
row_energy = sum(np.matmul(row, row.T) for row in d)
print(row_energy)  # 30.0
numpy.linalg.eig 求矩阵的特征值和特征向量。
numpy.linalg.inv 求矩阵的逆。
# 3 x 3 matrix whose eigen-decomposition is printed below.
x = np.array(
    [
        [-1, 1, 0],
        [-4, 3, 0],
        [1, 0, 2],
    ]
)
# eig returns the eigenvalues first and the eigenvector matrix second.
eig_result = np.linalg.eig(x)
e, f = eig_result[0], eig_result[1]
print(e)
"""
[2. 1. 1.]
"""
print(f)  # normalised eigenvectors, stored as columns
"""
[[ 0. 0.40824829 0.40824829]
[ 0. 0.81649658 0.81649658]
[ 1. -0.40824829 -0.40824829]]
"""
# Inverse of a 2 x 2 matrix.
a = np.arange(1, 5).reshape(2, 2)
a_inv = np.linalg.inv(a)
print(a_inv)
"""
[[-2. 1. ]
[ 1.5 -0.5]]
"""
Python 列表(list)的索引只能是整数或切片,但是 ndarray 可以使用布尔值列表作为索引,返回 True 所在位置对应的元素。
import numpy as np
a = [1, 2, 3, 4]
b = [True, True, False, False]
# A plain list rejects a list of booleans as an index:
# print(a[b])  # TypeError: list indices must be integers or slices, not list
a = np.array(a)
# An ndarray accepts it and keeps the elements where the mask is True.
print(a[b])  # [1 2]
b = [True, True, False, False, False]
# A mask whose length differs from the array raises IndexError. Catch it so the
# demonstration prints the message instead of aborting the rest of the script.
try:
    print(a[b])
except IndexError as err:
    print(err)  # boolean index did not match indexed array along dimension 0; dimension is 4 but corresponding boolean dimension is 5
"""
索引的布尔列表长度需要和ndarray长度一致
"""
nonzero 返回一个元组,元组中的每个数组是 ndarray 在对应维度上非零元素的索引。
import numpy as np
# Start from a length-10 vector of zeros.
a = np.zeros(10)
print(a)  # [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
# Make positions 0 and 2 the only non-zero entries.
a[[0, 2]] = (1, 2)
print(a)  # [1. 0. 2. 0. 0. 0. 0. 0. 0. 0.]
# nonzero gives a tuple with one index array per dimension.
nonzero_idx = np.nonzero(a)
print(nonzero_idx)  # (array([0, 2], dtype=int64),)
print(nonzero_idx[0])  # [0 2]