PS:机器学习相关代码:https://nbviewer.jupyter.org/github/coderzc/machine_learning/tree/master/jupyter
GitHub源码:https://github.com/coderzc/machine_learning
等有时间整理一下,发出来
Numpy 创建N维数组
import numpy as np
''' 创建10行10列的数值为浮点0的矩阵 '''
>>> print("np.zeros\n", np.zeros([10, 10]))
np.zeros
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
''' 创建10行10列的数值为浮点1的矩阵 '''
>>> print("np.ones\n", np.ones([10, 10]))
np.ones
[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]
''' 创建10行10列的单位矩阵(主对角线为浮点1,其余元素为0) '''
>>> print("np.eye\n", np.eye(10, 10))
np.eye
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]
''' 从数值范围创建数组 开始,结束,步长,输出元素类型 '''
>>> print("np.arange\n", np.arange(0, 100, 2, float))
np.arange
[ 0. 2. 4. 6. 8. 10. 12. 14. 16. 18. 20. 22. 24. 26. 28. 30. 32. 34.
36. 38. 40. 42. 44. 46. 48. 50. 52. 54. 56. 58. 60. 62. 64. 66. 68. 70.
72. 74. 76. 78. 80. 82. 84. 86. 88. 90. 92. 94. 96. 98.]
''' 生成随机数组 5行5列 取值范围 [0, 1) '''
>>> np.random.rand(5, 5)
array([[0.79909192, 0.40687012, 0.05833267, 0.90631693, 0.85774438],
[0.65685319, 0.99620959, 0.64195711, 0.28694344, 0.54805126],
[0.87347445, 0.20443748, 0.45883044, 0.90017425, 0.17487183],
[0.4833086 , 0.59498315, 0.75053456, 0.93725983, 0.79870607],
[0.8908418 , 0.49860926, 0.44097606, 0.53744394, 0.21089092]])
''' 生成在半开半闭区间 [low,high)上离散均匀分布的整数值;若high=None,则取值区间变为[0,low) ; size维度 '''
>>> np.random.randint(4,10,size=(5, 5))
array([[6, 6, 6, 7, 9],
[4, 8, 6, 7, 7],
[6, 8, 6, 5, 7],
[7, 8, 8, 4, 5],
[7, 6, 5, 5, 7]])
''' 给定均值/标准差/维度的正态分布 '''
>>> np.random.normal(1.75, 0.1, (3, 4))
array([[1.83246388, 1.73186179, 1.78198763, 1.76844117],
[1.69089184, 1.69620751, 1.78018062, 1.68086896],
[1.86462936, 1.61972878, 1.95645574, 1.66104741]])
''' 将列表转换为np数组 '''
>>> array = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
>>> np_array = np.array(array, dtype=float) # copy,新数组
>>> print("np.array:\n", np_array)
np.array:
[[ 1. 2. 3. 4.]
[ 5. 6. 7. 8.]
[ 9. 10. 11. 12.]]
>>> np_array2 = np.asarray(array, dtype=float) # 输入是list时同样会拷贝;只有输入已是相同dtype的ndarray时才不拷贝,直接返回原数组
>>> print("np.asarray:\n", np_array2)
np.asarray:
[[ 1. 2. 3. 4.]
[ 5. 6. 7. 8.]
[ 9. 10. 11. 12.]]
查看数组属性
# 数组元素个数
>>> print("数组元素个数 size:", np_array.size)
数组元素个数 size: 12
# 数组形状
>>> print("数组形状 shape:", np_array.shape)
数组形状 shape: (3, 4)
# 数组维度
>>> print("数组维度 ndim:", np_array.ndim)
数组维度 ndim: 2
# 数组元素类型
>>> print("数组元素类型 dtype:", np_array.dtype)
数组元素类型 dtype: float64
# 数组中每个元素的字节大小
>>> print("数组元素类型 itemsize:", np_array.itemsize)
数组元素类型 itemsize: 8
shape操作
>>> array = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
>>> n1 = np.asarray(array)
# 改变数组的格式
>>> n2 = n1.reshape(6, 2)
>>> print(n1)
[[ 1 2 3 4]
[ 5 6 7 8]
[ 9 10 11 12]]
>>> print(n2)
[[ 1 2]
[ 3 4]
[ 5 6]
[ 7 8]
[ 9 10]
[11 12]]
# 将多维降到1维展开
>>> print("flatten():", n2.flatten()) # copy,新数组
flatten(): [ 1 2 3 4 5 6 7 8 9 10 11 12]
>>> print("ravel():", n2.ravel()) # 尽可能返回视图(view):修改返回值会影响原数组,但原数组的shape不变
ravel(): [ 1 2 3 4 5 6 7 8 9 10 11 12]
# 转置
>>> n3 = np.arange(12)
>>> n3 = n3.reshape(3, 4)
>>> print("n3:", n3)
n3: [[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
>>> print("n3.T:", n3.T)
n3.T: [[ 0 4 8]
[ 1 5 9]
[ 2 6 10]
[ 3 7 11]]
# reshape一些特殊值
>>> n4 = np.arange(10, 130, 10)
>>> print("n4:", n4.reshape(4, 3))
n4: [[ 10 20 30]
[ 40 50 60]
[ 70 80 90]
[100 110 120]]
# -1 一维展开 与 ravel()作用相似
>>> print(n4.reshape(-1))
[ 10 20 30 40 50 60 70 80 90 100 110 120]
# (-1,1) n行,1列
>>> print(n4.reshape(-1, 1))
[[ 10]
[ 20]
[ 30]
[ 40]
[ 50]
[ 60]
[ 70]
[ 80]
[ 90]
[100]
[110]
[120]]
# (1,-1) 1行,n列,但仍然是二维矩阵
>>> print(n4.reshape(1, -1))
[[ 10 20 30 40 50 60 70 80 90 100 110 120]]
数组索引和迭代
>>> print('\n\n')
>>> n5 = np.arange(30)
>>> print('n5:', n5)
n5: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28 29]
# 获取第一个元素
>>> print(n5[0])
0
# 获取倒数第一个元素
>>> print(n5[-1])
29
# 取前十个数
>>> print(n5[:10])
[0 1 2 3 4 5 6 7 8 9]
# 取后十个数
>>> print(n5[-10:])
[20 21 22 23 24 25 26 27 28 29]
# 取第11-20个数(索引10到19),左闭右开
>>> print(n5[10:20])
[10 11 12 13 14 15 16 17 18 19]
# 前十个数中,每2个数取一个
>>> print(n5[:10:2])
[0 2 4 6 8]
# 第6-15个数中,每3个数取一个
>>> print(n5[5:15:3])
[ 5 8 11 14]
# 所有的数中,每10个数取一个
>>> print(n5[::10])
[ 0 10 20]
# 什么都不写,得到的是整个数组的视图(view),与原数组共享数据;要真正复制一个数组请用 n5.copy()
>>> print(n5[:])
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28 29]
############### 多维数组索引与切片 ###############
>>> n6 = n5.reshape(5, 6)
>>> print('n6:', n6)
n6: [[ 0 1 2 3 4 5]
[ 6 7 8 9 10 11]
[12 13 14 15 16 17]
[18 19 20 21 22 23]
[24 25 26 27 28 29]]
# 索引第二行第三列的元素
>>> print('n6[1, 2]:', n6[1, 2])
n6[1, 2]: 8
# 在第一维取前两行,第二维以步长2取元素(每两个取一个)
>>> print('n6[:2, ::2]:\n', n6[:2, ::2])
n6[:2, ::2]:
[[ 0 2 4]
[ 6 8 10]]
# 取第一列
>>> print('n6[:, 0]]:\n', n6[:, 0])
n6[:, 0]]:
[ 0 6 12 18 24]
# 取第4、5列(列索引3、4,左闭右开)
>>> print('n6[:, 3:5]]:\n', n6[:, 3:5])
n6[:, 3:5]]:
[[ 3 4]
[ 9 10]
[15 16]
[21 22]
[27 28]]
拼接、分割
>>> A=np.arange(10,20).reshape(5,2)
>>> print(A)
[[10 11]
[12 13]
[14 15]
[16 17]
[18 19]]
>>> B=np.arange(20,30).reshape(5,2)
>>> print(B)
[[20 21]
[22 23]
[24 25]
[26 27]
[28 29]]
# 垂直拼接
>>> C=np.vstack([A,B]) #C=np.r_[A,B]
>>> print(C)
[[10 11]
[12 13]
[14 15]
[16 17]
[18 19]
[20 21]
[22 23]
[24 25]
[26 27]
[28 29]]
# 自我堆叠
>>> v=np.asarray([1,2])
>>> a=np.vstack([v]*2)
>>> print(a)
[[1 2]
[1 2]]
# np.tile(v,(2,1)):纵向(行方向)重复两次,横向(列方向)重复一次
>>> b=np.tile(v,(2,1))
>>> print(b)
[[1 2]
[1 2]]
# 水平拼接
>>> C=np.hstack([A,B]) #C=np.c_[A,B]
>>> print(C)
[[10 11 20 21]
[12 13 22 23]
[14 15 24 25]
[16 17 26 27]
[18 19 28 29]]
#列组合column_stack([A,B]) 一维数组:按列方向组合 二维数组:同hstack一样
>>> A=np.arange(10,20)
>>> print(A)
[10 11 12 13 14 15 16 17 18 19]
#行组合row_stack([A,B]) 一维数组:按行方向组合 二维数组:同vstack一样(row_stack是vstack的别名,NumPy 2.0起已弃用,建议直接用vstack)
基础运算
>>> n7 = np.asarray([10, 20, 30,40])
>>> n8 = np.arange(4)
>>> print(n7)
[10 20 30 40]
>>> print(n8)
[0 1 2 3]
# 计算立方
>>> print(n7**3)
[ 1000 8000 27000 64000]
# 三角函数
>>> print(np.sin(n7))
[-0.54402111 0.91294525 -0.98803162 0.74511316]
# 指定轴最大/小值
>>> print(np.amax(n7, axis=0))
40
>>> print(np.amin(n7, axis=0))
10
# 平均值
>>> print(np.mean(n7, axis=0))
25.0
# 中位数
>>> print(np.median(n7))
25.0
# 方差
>>> print(n7.var())
125.0
# 标准差
>>> print(np.std(n7, axis=0))
11.180339887498949
# 差值
>>> print("n7-n8:",n7-n8)
n7-n8: [10 19 28 37]
# 逐个相乘非矩阵乘法
>>> n9 = np.asarray([[1,1],[0,1]])
>>> print(n9)
[[1 1]
[0 1]]
>>> n10=np.arange(4).reshape((2,2))
>>> print(n10)
[[0 1]
[2 3]]
>>> print(n9 * n10)
[[0 1]
[0 3]]
矩阵计算
# Ax=B 求解x
>>> A = np.array([[2, 1, -2], [3, 0, 1], [1, 1, -1]])
>>> B = np.transpose(np.array([[-3, 5, -2]]))
>>> x = np.linalg.solve(A, B)
>>> print('x:\n', x)
x:
[[ 1.]
[-1.]
[ 2.]]
# 矩阵相乘 C=AB 求解C
>>> A = np.array([[3, 2, -2], [3, 1, 4], [3, 1, -2]])
>>> B = np.arange(9).reshape((3,3))
>>> C = np.dot(A, B)
>>> print('C:\n',C)
C:
[[-6 -3 0]
[27 35 43]
[-9 -7 -5]]
# 矩阵乘向量
>>> v=np.asarray([1,2])
>>> print(v)
[1 2]
>>> A=np.arange(1,5).reshape(2,2)
>>> print(A)
[[1 2]
[3 4]]
>>> D=v.dot(A)
>>> print(D)
[ 7 10]
# 自动将v当作列向量参与运算,结果又自动转化为行向量(一维数组)
>>> C=A.dot(v)
>>> print(C)
[ 5 11]
# 矩阵的逆
>>> A=np.arange(1,5).reshape(2,2)
>>> print(A)
[[1 2]
[3 4]]
>>> invA=np.linalg.inv(A)
>>> print(invA)
[[-2. 1. ]
[ 1.5 -0.5]]
# 矩阵乘以它的逆等于单位矩阵(对角线都为1,其他为0);这里的极小非零值是浮点误差
>>> print(A.dot(invA))
[[1.00000000e+00 1.11022302e-16]
[0.00000000e+00 1.00000000e+00]]
# 对于非方阵求伪逆矩阵
>>> A=np.arange(1,11).reshape(2,5)
>>> print(A)
[[ 1 2 3 4 5]
[ 6 7 8 9 10]]
>>> pinvA=np.linalg.pinv(A)
>>> print(pinvA)
[[-0.36 0.16]
[-0.2 0.1 ]
[-0.04 0.04]
[ 0.12 -0.02]
[ 0.28 -0.08]]
>>> print(A.dot(pinvA))
[[ 1.00000000e+00 -1.11022302e-16]
[-8.88178420e-16 1.00000000e+00]]
排序与arg
>>> x=np.asarray([2,6,7,1,4,5,8,3,10,9])
>>> print(x)
[ 2 6 7 1 4 5 8 3 10 9]
# argxxx 索引函数
>>> print(np.argmax(x)) #最大数的索引为8
8
>>> print(np.argmin(x))
3
# 排序
>>> print(np.sort(x))
[ 1 2 3 4 5 6 7 8 9 10]
# x原地排序
>>> x.sort()
>>> print(x)
[ 1 2 3 4 5 6 7 8 9 10]
#打乱顺序
>>> np.random.shuffle(x)
>>> print(x)
[ 9 3 5 6 2 1 8 4 7 10]
# 返回排序索引
>>> print(np.argsort(x))
[5 4 1 7 2 3 8 6 0 9]
# 以排序后索引为3的元素(第4小的数)为界划分:比它小的排在左边,比它大的排在右边,两侧内部不保证有序 (快排子过程)
>>> print(np.partition(x,3))
[ 2 1 3 4 5 6 8 7 9 10]
FancyIndexing与np比较
>>> x=np.arange(16)
>>> print(x)
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
# FancyIndexing
>>> a=[3,5,8]
>>> print(x[a])
[3 5 8]
# np数组比较
>>> x<3
array([ True, True, True, False, False, False, False, False, False,
False, False, False, False, False, False, False])
>>> 2*x==24-4*x
array([False, False, False, False, True, False, False, False, False,
False, False, False, False, False, False, False])
# 小于等于3的元素True/False序列
>>> i=(x<=3)
>>> print(i)
[ True True True True False False False False False False False False
False False False False]
>>> print(x[i])
[0 1 2 3]
>>> np.sum(i) # 对值为True的元素累加计数
4
# 是否含有零元素
>>> np.any(x==0)
True
# 是否都等于零
>>> np.all(x==0)
False
# 判断两个数组是否相等
>>> np.all(x==x)
True
读取数据
>>> np.genfromtxt("http://aima.cs.berkeley.edu/data/iris.csv", delimiter=",",skip_header=0,dtype="f8,f8,f,i4,|S8")
array([(5.1, 3.5, 1.4, 0, b'setosa'), (4.9, 3. , 1.4, 0, b'setosa'),
(4.7, 3.2, 1.3, 0, b'setosa'), (4.6, 3.1, 1.5, 0, b'setosa'),
(5. , 3.6, 1.4, 0, b'setosa'), (5.4, 3.9, 1.7, 0, b'setosa'),
(4.6, 3.4, 1.4, 0, b'setosa'), (5. , 3.4, 1.5, 0, b'setosa'),
(4.4, 2.9, 1.4, 0, b'setosa'), (4.9, 3.1, 1.5, 0, b'setosa'),
(5.4, 3.7, 1.5, 0, b'setosa'), (4.8, 3.4, 1.6, 0, b'setosa'),
......
......
(5.8, 2.7, 5.1, 1, b'virginic'), (6.8, 3.2, 5.9, 2, b'virginic'),
(6.7, 3.3, 5.7, 2, b'virginic'), (6.7, 3. , 5.2, 2, b'virginic'),
(6.3, 2.5, 5. , 1, b'virginic'), (6.5, 3. , 5.2, 2, b'virginic'),
(6.2, 3.4, 5.4, 2, b'virginic'), (5.9, 3. , 5.1, 1, b'virginic')],
dtype=[('f0', '