最近看到同事用scikit-learn里统计n-gram的库省去了好多麻烦,决定从基础开始看scikit-learn,先从numpy基础重新开始复习,尽量每天一篇,内容会同步到https://github.com/pangdaxing/sckit-learn-notebook中
import numpy as np
# Build a 3x5 integer array and report its basic attributes.
# Each print emits the label and the value on separate lines.
a = np.arange(15).reshape(3, 5)
print("a:", a, sep="\n")
print("a.shape:", a.shape, sep="\n")  # length of every axis
print("a.dim:", a.ndim, sep="\n")  # number of axes
print("a.type", a.dtype.name, sep="\n")  # name of the element type
print("a.itemsize", a.itemsize, sep="\n")  # bytes per element
print("a.size", a.size, sep="\n")  # total number of elements
print("type(a):", type(a), sep="\n")
import numpy as np
## Create arrays from Python lists / tuples
a = np.array([2, 3, 4])
print("a:")
print(a)
print("a.type:")
print(a.dtype)
print("a.itemsize:")
print(a.itemsize)
# A single float among the inputs makes the whole array floating point
b = np.array([(1.5, 2, 3), (4, 5, 6)])
print("b:")
print(b)
print("b.type:")
print(b.dtype)
print("b.itemsize:")
print(b.itemsize)
# The element type can be forced with the dtype argument
c = np.array([[1, 2], [3, 4]], dtype=complex)
print("c:")
print(c)
print("c.type:")
print(c.dtype)
print("c.itemsize:")
print(c.itemsize)
## Create arrays with NumPy factory functions
# The shape is passed as one tuple argument
print("np.zeros((3,4)):")
print(np.zeros((3, 4)))
# Factory functions accept dtype as well
print("np.ones((2,3,5),dtype=np.int64")
print(np.ones((2, 3, 5), dtype=np.int64))
# np.empty returns an uninitialized array; its contents are arbitrary
print("np.empty((3,4), dtype=np.int32):")
print(np.empty((3, 4), dtype=np.int32))
# np.arange works like range but returns an np.ndarray and accepts float steps
print("np.arange(1,3,0.3):")
print(np.arange(1, 3, 0.3))
# With a float step, rounding error makes the length of an arange result hard
# to predict; np.linspace is the safer way to build such a sequence.
# Note: the third argument of linspace is the number of samples, whereas the
# third argument of arange is the step.
print("np.linspace(0, 2*pi, 100)")
print(np.linspace(0, 2.0 * np.pi, 100))
import numpy as np
# Arithmetic, ufuncs and comparisons all act element by element.
a = np.array([20, 30, 40, 50])
print("a:", a, sep="\n")
b = np.arange(4)
print("b:", b, sep="\n")
print("a-b:", a - b, sep="\n")
print("b**2", b ** 2, sep="\n")
print("10*np.sin(a)", 10 * np.sin(a), sep="\n")
print("a<35", a < 35, sep="\n")
# NOTE: `*` is the element-wise product; np.dot is the dot (matrix) product.
print("a*b", a * b, sep="\n")
# Transposing a 1-D array returns it unchanged, so both calls below give the
# same scalar.
print("np.dot(a,b.transpose()", np.dot(a, b.T), sep="\n")
print("np.dot(a.transpose(),b", np.dot(a.T, b), sep="\n")
import numpy as np
# In-place operators update the existing array rather than creating a new one.
a = np.ones((2, 3), dtype=int)
print("a", a, sep="\n")
b = np.random.random((2, 3))
print("b", b, sep="\n")
a *= 3
print("a=a*3:", a, sep="\n")
b += a  # int values are added into the float array
print("b+=a", b, sep="\n")
# The opposite direction (`a += b`) is not possible: the float result cannot
# be implicitly converted back into the integer array `a`.
import numpy as np
# Reductions (sum / min / max) over a random 2x3 array.
a = np.random.random((2, 3))
print("a:")
print(a)
# Without an axis argument the reduction runs over every element
print("a.sum:")
print(a.sum())
print("a.min:")
print(a.min())  # must be called; a bare `a.min` prints the bound method object
print("a.max:")
print(a.max())
# With an axis argument the reduction runs along that axis
print("a.sum(axis=1)")
print(a.sum(axis=1))  # one total per row
print("a.sum(axis=0):")
print(a.sum(axis=0))  # one total per column
import numpy as np
# Universal functions such as exp and sqrt are applied to every element.
B = np.random.random((4, 5))
print("B:", B, sep="\n")
print("np.exp(B):", np.exp(B), sep="\n")
print("np.sqrt(B)", np.sqrt(B), sep="\n")
import numpy as np
## Indexing and slicing a 1-D array
a = np.arange(10) ** 3
print("a:", a, sep="\n")
print("a[2]:", a[2], sep="\n")
print("a[2:5]:", a[2:5], sep="\n")
a[:6:2] = -1000  # same as a[0:6:2]: every second element before index 6
print("a[:6:2] = -1000", a, sep="\n")
print("a[::-1]:", a[::-1], sep="\n")  # a in reverse order
## Indexing a multi-dimensional array
def f(x, y):
    """Return 10 * x + y; works element-wise when x and y are index arrays."""
    return 10 * x + y


# np.fromfunction passes the row and column index grids of a (5, 4) array to f
# and stores each result at the matching position.
b = np.fromfunction(f, (5, 4), dtype=int)
print("b")
print(b)
print("b[2,3]:")
print(b[2, 3])
print("b[0:5,1]")
print(b[0:5, 1])  # second column, rows 0..4
print("b[:,1]")
print(b[:, 1])  # the same column using a full slice
print("b[1:3,2:3]:")
print(b[1:3, 2:3])
print("b[-1]")  # same as b[-1, :]: missing trailing indices mean full slices
print(b[-1])
print("b[:,-1]")
print(b[:, -1])  # last column
# Using the ellipsis (...) in an index
# np.empty leaves the array uninitialized, so the printed values are arbitrary.
c = np.empty([2, 3, 4], dtype=int)
print("c:")
print(c)
print("c.shape:")
print(c.shape)
print("c[1,...]")
print(c[1, ...])  # same as c[1, :, :]
# An index may contain only one ellipsis, so c[..., 2, ...] is rejected.
# Iterating over an array walks its first axis; .flat walks every element.
print("for row in c:")
for row in c:
    print(row)
print("for elem in c.flat:")
for elem in c.flat:
    print(elem)
import numpy as np
# Stacking 1-D arrays in different directions.
b = np.array(range(0, 3, 1))
print("b:")
print(b)
c = np.array(range(5, 8, 1))
print("c:")
print(c)
# column_stack places the 1-D inputs side by side as columns of a 2-D array
cstack_bc = np.column_stack((b, c))
print("cstack_bc:")
print(cstack_bc)
# hstack joins 1-D inputs end to end into one longer 1-D array
rstack_bc = np.hstack((b, c))
print("rstack_bc:")
print(rstack_bc)
# np.newaxis adds a dimension: each 1-D array becomes a (3, 1) column, and
# vstack then places the two columns one below the other.
print("np.vstack((b[:,np.newaxis],c[:,np.newaxis])):")
print(np.vstack((b[:, np.newaxis], c[:, np.newaxis])))
# np.r_ concatenates slices and scalars along the first axis
npr = np.r_[1:4, 0, 9]
print("npr:")
print(npr)
# np.c_ concatenates along the last axis
npc = np.c_[np.array([[1, 2, 3]]), 0, 0, np.array([[4, 5, 6]])]
print("npc:")
print(npc)
Broadcasting让不同shape的array能够进行数学运算:从最后一维开始逐维比较,两个维度相等或其中一个为1时才兼容。下面前两组shape可以broadcast,后两组不可以:
image (3d array): 256 x 256 x 3
Scale (1d array): 3
Result (3d array): 256 x 256 x 3
A (4d array): 8 x 1 x 6 x 1
B (3d array): 7 x 1 x 5
Result (4d array): 8 x 7 x 6 x 5
A (1d array): 3
B (1d array): 4 # trailing dimensions do not match
A (2d array): 2 x 1
B (3d array): 8 x 4 x 3 # second from last dimensions mismatched
import numpy as np
# Operands of different shapes used to demonstrate broadcasting.
x = np.arange(4)
print("x:", x, sep="\n")
xx = x.reshape(4, 1)
print("xx:", xx, sep="\n")
y = np.ones(5)
print("y:", y, sep="\n")
z = np.ones((3, 4))
print("z:", z, sep="\n")
# Valid broadcast: shapes (4,) and (4, 1) combine into (4, 4)
tmp = x + xx
print("x+xx:", tmp, sep="\n")
tmp = x * xx
print("x*xx:", tmp, sep="\n")
# Invalid: x + y fails because shapes (4,) and (5,) do not match
# Valid with a multi-dimensional operand: (4,) and (3, 4) combine into (3, 4)
print("x shape", x.shape, sep="\n")
print("z shape", z.shape, sep="\n")
tmp = x - z
print("x-z:", tmp, sep="\n")
import numpy as np
# Squares of 0..11
a = np.arange(12) ** 2
print("a:", a, sep="\n")
# Indexing with an array of positions; positions may repeat
i = np.array([1, 1, 3, 8, 5])
print("a[i]:", a[i], sep="\n")
# A 2-D index array: the result takes the shape of the index array
j = np.array([[3, 4], [9, 7]])
print("a[j]:", a[j], sep="\n")
# A colour palette, one RGB triple per row
palette = np.array(
    [
        [0, 0, 0],  # black
        [255, 0, 0],  # red
        [0, 255, 0],  # green
        [0, 0, 255],  # blue
        [255, 255, 255],  # white
    ]
)
print("palette:", palette, sep="\n")
# A tiny image described by palette row numbers
image_idx = np.array([[0, 1, 2, 0], [0, 3, 4, 0]])
print("image_idx:", image_idx, sep="\n")
# Looking up every entry yields an array of shape (2, 4, 3)
image = palette[image_idx]
print("image_shape:", image.shape, sep="\n")
print("image:", image, sep="\n")
## Indexing several dimensions with index arrays
a = np.arange(12).reshape(3, 4)
print("a:")
print(a)
i = np.array([[0, 1],  # indices into the first axis of a
              [1, 2]])
print("i:")
print(i)
j = np.array([[2, 1],  # indices into the second axis of a
              [3, 3]])
print("j:")
print(j)
print("a[i]:")
print(a[i])
print("a[i,j]:")
print(a[i, j])  # i and j must have the same (or broadcastable) shape
print("a[i,2]:")
print(a[i, 2])
print("a[:]:")
print(a[:])
print("a[:,j]:")
print(a[:, j])
# The index arrays can be packed into a tuple, which is equivalent to a[i, j].
# It must be a tuple: a plain list is treated by NumPy >= 1.23 as one index
# array for the first axis.
l = (i, j)
print("a[l]:")
print(a[l])
# Likewise np.array([i, j]) cannot be used as the index: it is interpreted as
# an index into the first axis only, where the value 3 is out of range.
import numpy as np
# Five sample times, evenly spaced from 20 to 145
time = np.linspace(20, 145, 5)
print("time:", time, sep="\n")
# Four series sampled at those five times (one row per time, one column per series)
data = np.sin(np.arange(20)).reshape(5, 4)
print("data:", data, sep="\n")
# Row index of the maximum of each column
ind = data.argmax(axis=0)
print("ind:", ind, sep="\n")
# Times at which each series reaches its maximum
time_max = time[ind]
print("time_max:", time_max, sep="\n")
# Pair each row index in `ind` with its column number (via range) so that one
# value is picked per column.
data_max = data[ind, range(data.shape[1])]
print("data_max:", data_max, sep="\n")
print("data_max==data.max(axit=0):", [data_max == data.max(axis=0)], sep="\n")
print(
    "np.all(data_max == data.max(axis=0)",
    np.all(data_max == data.max(axis=0)),
    sep="\n",
)
import numpy as np
# Assigning through lists of indices.
a = np.arange(5)
print("a:", a, sep="\n")
a[[1, 3, 4]] = 0
print("After a[[1,3,4]] = 0:", a, sep="\n")
# When an index repeats, the last assigned value is the one that remains
a[[0, 0, 2]] = [1, 2, 3]
print("After a[[0,0,2]]=[1,2,3]:", a, sep="\n")
# With +=, a repeated index is still incremented only once
a[[0, 0, 2]] += 1
print("After a[[0,0,2]]+=1:", a, sep="\n")
# Boolean mask with the same shape as a
a = np.arange(12).reshape(3, 4)
b = a > 4
print("After a > 4:")
print(b)
print("a[b]:")
print(a[b])  # 1-D array of the elements where the mask is True
# Boolean arrays can also select whole rows and columns
a = np.arange(12).reshape(3, 4)
b1 = np.array([False, True, True])  # selection along the first axis
b2 = np.array([True, False, True, False])  # selection along the second axis
# Select the rows flagged in b1
print("a[b1,:]:")
print(a[b1, :])
print("a[b1]:")
print(a[b1])
# Select the columns flagged in b2
print("a[:,b2]:")
print(a[:, b2])
# Using both masks pairs the selected row and column positions
print("a[b1,b2] :")
print(a[b1, b2])
import numpy as np
# Basic linear algebra on small 2x2 matrices.
a = np.array([[1.0, 2.0], [3.0, 4.0]])
print("a", a, sep="\n")
# Transpose
print("transpose:", a.T, sep="\n")
# Inverse
print('inverse:', np.linalg.inv(a), sep="\n")
# 2x2 identity matrix
print("I:", np.eye(2), sep="\n")
# Matrix product
j = np.array([[0.0, -1.0], [1.0, 0.0]])
print("j:", j, sep="\n")
print("np.dot(j,j):", np.dot(j, j), sep="\n")
# Trace: the sum of the diagonal entries
print("trace of a :", np.trace(a), sep="\n")
# Solve the linear system x @ w = y for w
x = np.array([[2.0, 3.0], [4.0, 5.0]])
y = np.array([[5.0], [7.0]])
print("x:", x, sep="\n")
print("y:", y, sep="\n")
print("solve wx=y, w:", np.linalg.solve(x, y), sep="\n")
# Eigenvalues and eigenvectors of a
print("eig of a", np.linalg.eig(a), sep="\n")