Understand tensors and their related methods, and understand the autograd (automatic differentiation) mechanism.
1) Initialize matrices:
import torch
M = torch.rand(1,3)
N = torch.rand(2,1)
print(M)
print(N)
M1 = torch.tensor([[1,2,3]])
N1 = torch.tensor([[4],
[5]])
print(M1)
print(N1)
Output:
tensor([[0.0561, 0.1807, 0.6618]])
tensor([[0.3556],
[0.0399]])
tensor([[1, 2, 3]])
tensor([[4],
[5]])
2) Subtraction, three ways:
print(M - N)
print(M1 - N1)
print(torch.subtract(M , N))
print(torch.subtract(M1 , N1))
M.subtract_(N)
print(M)
M1.subtract_(N1)
print(M1)
Output:
tensor([[-0.2995, -0.1749, 0.3061],
[ 0.0162, 0.1408, 0.6218]])
tensor([[-3, -2, -1],
[-4, -3, -2]])
tensor([[-0.2995, -0.1749, 0.3061],
[ 0.0162, 0.1408, 0.6218]])
tensor([[-3, -2, -1],
[-4, -3, -2]])
RuntimeError Traceback (most recent call last)
Cell In[7], line 7
4 print(torch.subtract(M , N))
5 print(torch.subtract(M1 , N1))
----> 7 M.subtract_(N)
8 print(M)
9 M1.subtract_(N1)
RuntimeError: output with shape [1, 3] doesn't match the broadcast shape [2, 3]
Analysis:
The last (in-place) method raises an error and produces no output: broadcasting M ([1, 3]) against N ([2, 1]) yields a [2, 3] result, but subtract_ must write that result back into M without changing M's shape, so the operation fails. The out-of-place forms succeed because they allocate a fresh output tensor.
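A minimal sketch illustrating the difference: the out-of-place forms allocate a new output tensor of the broadcast shape, while the in-place form must write into M itself:
import torch

M = torch.rand(1, 3)
N = torch.rand(2, 1)
out = M - N                    # out-of-place: a new [2, 3] tensor is allocated
print(out.shape)               # torch.Size([2, 3])

# in-place succeeds only when the broadcast result fits M's own shape,
# e.g. subtracting another [1, 3] tensor (or a scalar):
M.subtract_(torch.rand(1, 3))
print(M.shape)                 # still torch.Size([1, 3])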
Operation:
import torch
P = torch.normal(0, 0.01, [3,2])
Q = torch.normal(0, 0.01, [4,2])
print(P)
print(Q)
QT = Q.t()               # transpose Q: [4, 2] -> [2, 4]
print(QT)
print(torch.mm(P, QT))   # matrix product: [3, 2] x [2, 4] -> [3, 4]
Output:
tensor([[ 0.0014, -0.0035],
[-0.0088, -0.0016],
[-0.0094, -0.0096]])
tensor([[ 0.0054, -0.0040],
[ 0.0007, -0.0021],
[-0.0013, 0.0001],
[-0.0085, 0.0035]])
tensor([[ 0.0054, 0.0007, -0.0013, -0.0085],
[-0.0040, -0.0021, 0.0001, 0.0035]])
tensor([[ 2.1574e-05, 8.4666e-06, -2.3473e-06, -2.3999e-05],
[-4.1593e-05, -2.8831e-06, 1.1574e-05, 6.9699e-05],
[-1.2835e-05, 1.3717e-05, 1.1232e-05, 4.6976e-05]])
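A side note not in the original run: torch.mm only accepts 2-D tensors, while the more general torch.matmul (or the @ operator) handles this case identically:
print(torch.matmul(P, Q.t()))   # same [3, 4] result as torch.mm(P, QT)
print(P @ Q.t())                # @ is shorthand for matmul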
Operation:
import torch
x = torch.tensor(1.0, requires_grad = True)
print(x)
print(x.grad)
y1 = x ** 2
with torch.no_grad():   # interrupt autograd tracking for y2
y2 = x ** 3
y3 = y1 + y2
y3.backward()
print(x.grad)
Output:
tensor(1., requires_grad=True)
None
tensor(2.)
Analysis:
By the differentiation rules, y3' = 2x + 3x^2, but the computation of y2 was excluded from tracking by torch.no_grad(), so only y1 = x^2 contributes to the gradient. At x = 1 the gradient of y3 with respect to x is therefore 2, hence the output tensor(2.).
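To double-check this (a verification sketch, not part of the original run), recomputing without torch.no_grad() recovers the full derivative 2x + 3x^2 = 5 at x = 1:
x = torch.tensor(1.0, requires_grad=True)
y3 = x ** 2 + x ** 3   # both terms are tracked this time
y3.backward()
print(x.grad)          # tensor(5.)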
1. Generate an artificial dataset
2. Understand the idea of binary classification
3. Understand the Sigmoid function
4. Understand BCE loss
Build the dataset
import numpy as np
import matplotlib.pyplot as plt

num_inputs = 2                      # number of features
n_data = torch.ones(1000, num_inputs)
x1 = torch.normal(2 * n_data, 1)    # positive-class features, shape = (1000, 2)
y1 = torch.ones(1000)               # positive-class labels, shape = (1000,)
x2 = torch.normal(-2 * n_data, 1)   # negative-class features
y2 = torch.zeros(1000)              # negative-class labels
Build the training set
train_index = 700   # per-class split point (assumed value; not given in the original)
trainfeatures = torch.cat((x1[:train_index], x2[:train_index]), 0).type(torch.FloatTensor)
trainlabels = torch.cat((y1[:train_index], y2[:train_index]), 0).type(torch.FloatTensor)
print(len(trainfeatures))
Build the test set
testfeatures = torch.cat((x1[train_index:], x2[train_index:]), 0).type(torch.FloatTensor)
testlabels = torch.cat((y1[train_index:], y2[train_index:]), 0).type(torch.FloatTensor)
Visualize the generated dataset
plt.scatter(trainfeatures.data.numpy()[:, 0],
            trainfeatures.data.numpy()[:, 1],
            c=trainlabels.data.numpy(),
            s=5, lw=0, cmap='RdYlGn')
plt.show()
Read the data
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    np.random.shuffle(indices)   # shuffle so batches are drawn in random order
    for i in range(0, num_examples, batch_size):
        j = torch.LongTensor(indices[i:min(i + batch_size, num_examples)])
        yield features.index_select(0, j), labels.index_select(0, j)
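A quick usage sketch (the batch size of 10 is arbitrary here): each step of the generator yields one mini-batch of features and labels:
for X, y in data_iter(10, trainfeatures, trainlabels):
    print(X.shape, y.shape)   # torch.Size([10, 2]) torch.Size([10])
    break                     # inspect just the first batch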
Initialize the parameters
w = torch.tensor(np.random.normal(0, 0.01, (num_inputs, 1)), dtype=torch.float32)
b = torch.zeros(1, dtype=torch.float32)
w.requires_grad_(requires_grad=True)   # track gradients so sgd can update the parameters
b.requires_grad_(requires_grad=True)
Implement logistic regression
def logits(X, w, b):
    y = torch.mm(X, w) + b
    return 1 / (1 + torch.exp(-y))   # sigmoid; equivalent to the original 1/(1 + e^(-y))
Implement the binary cross-entropy loss
def logits_loss(y_hat, y):
    y = y.view(y_hat.size())
    return -y.mul(torch.log(y_hat)) - (1 - y).mul(torch.log(1 - y_hat))
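As a sanity check (an addition, not part of the original report), this element-wise loss should agree with PyTorch's built-in binary cross-entropy when reduction='none':
import torch.nn.functional as F

y_hat = torch.tensor([0.9, 0.2])
y = torch.tensor([1.0, 0.0])
print(logits_loss(y_hat, y))                                # manual BCE
print(F.binary_cross_entropy(y_hat, y, reduction='none'))   # built-in BCE, same values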
Optimization: mini-batch stochastic gradient descent
def sgd(params, lr, batch_size):
    for param in params:
        param.data -= lr * param.grad / batch_size
Test-set accuracy
def evaluate_accuracy():
    acc_sum, n, test_l_sum = 0.0, 0, 0
    for X, y in data_iter(batch_size, testfeatures, testlabels):
        y_hat = net(X, w, b)
        l = loss(y_hat, y).sum()   # loss on the raw probabilities, before thresholding
        test_l_sum += l.item()
        y_hat = torch.squeeze(torch.where(y_hat > 0.5, torch.tensor(1.0), torch.tensor(0.0)))
        acc_sum += (y_hat == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n, test_l_sum / n
lr = 0.0005          # learning rate
num_epochs = 300     # number of epochs
net = logits         # the model
loss = logits_loss   # the loss function
batch_size = 50
test_acc, train_acc = [], []
train_loss, test_loss = [], []
for epoch in range(num_epochs):   # train for num_epochs epochs
    train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
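    # The remainder of this epoch loop is not shown in the original report;
    # what follows is a minimal sketch of what it presumably contains,
    # mirroring the softmax training loop later in this report:
    for X, y in data_iter(batch_size, trainfeatures, trainlabels):
        y_hat = net(X, w, b)
        l = loss(y_hat, y).sum()       # summed BCE over the mini-batch
        l.backward()                   # compute gradients of w and b
        sgd([w, b], lr, batch_size)    # update the parameters
        w.grad.data.zero_()            # gradients accumulate, so reset them
        b.grad.data.zero_()
        train_l_sum += l.item()
        train_acc_sum += ((y_hat.squeeze() > 0.5).float() == y).sum().item()
        n += y.shape[0]
    train_acc.append(train_acc_sum / n)
    train_loss.append(train_l_sum / n)
    test_a, test_l = evaluate_accuracy()
    test_acc.append(test_a)
    test_loss.append(test_l)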
Model definition (nn.Module version)
from torch import nn

class LogisticRegression(nn.Module):
    def __init__(self, n_features):
        super(LogisticRegression, self).__init__()
        self.lr = nn.Linear(n_features, 1)
        self.sm = nn.Sigmoid()

    def forward(self, x):   # forward pass
        x = self.lr(x)
        x = self.sm(x)
        return x
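A usage sketch (the loss and optimizer choices are assumptions, since that part of the lab is not shown). Because forward() already applies the sigmoid, the matching loss is nn.BCELoss, which expects probabilities:
logistic_model = LogisticRegression(num_inputs)   # num_inputs = 2 for the generated dataset
criterion = nn.BCELoss()                          # expects probabilities, which forward() outputs
optimizer = torch.optim.SGD(logistic_model.parameters(), lr=0.01)

y_hat = logistic_model(trainfeatures[:4])
print(y_hat.shape)                                # torch.Size([4, 1]); values lie in (0, 1)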
#Test-set accuracy
def evaluate_accuracy():
    acc_sum, n = 0.0, 0
    for X, y in test_data_iter:
        y_hat = logistic_model(X)
        y_hat = torch.squeeze(torch.where(y_hat > 0.5, torch.tensor(1.0), torch.tensor(0.0)))
        acc_sum += (y_hat == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
import sys
print("torch.__version__:", torch.__version__)
print("torchvision.__version__:", torchvision.__version__)
#Load the dataset
batch_size = 256
mnist_train = torchvision.datasets.FashionMNIST(root="...\\train_data", train=True, download=True, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root="...\\test_data", train=False, download=True, transform=transforms.ToTensor())
train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=0)
test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=0)
#Cross-entropy loss
def cross_entropy(y_hat, y):
    return -torch.log(y_hat.gather(1, y.view(-1, 1)))
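To see what gather does here (a small illustrative example, not from the original report): it picks out, for each row of y_hat, the predicted probability of that row's true class:
y_hat = torch.tensor([[0.1, 0.3, 0.6],
                      [0.3, 0.2, 0.5]])
y = torch.tensor([2, 0])
print(y_hat.gather(1, y.view(-1, 1)))   # tensor([[0.6000], [0.3000]])
print(cross_entropy(y_hat, y))          # -log of those two probabilities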
#Optimization: mini-batch stochastic gradient descent
def sgd(params, lr, batch_size):
    for param in params:
        param.data -= lr * param.grad / batch_size
# Initialize model parameters
num_inputs = 784    # inputs are 28x28-pixel images, flattened to vectors of length 28*28 = 784
num_outputs = 10    # 10 image classes
W = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_outputs)), dtype=torch.float)   # 784x10 weight matrix
b = torch.zeros(num_outputs, dtype=torch.float)   # bias vector of length 10
# Track gradients of the model parameters
W.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True)
# Implement softmax
def softmax(X):
    X_exp = X.exp()                              # exponentiate every element
    partition = X_exp.sum(dim=1, keepdim=True)   # sum the exponentials within each row
    return X_exp / partition                     # divide each row by its own sum
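One caveat worth noting (standard practice, not part of the original lab): X.exp() can overflow for large inputs. Subtracting each row's maximum first leaves the softmax value unchanged and is numerically safer; stable_softmax below is a hypothetical variant:
def stable_softmax(X):
    X = X - X.max(dim=1, keepdim=True).values   # the shift cancels in the ratio
    X_exp = X.exp()
    return X_exp / X_exp.sum(dim=1, keepdim=True)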
# Model definition
def net(X):
    return softmax(torch.mm(X.view((-1, num_inputs)), W) + b)
# Classification accuracy
def evaluate_accuracy(data_iter, net):
    acc_sum, n, test_l_sum = 0.0, 0, 0.0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        l = loss(net(X), y).sum()
        test_l_sum += l.item()
        n += y.shape[0]
    return acc_sum / n, test_l_sum / n
# Model training
num_epochs, lr = 50, 0.1
test_acc, train_acc = [], []
train_loss, test_loss = [], []
loss = cross_entropy
params = [W, b]
for epoch in range(num_epochs):
    train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y).sum()
        l.backward()
        sgd(params, lr, batch_size)
        W.grad.data.zero_()   # gradients accumulate by default,
        b.grad.data.zero_()   # so zero them at the end of each iteration
        train_l_sum += l.item()
        train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
        n += y.shape[0]
    test_a, test_l = evaluate_accuracy(test_iter, net)
    test_acc.append(test_a)
    test_loss.append(test_l)
    train_acc.append(train_acc_sum / n)
    train_loss.append(train_l_sum / n)
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
          % (epoch + 1, train_loss[epoch], train_acc[epoch], test_acc[epoch]))
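The four lists collected above are never plotted in the code shown; a minimal matplotlib sketch (an addition, not part of the original report) for visualizing the curves:
import matplotlib.pyplot as plt

epochs = range(1, num_epochs + 1)
plt.plot(epochs, train_loss, label='train loss')
plt.plot(epochs, test_loss, label='test loss')
plt.plot(epochs, train_acc, label='train acc')
plt.plot(epochs, test_acc, label='test acc')
plt.xlabel('epoch')
plt.legend()
plt.show()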
import torch
from torch import nn
import torchvision
import torchvision.transforms as transforms
import numpy as np
import sys
#Load the dataset
batch_size = 256
mnist_train = torchvision.datasets.FashionMNIST(root="...\\train_data", train=True, download=True, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root="...\\test_data", train=False, download=True, transform=transforms.ToTensor())
train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=0)
test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=0)
#Define the model
net = torch.nn.Sequential(nn.Flatten(),
                          nn.Linear(784, 10))
#Initialize model parameters
def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)
net.apply(init_weights)
#Define the loss function and optimizer
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(),lr = 0.1)
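Note that the model ends in a bare nn.Linear with no softmax layer: nn.CrossEntropyLoss applies log-softmax internally and expects raw logits. A quick illustrative check (not from the original code):
z = torch.randn(2, 10)                            # fake logits for two samples
targets = torch.tensor([3, 7])
manual = -torch.log_softmax(z, dim=1).gather(1, targets.view(-1, 1)).mean()
print(manual.item(), loss(z, targets).item())     # the two values agree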
# Classification accuracy
def evaluate_accuracy(data_iter, net):
    acc_sum, n, test_l_sum = 0.0, 0, 0.0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        l = loss(net(X), y) * y.shape[0]   # CrossEntropyLoss averages over the batch; scale back to a sum
        test_l_sum += l.item()
        n += y.shape[0]
    return acc_sum / n, test_l_sum / n
# Model training
num_epochs = 50
lr = 0.1   # (already baked into the optimizer above)
test_acc, train_acc = [], []
train_loss, test_loss = [], []
for epoch in range(num_epochs):
    train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        train_l_sum += l.item() * y.shape[0]   # scale the batch-mean loss back to a sum
        train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
        n += y.shape[0]
    test_a, test_l = evaluate_accuracy(test_iter, net)
    test_acc.append(test_a)
    test_loss.append(test_l)
    train_acc.append(train_acc_sum / n)
    train_loss.append(train_l_sum / n)
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
          % (epoch + 1, train_loss[epoch], train_acc[epoch], test_acc[epoch]))