计算机视觉入门过程(用时约为2个月):
一,理论学习
1,复习了线性代数和概率论
2,学习了python的numpy库和pytorch库的使用
3,李飞飞cs231n课程视频
4,吴恩达深度学习课程视频
二.实践
1,搭建深度学习环境
2,训练模型:mnist识别率最高达到98.2%,cifar识别率最高达到94.7%
因为李飞飞cs231n课程和吴恩达深度学习课程都是全英文课程,所以在CSDN上找了对应的笔记,边看笔记边看视频,这样学起来轻松一些。
(课程视频在B站可以找到)
笔记链接如下:
1.李飞飞cs231n课程:https://blog.csdn.net/qq_34611579/article/details/81072920?utm_source=app&app_version=4.8.0&code=app_1562916241&uLinkId=usr1mkqgl919blen
2.吴恩达深度学习课程:https://blog.csdn.net/wuzhongqiang/article/details/89702268?utm_source=app&app_version=4.8.0&code=app_1562916241&uLinkId=usr1mkqgl919blen
另外,下面是学习过程中遇到疑问时,通过百度搜索到的一些参考资料:
1.numpy 中的随机打乱数据方法np.random.shuffle
https://blog.csdn.net/weixin_43896259/article/details/106116955
2,图像预处理Transforms与normalize
https://blog.csdn.net/aidanmo/article/details/104059612
3.关于transforms.Normalize()函数
https://blog.csdn.net/jzwong/article/details/104272600
4,numpy.floor()函数作用:向下取整
5.torch.utils.data.DataLoader()详解
https://blog.csdn.net/qq_40520596/article/details/106981039
pytorch中 model.cuda的作用
https://www.cnblogs.com/pogeba/p/13890846.html
Pytorch里面nn.CrossEntropyLoss的含义
https://blog.csdn.net/lang_yubo/article/details/105108174
model.train()和model.eval()用法和区别
https://zhuanlan.zhihu.com/p/357075502
以optim.SGD为例介绍pytorch优化器
https://www.sogou.com/link?url=hedJjaC291OV7dVab-QfvHtdr0qpeLU_JZ6a8oyfxdi0c29X6nLNTA..
下面来讲讲mnist和cifar数据集的训练过程
1.mnist数据集(KNN分类;注意:下面贴出的代码实际加载的是datasets.CIFAR10)
from torchvision import datasets, transforms
import numpy as np
from sklearn.metrics import accuracy_score
import torch
# from tqdm import tqdm
import time
# matrix func
def knn(train_x, train_y, test_x, test_y, k):
    """Classify ``test_x`` by k-nearest-neighbour majority vote and report accuracy.

    Squared Euclidean distances between every test row and every training row
    are computed in one matrix expression; the ``k`` closest training rows
    vote on each test row's label.

    Args:
        train_x: 2-D tensor with one flattened training sample per row.
        train_y: Sequence of non-negative integer labels, one per training row.
        test_x: 2-D tensor with one flattened test sample per row.
        test_y: Sequence of integer labels, one per test row.
        k: Number of neighbours that vote. Values larger than the number of
            training rows are clamped to that number.

    Returns:
        The fraction of test rows whose predicted label equals ``test_y``.
        The same value is printed, together with the elapsed time.

    Raises:
        ValueError: If ``k`` is smaller than 1, either sample set is empty,
            or the label sequences do not match the sample counts.
    """
    num_test = test_x.size(0)
    num_train = train_x.size(0)

    # Validate before the expensive distance computation instead of asserting
    # afterwards (asserts are also stripped under ``python -O``).
    if k < 1:
        raise ValueError("k must be at least 1")
    if num_train == 0 or num_test == 0:
        raise ValueError("train_x and test_x must each contain at least one sample")
    if len(train_y) != num_train:
        raise ValueError("train_y must have one label per row of train_x")
    if len(test_y) != num_test:
        raise ValueError("test_y must have one label per row of test_x")

    since = time.time()

    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b, evaluated for all pairs at once.
    # Entry (i, j) is the squared distance from test row i to training row j.
    print("cal dist matrix")
    test_sq = (test_x ** 2).sum(dim=1, keepdim=True)   # shape (num_test, 1)
    train_sq = (train_x ** 2).sum(dim=1).unsqueeze(0)  # shape (1, num_train)
    dist_mat = test_sq + train_sq - 2 * test_x.matmul(train_x.transpose(0, 1))

    # Only the k smallest distances per row are needed, so select them
    # directly rather than sorting every row completely.
    neighbours = min(k, num_train)
    nearest = dist_mat.topk(neighbours, dim=-1, largest=False).indices

    # Look up all neighbour labels in one indexing step, then vote per row.
    # ``bincount(...).argmax()`` resolves ties in favour of the smallest label.
    train_labels = np.asarray(train_y)
    neighbour_labels = train_labels[nearest.cpu().numpy()]
    res = [int(np.bincount(votes).argmax()) for votes in neighbour_labels]

    accuracy = float(np.mean(np.asarray(res) == np.asarray(test_y)))
    print("识别率:", accuracy)

    time_elapsed = time.time() - since
    print('KNN mat training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    return accuracy
if __name__ == "__main__":
    # Both CIFAR10 splits are read from ./data2. No ``download`` argument is
    # passed, so the files are expected to be on disk already.
    to_tensor = transforms.ToTensor()
    train_dataset = datasets.CIFAR10(root="./data2", train=True, transform=to_tensor)
    test_dataset = datasets.CIFAR10(root="./data2", train=False, transform=to_tensor)

    def _flatten_split(dataset, cutoff):
        """Return (flattened images, labels) for the leading samples of ``dataset``.

        Indices 0 .. cutoff + 1 are kept (or the whole dataset when it is
        shorter), which is exactly what a post-append ``if i > cutoff: break``
        guard yields.
        NOTE(review): that is cutoff + 2 samples - confirm the cap is intended.
        """
        count = min(len(dataset), cutoff + 2)
        samples = [dataset[position] for position in range(count)]
        # view(-1) collapses each multi-dimensional image tensor into one vector.
        flat_images = [image.view(-1) for image, _ in samples]
        labels = [label for _, label in samples]
        return flat_images, labels

    # Build the train and test sets as parallel lists of vectors and labels.
    train_x, train_y = _flatten_split(train_dataset, 50000)
    test_x, test_y = _flatten_split(test_dataset, 9000)

    # A set drops duplicates, so this prints each distinct label once.
    print("classes:", set(train_y))

    # torch.stack turns each list of 1-D tensors into a single 2-D tensor.
    train_matrix = torch.stack(train_x)
    test_matrix = torch.stack(test_x)
    knn(train_matrix, train_y, test_matrix, test_y, 7)
2.cifar数据集
import torch
import numpy as np
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# Convert images to torch.FloatTensor and normalise them.
# ToTensor() rescales pixel values from 0-255 to 0-1; Normalize() with mean 0.5
# and std 0.5 per channel then maps 0-1 onto the range -1..1.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Load the training split and the held-out test split from ./data.
# The test split must use train=False: loading it with train=True evaluates
# the model on the very images it was trained on and inflates the accuracy.
train_data = datasets.CIFAR10('data', train=True, download=False, transform=transform)
test_data = datasets.CIFAR10('data', train=False, download=False, transform=transform)

# Fraction of the training split that is set aside for validation.
valid_size = 0.2

# Shuffle the training indices, then cut them into validation and training parts.
num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

# Samplers that draw batches only from their own index subset.
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# Number of worker subprocesses used for loading (0 = load in the main process).
num_workers = 0
# Number of images per batch.
batch_size = 16

# Data loaders: the train and validation loaders share train_data and are
# separated by their samplers; the test loader walks test_data in order.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                           sampler=train_sampler, num_workers=num_workers)
valid_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                           sampler=valid_sampler, num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size,
                                          num_workers=num_workers)

# Names of the 10 classes, in label-index order.
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# Convolutional network used for the CIFAR classifier.
class Net(nn.Module):
    """Three conv + max-pool stages followed by two fully connected layers.

    The layer sizes assume 3-channel 32x32 input: each 2x2 pooling step halves
    the spatial size (32 -> 16 -> 8 -> 4), leaving 64 * 4 * 4 features per
    sample for the first linear layer. The output is 10 raw class scores
    (no softmax is applied).
    """

    def __init__(self):
        super().__init__()
        # Convolution stages; padding=1 with a 3x3 kernel keeps the spatial size.
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)   # sees 32x32x3
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)  # sees 16x16x16
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)  # sees 8x8x32
        # 2x2 max pooling with stride 2, shared by all three stages.
        self.pool = nn.MaxPool2d(2, 2)
        # Fully connected layers: 64*4*4 -> 500 -> 10.
        self.fc1 = nn.Linear(64 * 4 * 4, 500)
        self.fc2 = nn.Linear(500, 10)
        # Dropout with p=0.3, applied before each linear layer.
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Return the 10 class scores for a batch of images ``x``."""
        # Convolution -> ReLU -> max-pool, three times.
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # Flatten per sample and keep the batch dimension fixed. A hard-coded
        # view(-1, 64*4*4) would silently fold surplus features into extra
        # "samples" for inputs that are not 32x32; this way fc1 raises instead.
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        # Hidden layer with ReLU activation.
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        # Output layer: raw scores, no activation.
        x = self.fc2(x)
        return x
# Build the CNN and print its layer layout.
model = Net()
print(model)

# Train on the GPU when CUDA is available, otherwise stay on the CPU.
train_on_gpu = torch.cuda.is_available()
if train_on_gpu:
    model.cuda()

# Loss and optimiser: cross-entropy with plain SGD at learning rate 0.01.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Number of passes over the training data.
n_epochs = 40

# Lowest validation loss seen so far. np.inf is used because the np.Inf alias
# was removed in NumPy 2.0.
valid_loss_min = np.inf

for epoch in range(1, n_epochs + 1):
    # Running sums of per-sample loss for this epoch.
    train_loss = 0.0
    valid_loss = 0.0

    # ---- training pass ----
    model.train()
    for data, target in train_loader:
        # Move the batch to the GPU if the model lives there.
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        # Forward pass, loss, backward pass, parameter update.
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # The criterion returns a batch mean; weight it by the batch size so
        # the epoch average below is per sample.
        train_loss += loss.item() * data.size(0)

    # ---- validation pass ----
    model.eval()
    # No gradients are needed for validation, so skip building the graph.
    with torch.no_grad():
        for data, target in valid_loader:
            if train_on_gpu:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            loss = criterion(output, target)
            valid_loss += loss.item() * data.size(0)

    # Average loss per sample over each subset.
    train_loss = train_loss / len(train_loader.sampler)
    valid_loss = valid_loss / len(valid_loader.sampler)

    print('Epoch:{} \tTraining loss:{} \tValidation loss:{}'.format(
        epoch, train_loss, valid_loss
    ))

    # Checkpoint the weights whenever the validation loss does not get worse.
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({} --> {}). Saving model ...'.format(
            valid_loss_min, valid_loss
        ))
        torch.save(model.state_dict(), 'model_cifar.pt')
        valid_loss_min = valid_loss
# Restore the checkpoint written during training (the weights with the lowest
# validation loss). The file is read onto the CPU; load_state_dict then copies
# the values into the model's existing parameters.
model.load_state_dict(torch.load('model_cifar.pt', map_location=torch.device('cpu')))

# Running loss sum and per-class counters (index = class label).
test_loss = 0.0
class_correct = [0.0] * 10
class_total = [0.0] * 10

model.eval()
# Evaluation needs no gradients.
with torch.no_grad():
    for data, target in test_loader:
        # Move the batch to the GPU if the model lives there.
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
        output = model(data)
        loss = criterion(output, target)
        # Weight the batch-mean loss by the batch size for a per-sample average.
        test_loss += loss.item() * data.size(0)
        # The predicted class is the index of the largest score.
        _, pred = torch.max(output, 1)
        hits = pred.eq(target.view_as(pred)).cpu().tolist()
        labels = target.cpu().tolist()
        # Walk the samples actually present in this batch. Indexing up to
        # batch_size would fail on a short final batch, and squeezing the
        # result would break on a batch holding a single sample.
        for label, hit in zip(labels, hits):
            class_correct[label] += hit
            class_total[label] += 1

# Average test loss per sample.
test_loss = test_loss / len(test_loader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))

# Per-class accuracy.
for i in range(10):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            classes[i], 100 * class_correct[i] / class_total[i],
            class_correct[i], class_total[i]))
    else:
        print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

# Overall accuracy across all classes.
print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))