skimage教程
skimage开发文档
机器学习新手工程师常犯的6大错误http://www.duozhishidai.com/article-12200-1.html
深度学习需掌握的知识https://blog.csdn.net/duozhishidai/article/details/87301056
从数据集的建立,到模型的建立,到训练,到预测!!!
中间遇到很多坑
数据建立:MSELoss损失函数要求label为独热编码;CrossEntropyLoss损失函数要求预测端是浮点型的原始输出(logits),label要求是长整型的类别索引,函数内部会按类别索引计算损失,无需手动将label转成独热编码
模型建立:进入全连接层之前,需将卷积输出的4维张量(batch_size与其它3个维度)展平成2维(batch_size, 其余维度的乘积)
训练:没什么好说的
预测:也没什么好说的,总之实现对输入图片的预测功能
附代码:
训练代码,其中数据集解码函数直接复制的
import cv2
import numpy as np
import struct
import torch.nn as nn
import torch.utils.data as Data
from torch.autograd import Variable
import torch
from torch.utils.data import Dataset,DataLoader,TensorDataset
class Net(nn.Module):
    """Convolutional classifier producing 10 raw class scores per image.

    ``body`` stacks four unpadded 3x3 conv -> batch-norm -> ReLU stages
    (1 -> 16 -> 32 -> 64 -> 128 channels) with a 2x2 max-pool after the
    second and the fourth stage. ``tail`` is a three-layer fully connected
    head whose first layer expects 128*4*4 flattened features, which is what
    a 1x28x28 input yields after ``body``.
    """

    def __init__(self):
        super(Net, self).__init__()

        def conv_stage(in_channels, out_channels):
            # One feature stage: 3x3 convolution, batch norm, in-place ReLU.
            return [
                nn.Conv2d(in_channels, out_channels, 3),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(True),
            ]

        feature_layers = (
            conv_stage(1, 16)
            + conv_stage(16, 32)
            + [nn.MaxPool2d(kernel_size=2, stride=2)]
            + conv_stage(32, 64)
            + conv_stage(64, 128)
            + [nn.MaxPool2d(kernel_size=2, stride=2)]
        )
        classifier_layers = [
            nn.Linear(128 * 4 * 4, 1024),
            nn.ReLU(True),
            nn.Linear(1024, 128),
            nn.ReLU(True),
            nn.Linear(128, 10),
        ]

        self.body = nn.Sequential(*feature_layers)
        self.tail = nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Run the feature extractor, flatten per sample, then classify."""
        features = self.body(x)
        # Collapse everything except the batch dimension so the linear
        # layers receive a 2-D (batch, features) tensor.
        flattened = features.view(features.size(0), -1)
        return self.tail(flattened)
def decode_train(idx3_ubyte_file):
    """Parse an idx3-ubyte image file into a NumPy array.

    The file starts with a big-endian header of four 32-bit integers
    (magic number, image count, rows per image, columns per image) followed
    by one unsigned byte per pixel.

    :param idx3_ubyte_file: path to the idx3 file
    :return: float64 array of shape (num_images, num_rows, num_cols)
    :raises ValueError: if the file holds fewer pixel bytes than the header
        announces
    """
    # Read the whole file; the context manager closes the handle even if
    # reading fails.
    with open(idx3_ubyte_file, 'rb') as fh:
        bin_data = fh.read()

    # Header: magic number, number of images, image height, image width.
    fmt_header = '>iiii'
    magic_number, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, 0)
    print('魔数:%d, 图片数量: %d张, 图片大小: %d*%d' % (magic_number, num_images, num_rows, num_cols))

    offset = struct.calcsize(fmt_header)
    pixel_count = num_images * num_rows * num_cols
    if len(bin_data) - offset < pixel_count:
        raise ValueError(
            'idx3 file is truncated: expected %d pixel bytes, found %d'
            % (pixel_count, len(bin_data) - offset)
        )

    # Decode all pixels in one pass instead of unpacking image by image.
    pixels = np.frombuffer(bin_data, dtype=np.uint8, count=pixel_count, offset=offset)

    # Keep the progress lines callers see on stdout (one per 10000 images).
    for parsed in range(10000, num_images + 1, 10000):
        print('已解析 %d' % parsed + '张')

    # float64 matches the dtype of the array this function has always returned.
    return pixels.reshape((num_images, num_rows, num_cols)).astype(np.float64)
# Training-image idx3 file, resolved relative to the working directory.
path_train = 'train-images.idx3-ubyte'
# Parsed at module level, so this runs whenever the module is executed or
# imported; main() reads the result through this global.
# NOTE(review): the prediction script does `from minis import Net`; if that
# module is this file, importing it re-parses the dataset - consider moving
# the load into main(). TODO confirm.
image_list = decode_train(path_train)
def decode_label(idx1_ubyte_file):
    """Parse an idx1-ubyte label file into a NumPy array.

    The file starts with a big-endian header of two 32-bit integers (magic
    number, label count) followed by one unsigned byte per label.

    :param idx1_ubyte_file: path to the idx1 file
    :return: float64 array of shape (num_labels,)
    :raises ValueError: if the file holds fewer label bytes than the header
        announces
    """
    # Read the whole file; the context manager closes the handle even if
    # reading fails.
    with open(idx1_ubyte_file, 'rb') as fh:
        bin_data = fh.read()

    # Header: magic number and number of labels.
    fmt_header = '>ii'
    magic_number, num_images = struct.unpack_from(fmt_header, bin_data, 0)
    print('魔数:%d, 图片数量: %d张' % (magic_number, num_images))

    offset = struct.calcsize(fmt_header)
    if len(bin_data) - offset < num_images:
        raise ValueError(
            'idx1 file is truncated: expected %d label bytes, found %d'
            % (num_images, len(bin_data) - offset)
        )

    # Decode all labels in one pass instead of unpacking byte by byte.
    raw_labels = np.frombuffer(bin_data, dtype=np.uint8, count=num_images, offset=offset)

    # Keep the progress lines callers see on stdout (one per 10000 labels).
    for parsed in range(10000, num_images + 1, 10000):
        print('已解析 %d' % parsed + '张')

    # float64 matches the dtype of the array this function has always returned.
    return raw_labels.astype(np.float64)
# Training-label idx1 file, resolved relative to the working directory.
path_label = 'train-labels.idx1-ubyte'
# Parsed at module level, so this runs whenever the module is executed or
# imported; main() reads the result through this global.
# NOTE(review): importing this module from another script also triggers this
# parse - consider moving the load into main(). TODO confirm.
label_list = decode_label(path_label)
def data_loader(images,labels):
    """Wrap image and label arrays in a shuffling DataLoader (batch size 100).

    Labels are passed through as they are; no one-hot encoding is applied
    here.

    :param images: NumPy array of images
    :param labels: NumPy array of labels, one entry per image
    :return: DataLoader over a TensorDataset pairing each image with its label
    """
    image_tensor = torch.from_numpy(images)
    label_tensor = torch.from_numpy(labels)
    paired = TensorDataset(image_tensor, label_tensor)
    loader_options = dict(batch_size=100, shuffle=True, num_workers=1)
    return Data.DataLoader(paired, **loader_options)
def train(data):
    """Train a fresh Net for 20 epochs with SGD and save it to 'minist.pkl'.

    :param data: iterable yielding batches whose first element is the image
        tensor (reshaped here to (batch, 1, 28, 28)) and whose second element
        is the label tensor (cast to long class indices for
        CrossEntropyLoss)
    """
    net = Net()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.001)
    loss_function = torch.nn.CrossEntropyLoss()
    for _ in range(20):
        for item in data:
            # -1 lets the batch dimension follow the actual batch, so a final
            # batch smaller than 100 no longer breaks the reshape.
            data_x = item[0].float().view(-1, 1, 28, 28)
            data_y = item[1].long()
            prediction = net(data_x)
            loss = loss_function(prediction, data_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # NOTE(review): reports the last batch loss once per epoch - assumed
        # placement, confirm against the intended logging frequency.
        print(loss)
    # Saves the whole module object (not just a state dict).
    torch.save(net, 'minist.pkl')
def main():
    """Build the training loader from the module-level arrays and train."""
    train(data_loader(image_list, label_list))


if __name__ == '__main__':
    main()
预测代码:
import cv2
import numpy as np
import torch
from minis import Net
from torch.autograd import Variable
def main():
    """Classify the digit in '../test1.jpg' with the saved model and print it.

    The image is read as grayscale, resized to 28x28 and inverse-binarized
    at threshold 156 (pixels above 156 become 0, all others 255) before
    being fed to the network as a (1, 1, 28, 28) float tensor.

    :raises FileNotFoundError: if the image cannot be read
    """
    model_path = 'minist.pkl'
    # NOTE(review): torch.load unpickles a whole model object here, so only
    # load trusted files. Newer PyTorch releases may require an explicit
    # weights_only=False for fully pickled models - TODO confirm against the
    # installed version.
    net = torch.load(model_path)
    # Inference mode: BatchNorm must use its running statistics instead of
    # the statistics of this single image, and must not update them.
    net.eval()

    image_path = r'../test1.jpg'
    image = cv2.imread(image_path, 0)
    if image is None:
        # cv2.imread signals an unreadable or missing file by returning None.
        raise FileNotFoundError('cannot read image: %s' % image_path)
    image = cv2.resize(image, (28, 28))
    # cv2.threshold returns (threshold_used, thresholded_image).
    _, binary = cv2.threshold(image, 156, 255, cv2.THRESH_BINARY_INV)

    batch = torch.from_numpy(np.asarray(binary)).unsqueeze(0).unsqueeze(0).float()
    with torch.no_grad():
        prediction = net(batch)

    # The class with the smallest cross-entropy loss is the class with the
    # largest score, so take the argmax directly.
    pre = prediction.argmax(dim=1).item()
    print(pre)


if __name__ == '__main__':
    main()