深度学习之torch(一)MINIST手写字符分类

skimage教程
skimage开发文档
机器学习新手工程师常犯的6大错误http://www.duozhishidai.com/article-12200-1.html
深度学习需掌握的知识https://blog.csdn.net/duozhishidai/article/details/87301056
从数据集的建立,到模型的建立,到训练,到预测!!!
中间遇到很多坑
数据建立:MSELoss损失函数要求label为独热编码(可用scatter_函数将label转成独热编码);CrossEntropyLoss损失函数则要求预测端为未归一化的logits,label为长整型的类别索引,无需独热编码
模型建立:全连接时需将输入的batch_size与其它维度共4维转成2维
训练:没什么好说的
预测:也没什么好说的,总之实现对输入图片的预测功能

附代码:
训练代码,其中数据集解码函数直接复制的

import cv2
import numpy as np
import struct
import torch.nn as nn
import torch.utils.data as Data
from torch.autograd import Variable
import torch
from torch.utils.data import Dataset,DataLoader,TensorDataset

class Net(nn.Module):
    """CNN classifier for 28x28 single-channel MNIST digits (10 classes)."""

    def __init__(self):
        super(Net, self).__init__()
        # Convolutional feature extractor.
        # Spatial sizes: 28 ->(conv3) 26 ->(conv3) 24 ->(pool) 12
        #                  ->(conv3) 10 ->(conv3) 8 ->(pool) 4
        self.body = nn.Sequential(
            nn.Conv2d(1, 16, 3),
            nn.BatchNorm2d(16),
            nn.ReLU(True),
            nn.Conv2d(16, 32, 3),
            nn.BatchNorm2d(32),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, 3),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.Conv2d(64, 128, 3),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Fully connected head: flattened 128*4*4 features -> 10 class logits.
        self.tail = nn.Sequential(
            nn.Linear(128 * 4 * 4, 1024),
            nn.ReLU(True),
            nn.Linear(1024, 128),
            nn.ReLU(True),
            nn.Linear(128, 10),
        )

    def forward(self, x):
        """Map a (N, 1, 28, 28) batch to (N, 10) class logits."""
        features = self.body(x)
        # Flatten (N, C, H, W) to (N, C*H*W) before the linear layers.
        features = features.view(features.size(0), -1)
        return self.tail(features)

def decode_train(idx3_ubyte_file):
    """
    Parse an IDX3 (MNIST image) file.

    :param idx3_ubyte_file: path to the idx3-ubyte file
    :return: float64 ndarray of shape (num_images, num_rows, num_cols)
    """
    # Read the whole file; 'with' guarantees the handle is closed
    # (the original leaked the open file object).
    with open(idx3_ubyte_file, 'rb') as f:
        bin_data = f.read()

    # Header: magic number, image count, rows, cols — four big-endian int32.
    fmt_header = '>iiii'
    magic_number, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, 0)
    print('魔数:%d, 图片数量: %d张, 图片大小: %d*%d' % (magic_number, num_images, num_rows, num_cols))

    # Pixel data is a flat run of unsigned bytes right after the header.
    # Decode it in one vectorized pass instead of a per-image struct loop.
    offset = struct.calcsize(fmt_header)
    count = num_images * num_rows * num_cols
    images = np.frombuffer(bin_data, dtype=np.uint8, count=count, offset=offset)
    # astype keeps the original float64 dtype (np.empty default) for callers.
    return images.astype(np.float64).reshape((num_images, num_rows, num_cols))

# Decode the MNIST training images; expects the raw idx3-ubyte file in the
# working directory. NOTE: runs at import time.
path_train = 'train-images.idx3-ubyte'
image_list = decode_train(path_train)

def decode_label(idx1_ubyte_file):
    """
    Parse an IDX1 (MNIST label) file.

    :param idx1_ubyte_file: path to the idx1-ubyte file
    :return: float64 ndarray of shape (num_labels,)
    """
    # Read the whole file; 'with' guarantees the handle is closed
    # (the original leaked the open file object).
    with open(idx1_ubyte_file, 'rb') as f:
        bin_data = f.read()

    # Header: magic number and label count — two big-endian int32.
    fmt_header = '>ii'
    magic_number, num_images = struct.unpack_from(fmt_header, bin_data, 0)
    print('魔数:%d, 图片数量: %d张' % (magic_number, num_images))

    # Labels are one unsigned byte each, right after the header; decode
    # them in a single vectorized pass instead of a per-label struct loop.
    offset = struct.calcsize(fmt_header)
    labels = np.frombuffer(bin_data, dtype=np.uint8, count=num_images, offset=offset)
    # astype keeps the original float64 dtype (np.empty default) for callers.
    return labels.astype(np.float64)

# Decode the MNIST training labels; expects the raw idx1-ubyte file in the
# working directory. NOTE: runs at import time.
path_label = 'train-labels.idx1-ubyte'
label_list = decode_label(path_label)

def data_loader(images, labels):
    """
    Wrap image/label ndarrays in a shuffled DataLoader.

    :param images: ndarray of shape (N, rows, cols)
    :param labels: ndarray of shape (N,) with class indices
    :return: DataLoader yielding (image_batch, label_batch) pairs, batch size 100
    """
    # CrossEntropyLoss takes long class indices directly, so the one-hot
    # encoding (scatter_) experiments from earlier drafts are not needed.
    dataset = TensorDataset(torch.from_numpy(images), torch.from_numpy(labels))
    return Data.DataLoader(
        dataset,
        batch_size=100,
        shuffle=True,
        num_workers=1,
    )

def train(data):
    """
    Train a Net classifier for 20 epochs and save the whole model to 'minist.pkl'.

    :param data: DataLoader yielding (images, labels) batches, where images
        is (B, rows, cols) float-convertible and labels are class indices
    """
    net = Net()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.001)
    loss_function = torch.nn.CrossEntropyLoss()
    for epoch in range(20):
        for images, labels in data:
            # Reshape (B, 28, 28) -> (B, 1, 28, 28). Using -1 infers the
            # batch size, so a final partial batch also works (the original
            # hard-coded view(100, ...) and would crash on any batch != 100).
            data_x = images.float().view(-1, 1, 28, 28)
            # CrossEntropyLoss requires long-typed class-index targets.
            data_y = labels.long()
            prediction = net(data_x)
            loss = loss_function(prediction, data_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(loss.item())

    # Save the full module; the prediction script reloads it via torch.load.
    torch.save(net, 'minist.pkl')

def main():
	# Build the training DataLoader from the module-level decoded MNIST
	# arrays, then run the full training loop (saves 'minist.pkl').
	data = data_loader(image_list,label_list)
	train(data)

# Entry-point guard: only train when executed as a script (also required
# for DataLoader worker processes on platforms that spawn).
if __name__=='__main__':
	main()

预测代码:

import cv2
import numpy as np
import torch
from minis import Net
from torch.autograd import Variable

def main():
	"""Load the trained model and print the predicted digit for a test image."""
	model_path = 'minist.pkl'
	# torch.load unpickles the whole Net saved by the training script.
	net = torch.load(model_path)
	# eval() makes BatchNorm use its running statistics; without it the
	# original computed batch statistics over a single image, which skews
	# the activations and the prediction.
	net.eval()

	image_path = r'../test1.jpg'
	# Read as grayscale, resize to the 28x28 input the net was trained on,
	# and inverse-threshold so the digit is white on black like MNIST.
	image = cv2.imread(image_path, 0)
	image = cv2.resize(image, (28, 28))
	_, image = cv2.threshold(image, 156, 255, cv2.THRESH_BINARY_INV)

	# (28, 28) -> (1, 1, 28, 28): add batch and channel dimensions.
	thres = torch.from_numpy(np.asarray(image)).unsqueeze(0).unsqueeze(0)
	with torch.no_grad():
		prediction = net(thres.float())

	# The class with the largest logit is exactly the class minimizing
	# CrossEntropyLoss, so argmax replaces the original per-class loss loop.
	pre = int(prediction.argmax(dim=1).item())
	print(pre)

# Entry-point guard: run the prediction only when executed as a script.
if __name__=='__main__':
	main()

你可能感兴趣的:(图像处理)