# PyTorch handwritten digit recognition (PyTorch 手写数字识别)

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import struct
import torch.optim as optim
from PIL import Image

from matplotlib import pyplot as plt


class Net(nn.Module):
    """Base network that bundles the IDX data set with shared helpers.

    The constructor eagerly reads the four IDX files (the file names match the
    MNIST distribution) from the relative ``sample/`` directory and keeps them
    as plain tensor attributes. Subclasses add their layers, implement
    ``forward`` and override ``train_model``.
    """

    def __init__(self):
        super(Net, self).__init__()
        # ``self.load_images`` is looked up on the instance, so a subclass
        # override decides the shape of the stored image tensors.
        self.train_images = self.load_images('sample/train-images.idx3-ubyte')
        self.train_labels, self.train_labels_vector = self.load_labels('sample/train-labels.idx1-ubyte')
        self.test_images = self.load_images('sample/t10k-images.idx3-ubyte')
        self.test_labels, self.test_labels_vector = self.load_labels('sample/t10k-labels.idx1-ubyte')

    @staticmethod
    def load_images(file_name):
        """Read an IDX image file into a float tensor.

        Args:
            file_name: Path of the file. It starts with four big-endian
                uint32 values (magic, image count, rows, cols) followed by
                one unsigned byte per pixel.

        Returns:
            A ``torch.float`` tensor of shape ``(num, rows * cols)`` holding
            the raw pixel values.
        """
        header = '>IIII'
        with open(file_name, 'rb') as bin_file:
            buffers = bin_file.read()
        _magic, num, rows, cols = struct.unpack_from(header, buffers, 0)
        # Interpret the payload in place instead of unpacking every byte into
        # a Python tuple first.
        pixels = np.frombuffer(buffers, dtype=np.uint8, count=num * rows * cols,
                               offset=struct.calcsize(header))
        # ``astype`` yields a fresh writable array, which is what
        # ``torch.from_numpy`` expects.
        return torch.from_numpy(pixels.reshape(num, rows * cols).astype(np.float32))

    @staticmethod
    def load_labels(file_name):
        """Read an IDX label file.

        Args:
            file_name: Path of the file. It starts with two big-endian uint32
                values (magic, label count) followed by one unsigned byte per
                label.

        Returns:
            A tuple ``(labels, vector)`` of ``torch.float`` tensors:
            ``labels`` has shape ``(num,)`` and holds the class indices,
            ``vector`` has shape ``(num, 10)`` and holds their one-hot
            encoding.

        Raises:
            IndexError: If a label is outside ``0..9``.
        """
        header = '>II'
        with open(file_name, 'rb') as bin_file:
            buffers = bin_file.read()
        _magic, num = struct.unpack_from(header, buffers, 0)
        raw = np.frombuffer(buffers, dtype=np.uint8, count=num,
                            offset=struct.calcsize(header))
        indices = torch.from_numpy(raw.astype(np.int64))
        # One-hot encode all rows at once: row i gets a 1 in column indices[i].
        vector = torch.zeros(num, 10, dtype=torch.float)
        vector[torch.arange(num), indices] = 1.0
        labels = indices.to(torch.float)
        return labels, vector

    def show(self, offset=0):
        """Plot up to 30 test images with the predicted digit drawn on each.

        Args:
            offset: Index of the first test image to display.
        """
        # Slicing clamps at the end of the data set, so fewer than 30 images
        # are shown when ``offset`` is close to the end.
        batch = self.test_images[offset:offset + 30]
        with torch.no_grad():  # inference only, no autograd graph needed
            predicted = torch.max(self(batch), 1)[1].numpy()
        fig = plt.figure(figsize=(8, 8))
        fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
        for i, (image, digit) in enumerate(zip(batch, predicted)):
            ax = fig.add_subplot(6, 5, i + 1, xticks=[], yticks=[])
            # ``reshape`` copes with both the flat (784,) and the (1, 28, 28)
            # layout that subclasses may store.
            ax.imshow(image.reshape(28, 28).numpy(), cmap=plt.cm.binary, interpolation='nearest')
            ax.text(0, 7, str(digit))
        plt.show()

    def train_model(self, epoch):
        """Placeholder; subclasses override this with their training loop.

        Args:
            epoch: Number of training iterations (unused here).
        """
        print("Train method undefined!")

    def test_accuracy(self):
        """Print and return the fraction of test images classified correctly.

        Returns:
            The accuracy as a float in ``[0, 1]``.
        """
        with torch.no_grad():  # inference only, no autograd graph needed
            # Evaluate *this* instance rather than a module-level global.
            predicted = torch.max(self(self.test_images), 1)[1]
        accuracy = (predicted.numpy() == self.test_labels.numpy()).sum() / len(self.test_labels)
        print(accuracy)
        return accuracy

    def load_image(self, image_name):
        """Load an image file as an inverted 28x28 grayscale tensor.

        Args:
            image_name: Path of the image file.

        Returns:
            A ``torch.float`` tensor of shape ``(28, 28)`` in which white maps
            to 0 and black to 255.
        """
        # The context manager closes the file handle; ``convert`` makes
        # grayscale, palette and RGBA inputs usable with the 3-weight dot
        # product below.
        with Image.open(image_name) as source:
            rgb = source.convert('RGB')
        if rgb.size != (28, 28):
            rgb = rgb.resize((28, 28))
        # Weighted sum of the R, G, B channels gives the luminance.
        gray = np.dot(np.asarray(rgb), [0.299, 0.587, 0.114])
        # Invert to a negative so that white becomes 0 and black becomes 255.
        negative = 255.0 - gray
        return torch.tensor(negative, dtype=torch.float)


class FullNet(Net):
    """Fully connected classifier: 784 inputs -> 300 (tanh) -> 10 (softmax)."""

    def __init__(self):
        super(FullNet, self).__init__()

        self.fc1 = nn.Linear(784, 300)
        self.fc2 = nn.Linear(300, 10)

    def forward(self, x):
        """Map flattened images to class probabilities.

        Args:
            x: Float tensor whose last dimension has 784 entries.

        Returns:
            Tensor with 10 entries along the last dimension that sum to 1.
        """
        x = torch.tanh(self.fc1(x))
        # Normalise over the class dimension explicitly; relying on the
        # implicit dimension is deprecated.
        return F.softmax(self.fc2(x), dim=-1)

    def train_model(self, epoch, rate=0.1):
        """Run full-batch gradient descent on the training set.

        Args:
            epoch: Number of gradient steps to perform.
            rate: Step size applied to every gradient.
        """
        criterion = nn.MSELoss()
        for _ in range(epoch):
            # Clear the gradients of the previous step; otherwise ``backward``
            # accumulates them and each update applies the running sum.
            self.zero_grad()
            output = self(self.train_images)
            loss = criterion(output, self.train_labels_vector)
            loss.backward()
            # The parameter update itself must not be tracked by autograd.
            with torch.no_grad():
                for param in self.parameters():
                    param.sub_(param.grad * rate)

    def test(self, image_name):
        """Classify a single image file.

        Args:
            image_name: Path of the image, loaded through ``load_image``.

        Returns:
            A NumPy array with one element: the predicted class index.
        """
        image = self.load_image(image_name).reshape(1, 28 * 28)
        with torch.no_grad():  # inference only, no autograd graph needed
            return torch.max(self(image), 1)[1].numpy()


class CNNNet(Net):
    """Convolutional classifier: two conv/pool stages and three linear layers."""

    def __init__(self):
        super(CNNNet, self).__init__()
        # 1 input channel, 6 output channels, 5x5 convolution kernel.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Affine layers (y = Wx + b). A 28x28 input shrinks to 4x4 after the
        # two 5x5 convolutions and 2x2 poolings: 28 -> 24 -> 12 -> 8 -> 4.
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    @staticmethod
    def load_images(file_name):
        """Load images via the base loader and view them as (N, 1, 28, 28)."""
        flat_images = Net.load_images(file_name)
        return flat_images.view(-1, 1, 28, 28)

    def forward(self, x):
        """Return the 10 raw class scores for a batch of (N, 1, 28, 28) images."""
        # Convolution -> ReLU -> 2x2 max pooling; a square pooling window can
        # be given as a single number.
        stage1 = F.max_pool2d(F.relu(self.conv1(x)), 2)
        stage2 = F.max_pool2d(F.relu(self.conv2(stage1)), 2)
        flat = stage2.view(-1, self.num_flat_features(stage2))
        hidden = F.relu(self.fc2(F.relu(self.fc1(flat))))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the number of elements per sample in ``x``."""
        count = 1
        # Multiply every dimension except the leading batch dimension.
        for extent in x.shape[1:]:
            count *= extent
        return count

    def train_model(self, epoch):
        """Run ``epoch`` full-batch Adam steps against the one-hot targets."""
        loss_fn = nn.MSELoss()
        adam = optim.Adam(self.parameters())
        for _ in range(epoch):
            adam.zero_grad()
            scores = self(self.train_images)
            loss_fn(scores, self.train_labels_vector).backward()
            adam.step()

    def test(self, image_name):
        """Classify one image file; returns a one-element array with the class index."""
        batch = self.load_image(image_name).view(1, 1, 28, 28)
        _, winner = torch.max(self(batch), 1)
        return winner.numpy()


# Script entry point: restore a saved network and classify one image file.
if __name__ == '__main__':
    # Load a whole pickled model object (see the commented-out torch.save
    # call below for how such a file is produced).
    # NOTE(review): torch.load unpickles arbitrary Python objects, so only
    # load files from a trusted source. Recent PyTorch releases default to
    # weights_only=True, which presumably rejects a fully pickled module like
    # this one - confirm against the installed version.
    net = torch.load('cc.model')
    # To train from scratch instead, build one of the networks and alternate
    # training rounds with accuracy reports:
    # net = FullNet()
    # net = CNNNet()
    # for i in range(10):
    #     net.train_model(10)
    #     net.test_accuracy()

    # Print the predicted class index for test.jpg.
    print(net.test("test.jpg"))
    # Optional: plot a grid of test images with their predictions.
    # net.show()
    # Optional: persist the trained network for later runs.
    # torch.save(net, 'cc.model')

# Related topics (你可能感兴趣的): deep learning (深度学习), PyTorch, machine learning (机器学习)