A CNN in Pure NumPy, with 99.3% Test Accuracy on MNIST

Using NumPy, I wrote a deep-learning module package modeled on PyTorch and used it to build a CNN that reaches 99.33% test accuracy on MNIST.

The package contains neural-network layers (convolution, linear), loss functions (MSE, cross-entropy) and optimizers (Adam, RMSProp).

Full code: EthanLifeGreat/NumPy_CNN (github.com): convolutional neural network modules implemented in pure NumPy; a network built from these modules achieves over 99% test accuracy on the MNIST dataset.


First, let's look at what a network built from the finished modules looks like:

class ConvNetwork(SequentialNeuralNetwork):
    def __init__(self, output_size):
        # NOTE: feel free to change structure and seed
        # hidden_size is defined in the full repo; a typical value is set here so the snippet is self-contained
        hidden_size = 1024
        sequential = list()
        sequential.append(ConvolutionModule(1, 32, window_size=(5, 5), stride=(1, 1), padding=(2, 2)))
        sequential.append(ReluModule())
        sequential.append(MaxPoolModule(window_size=(2, 2), stride=(2, 2)))
        sequential.append(DropoutModule(p=0.15))
        sequential.append(ConvolutionModule(32, 64, window_size=(3, 3), stride=(1, 1), padding=(1, 1)))
        sequential.append(ReluModule())
        sequential.append(MaxPoolModule(window_size=(2, 2), stride=(2, 2)))
        sequential.append(DropoutModule(p=0.15))
        
        sequential.append(LinearModule(7 * 7 * 64, hidden_size))
        sequential.append(ReluModule())
        sequential.append(LinearModule(hidden_size, hidden_size))
        sequential.append(ReluModule())
        sequential.append(DropoutModule(p=0.25))
        sequential.append(LinearModule(hidden_size, hidden_size))
        sequential.append(ReluModule())
        sequential.append(LinearModule(hidden_size, hidden_size))
        sequential.append(ReluModule())
        sequential.append(DropoutModule(p=0.25))
        sequential.append(LinearModule(hidden_size, output_size))

        loss_func = CrossEntropyLoss()
        optimizer = Adam(lr=1e-3)
        super().__init__(sequential, loss_func, optimizer)
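
A note on the 7 * 7 * 64 input size of the first LinearModule: both convolutions use "same" padding (5x5 with padding 2, 3x3 with padding 1), so only the two 2x2 max-pools shrink the 28x28 MNIST images, from 28 to 14 to 7, and the second convolution leaves 64 channels. A quick check:

h = 28             # MNIST image height/width
h = h // 2 // 2    # two 2x2 max-pools with stride 2: 28 -> 14 -> 7
print(h * h * 64)  # 3136 = 7 * 7 * 64, the flattened size fed to the first LinearModule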


Definitions of some of the modules used in the class above:

The code below is only a preview; it is incomplete and will not run on its own. For a runnable, complete version (and a usage tutorial), see the GitHub repository linked at the top of this post.

import numpy as np
from numpy import matmul, einsum


class Adam:
    # Adam optimizer: keeps exponential moving averages of the gradient (m) and of
    # its element-wise square (v); bias correction is folded into the step size.
    def __init__(self, lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-8):
        self.alpha = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.eps = eps
        self.m = 0
        self.v = 0
        self.t = 0

    def step(self, dw):
        self.t += 1
        g = dw
        m = (self.beta1 * self.m + (1 - self.beta1) * g)
        v = (self.beta2 * self.v + (1 - self.beta2) * g ** 2)
        alpha = self.alpha * np.sqrt(1 - self.beta2 ** self.t) / (1 - self.beta1 ** self.t)
        dw = alpha * m / (np.sqrt(v) + self.eps)
        self.m = m
        self.v = v
        return dw
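
Judging from the code, step() returns the already-scaled update, so the caller subtracts it from the weights; how the repo's modules wire the optimizer in is shown in the full code. A toy example (not part of the repo) that minimizes f(w) = w^2 with the class above:

opt = Adam(lr=1e-1)
w = 5.0
for _ in range(200):
    grad = 2 * w          # df/dw
    w -= opt.step(grad)   # step() returns the bias-corrected, scaled update
print(w)                  # ends up close to 0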


class ReluModule(NeuralNetworkModule):
    # ReLU Layer: y = 0, x<0; x, x>=0
    def __init__(self):
        self.x = None

    def forward(self, x):
        self.x = x
        y = x.copy()
        y[x < 0] = 0
        return y

    def backward(self, dy):
        d = np.ones_like(self.x)
        d[self.x < 0] = 0
        return d * dy
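
A two-element example of the forward and backward passes (illustrative only; it assumes the repo's base classes are importable): the gradient passes through exactly where the input was non-negative.

relu = ReluModule()
print(relu.forward(np.array([-2.0, 3.0])))   # [0. 3.]
print(relu.backward(np.array([1.0, 1.0])))   # [0. 1.]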

class DropoutModule(NeuralNetworkModule):
    def __init__(self, p):
        self.keep_prob = 1-p
        self.mask = None

    def forward(self, x):
        self.mask = np.random.binomial(1, self.keep_prob, size=x.shape)
        y = x * self.mask / self.keep_prob
        return y

    def backward(self, dy):
        # gradient of the inverted-dropout forward pass: same mask, same 1/keep_prob scale
        dx = dy * self.mask / self.keep_prob
        return dx

    def predict(self, x):
        # dropout is a no-op at inference time; the 1/keep_prob scaling during
        # training already keeps the expected activation unchanged
        return x
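
This is "inverted dropout": kept activations are scaled up by 1 / keep_prob during training so the expected activation is unchanged, which is why predict can simply pass x through at inference time. A quick sanity check (illustrative only):

drop = DropoutModule(p=0.15)
x = np.ones((100000, 1))
y = drop.forward(x)
print(y.mean())   # roughly 1.0: dropped units are 0, kept units are 1 / keep_prob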


class LinearModule(OptimizableModule):
    # Linear layer: y = [x, 1] @ W, with the bias folded into the last row of W
    def __init__(self, input_size, output_size, random=True):
        super().__init__()
        if random:
            std_v = 1. / np.sqrt(input_size)
            self.w = (np.random.uniform(-std_v, std_v, [input_size + 1, output_size]))
        else:
            self.w = np.zeros([input_size + 1, output_size])
        self.dw = None
        self.x_1 = None
        self.input_shape = None
        self.input_size = input_size

    def forward(self, x):
        num_samples = x.shape[0]
        self.input_shape = x.shape
        if len(x.shape) > 2:
            # Squeezing
            x = x.reshape(num_samples, self.input_size)
        self.x_1 = np.concatenate([x, np.ones([num_samples, 1])], axis=1)
        y = matmul(self.x_1, self.w)
        return y

    def backward(self, dy):
        self.dw = einsum('ij,ih->hj', dy, self.x_1) / dy.shape[0]
        dx = matmul(dy, np.transpose(self.w))[:, :-1]
        dx = dx.reshape(self.input_shape)
        return dx
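
A convenient way to test backward passes like the one above is a finite-difference check. The sketch below is not part of the repo (it assumes the classes and imports above, including the repo's OptimizableModule base class); it compares LinearModule's analytic weight gradient with a numerical one. Since dw is averaged over the batch, the reference loss is also a per-sample mean.

np.random.seed(0)
lin = LinearModule(4, 3)
x = np.random.randn(5, 4)
g = np.random.randn(5, 3)                 # pretend upstream gradient dL/dy for each sample

lin.forward(x)
lin.backward(g)                           # fills lin.dw = x_1^T g / batch_size
analytic_dw = lin.dw

def loss(w):
    lin.w = w
    return np.mean(np.sum(lin.forward(x) * g, axis=1))

eps = 1e-6
w0 = lin.w.copy()
numeric_dw = np.zeros_like(w0)
for i in range(w0.shape[0]):
    for j in range(w0.shape[1]):
        w_plus, w_minus = w0.copy(), w0.copy()
        w_plus[i, j] += eps
        w_minus[i, j] -= eps
        numeric_dw[i, j] = (loss(w_plus) - loss(w_minus)) / (2 * eps)
lin.w = w0
print(np.abs(numeric_dw - analytic_dw).max())   # on the order of 1e-9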

class ConvolutionModule(OptimizableModule):
    # Note: the sizes must satisfy 2p + x - w == (y - 1) * s  (padding p, input x, window w, output y, stride s)
    def __init__(self, num_channels_in, num_channels_out, window_size, padding, stride=(1, 1), random=True):
        # weight_size: [height, width]
        super().__init__()
        # unrolled weight
        self.window_size = window_size
        self.stride = stride
        self.padding = padding
        self.y_width = 0
        self.y_height = 0
        self.zero_image_shape = None
        self.weight_size = (window_size[0] * window_size[1] * (num_channels_in + 1), num_channels_out)
        self.num_channels_in = num_channels_in
        self.num_channels_out = num_channels_out
        if random:
            std_v = 1. / np.sqrt(np.prod(self.weight_size) * num_channels_in)
            self.w = np.random.uniform(-std_v, std_v, self.weight_size)
        else:
            self.w = np.zeros(self.weight_size)
        self.dw = np.zeros_like(self.w)
        self.x1p = None

    def batch_unroll(self, x, weight_size, stride):
        # x is a 4d tensor([batch_size, height, width, channels])
        unrolled_x, y_height, y_width = batch_image_unroll(x, weight_size, stride)
        self.zero_image_shape = x.shape
        self.y_width, self.y_height = y_width, y_height
        return unrolled_x

    def batch_roll(self, unrolled_x, weight_size, stride):
        # unrolled_x is a 3d tensor([batch_size, y_height * y_width, w_height * w_width * channels])
        y_width, y_height = self.y_width, self.y_height
        num_channels = self.num_channels_in + 1
        x = batch_image_roll(unrolled_x, self.zero_image_shape, weight_size, stride, y_height, y_width, num_channels)
        return x

    @staticmethod
    def batch_matmul(x, w):
        return einsum('ijk,kh->ijh', x, w)

    def batch_convolve(self, x, w):
        # x is bias-included, batched, unrolled input
        # w is unrolled kernel
        y = self.batch_matmul(x, w)
        return y

    def forward(self, x):
        # x_size = [batch_size, height, width, num_channels_in]
        assert len(x.shape) == 4
        num_samples, x_height, x_width = x.shape[0], x.shape[1], x.shape[2]
        x_1 = np.pad(x, ((0, 0), (0, 0), (0, 0), (0, 1)), 'constant', constant_values=1)
        x_1_p = np.pad(x_1, ((0, 0), (self.padding[0], self.padding[0]),
                             (self.padding[1], self.padding[1]), (0, 0)), 'constant', constant_values=0)
        unrolled_x1p = self.batch_unroll(x_1_p, self.window_size, self.stride)
        self.x1p = unrolled_x1p
        unrolled_y = self.batch_convolve(self.x1p, self.w)
        y = unrolled_y.reshape([num_samples, self.y_height, self.y_width, self.num_channels_out])
        return y

    def backward(self, dy):
        num_samples, y_height, y_width = dy.shape[0], dy.shape[1], dy.shape[2]
        dy_unrolled = dy.reshape([num_samples, y_height * y_width, self.num_channels_out])
        dw = einsum('ijk,ikh->jh', np.transpose(self.x1p, axes=(0, 2, 1)), dy_unrolled)
        self.dw = dw / num_samples
        dx1p_unrolled = self.batch_matmul(dy_unrolled, np.transpose(self.w))
        dx1p = self.batch_roll(dx1p_unrolled, self.window_size, self.stride)
        # strip the zero padding (written so that a padding of 0 also works)
        ph, pw = self.padding
        dx1 = dx1p[:, ph:dx1p.shape[1] - ph, pw:dx1p.shape[2] - pw, :]
        return dx1[:, :, :, :-1]
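
The helpers batch_image_unroll and batch_image_roll (im2col / col2im) are not included in this preview; the real, vectorized versions are in the repo. Purely to illustrate the idea, an unroll that matches the shapes used above could look roughly like the sketch below (the function name, the loop-based implementation, and the exact column ordering are my own assumptions, not the repo's code). Once the input is unrolled this way, the convolution itself is just the single matrix multiplication that batch_convolve performs with einsum.

def batch_image_unroll_sketch(x, window_size, stride):
    # x: [batch, height, width, channels] -> ([batch, y_h * y_w, win_h * win_w * channels], y_h, y_w)
    n, h, w, c = x.shape
    wh, ww = window_size
    sh, sw = stride
    y_h = (h - wh) // sh + 1
    y_w = (w - ww) // sw + 1
    cols = np.empty((n, y_h * y_w, wh * ww * c))
    for i in range(y_h):
        for j in range(y_w):
            patch = x[:, i * sh:i * sh + wh, j * sw:j * sw + ww, :]
            cols[:, i * y_w + j, :] = patch.reshape(n, -1)
    return cols, y_h, y_w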


class MaxPoolModule(NeuralNetworkModule):
    def __init__(self, window_size, stride):
        self.window_size = window_size
        self.stride = stride
        self.mask = None
        self.y_height = 0
        self.y_width = 0
        self.output_size = 0
        self.zero_image_shape = None

    def forward(self, x):
        num_samples, num_channels = x.shape[0], x.shape[3]
        unrolled_x, y_height, y_width = batch_image_unroll(x, self.window_size, self.stride)
        self.zero_image_shape = x.shape
        self.output_size = num_samples * y_height * y_width * num_channels
        unrolled_x = unrolled_x.reshape([num_samples, y_height * y_width, np.prod(self.window_size), num_channels])
        unrolled_x_ = np.transpose(unrolled_x, axes=(0, 1, 3, 2)). \
            reshape(self.output_size, np.prod(self.window_size))
        y = np.max(unrolled_x, axis=2).reshape([num_samples, y_height, y_width, num_channels])
        self.mask = np.argmax(unrolled_x_, axis=1)
        self.y_height, self.y_width = y_height, y_width
        return y

    def backward(self, dy):
        # dy is a batched gradients with size [num_samples, y_height, y_width, num_channels]
        num_samples, y_height, y_width, num_channels = dy.shape[0], dy.shape[1], dy.shape[2], dy.shape[3]
        dy_unrolled = dy.reshape([num_samples, y_height * y_width, 1, num_channels])
        dx_unrolled_ = np.zeros([self.output_size, np.prod(self.window_size)])
        dx_unrolled_[np.arange(self.output_size), self.mask] = dy_unrolled.ravel()
        dx_unrolled = np.transpose(dx_unrolled_.reshape(
            [num_samples, y_height * y_width, num_channels, np.prod(self.window_size)]), axes=(0, 1, 3, 2))
        dx_unrolled = dx_unrolled.reshape([num_samples, y_height * y_width, np.prod(self.window_size) * num_channels])
        dx = batch_image_roll(dx_unrolled, self.zero_image_shape,
                              self.window_size, self.stride, y_height, y_width, num_channels)
        return dx

class CrossEntropyLoss:
    def __call__(self, y_hat, y):
        y_prob = softmax(y_hat)
        # return Loss & Derivative
        return np.sum(- np.log(y_prob) * y), y_prob - y
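
The softmax helper is not shown in the preview either. With a row-wise, numerically stable softmax (a sketch, not necessarily the repo's exact code) and one-hot targets, the gradient of cross-entropy taken directly with respect to the logits y_hat reduces to y_prob - y, which is exactly the second value __call__ returns; a finite-difference check confirms it:

def softmax(z):
    # row-wise, numerically stable softmax (illustrative)
    z = z - np.max(z, axis=1, keepdims=True)
    e = np.exp(z)
    return e / np.sum(e, axis=1, keepdims=True)

y_hat = np.random.randn(2, 10)
y = np.eye(10)[[3, 7]]                           # one-hot labels for the two samples
loss, grad = CrossEntropyLoss()(y_hat, y)

eps = 1e-6
bump = np.zeros_like(y_hat)
bump[0, 3] = eps
numeric = (CrossEntropyLoss()(y_hat + bump, y)[0] -
           CrossEntropyLoss()(y_hat - bump, y)[0]) / (2 * eps)
print(numeric, grad[0, 3])                       # the two numbers agree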
