CNN Explained: A Simple CNN Built with Basic Python Libraries

A CNN (convolutional neural network) is used mainly for image recognition and classification. It is built by stacking an input layer, convolution layers, pooling layers, fully connected (Affine) layers, and a Softmax layer. One more structure is essential: the filter (kernel). Convolution and pooling layers slide a filter over their input, and the filter determines how the data is convolved or pooled. Let's first build an intuition for convolution and pooling.

2D convolution

Each cell of the filter is multiplied element-wise with the cell of the data it currently covers, and the products are summed to give one output value.
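
As a tiny worked example (the numbers here are made up for illustration), sliding a 2x2 filter over a 3x3 input with stride 1 gives a 2x2 output:

import numpy as np

X = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])
K = np.array([[1, 0],
              [0, 1]])
out = np.zeros((2, 2))
for i in range(2):
    for j in range(2):
        # multiply the filter with the window it covers, then sum
        out[i, j] = np.sum(X[i:i+2, j:j+2]*K)
# out is [[ 6.  8.]
#         [12. 14.]]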

3D convolution


For three-dimensional (multi-channel) data the filter is also three-dimensional. Each channel is convolved separately, the per-channel results are added together, and the output is two-dimensional.
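
With multi-channel data the same idea applies per channel; a minimal sketch (sizes made up) that convolves every channel and sums across channels:

X = np.random.randn(2, 3, 3)   # (channels, height, width)
K = np.random.randn(2, 2, 2)   # (channels, filter_h, filter_w)
out = np.zeros((2, 2))
for i in range(2):
    for j in range(2):
        # element-wise product over all channels, summed into one 2D output map
        out[i, j] = np.sum(X[:, i:i+2, j:j+2]*K)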

Pooling

Pooling comes in two flavours: max pooling and average pooling. Max pooling takes the maximum of the values covered by the filter; average pooling takes their mean. Max pooling is the one normally used. Pooling acts on each channel independently, so for three-dimensional data the max/average is taken channel by channel and the output is also three-dimensional.
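
For example, 2x2 max pooling with stride 2 on a single 4x4 channel keeps the largest value in each window (a small sketch):

X = np.arange(16).reshape(4, 4)
out = np.zeros((2, 2))
for i in range(2):
    for j in range(2):
        # take the maximum inside each 2x2 window
        out[i, j] = np.max(X[2*i:2*i+2, 2*j:2*j+2])
# out is [[ 5.  7.]
#         [13. 15.]]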

Convolution extracts higher-level features; pooling shrinks the spatial size, which reduces computation and the number of parameters in later layers. A typical pattern stacks one convolution layer plus one pooling layer repeatedly, or several convolution layers followed by one pooling layer.
After convolution and pooling, the data is flattened into a vector and passed through one or two fully connected layers to produce the result.
Softmax is used for the final classification.

Now that we know what convolution and pooling do, let's implement the simplest possible convolutional network:


My humble hand-drawn diagram (*^__^*). relu is the activation function, and FC is the fully connected layer, i.e. the Affine layer.

Handwritten digit recognition with the CNN

Packages

import sys, os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf  # only used to load the MNIST dataset
from PIL import Image
import pandas as pd 
import math

Loading the MNIST dataset

def one_hot_label(y):
    # convert integer class labels (shape (m,)) into one-hot vectors of length 10
    one_hot_label = np.zeros((y.shape[0],10))
    y = y.reshape(y.shape[0])
    one_hot_label[range(y.shape[0]),y] = 1
    return one_hot_label
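
# example: one_hot_label(np.array([2, 0])) returns
#   [[0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
#    [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]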
    
    
# (train images, train labels), (test images, test labels)
# MNIST images are 28*28 with a single channel
(x_train_origin,t_train_origin),(x_test_origin,t_test_origin) = tf.keras.datasets.mnist.load_data()
X_train = x_train_origin/255.0
X_test = x_test_origin/255.0
m,h,w = x_train_origin.shape
X_train = X_train.reshape((m,1,h,w))
y_train = one_hot_label(t_train_origin)

m,h,w = x_test_origin.shape
X_test = X_test.reshape((m,1,h,w))
y_test = one_hot_label(t_test_origin)
print("shape of x_train is :"+repr(X_train.shape))
print("shape of t_train is :"+repr(y_train.shape))
print("shape of x_test is :"+repr(X_test.shape))
print("shape of t_test is :"+repr(y_test.shape))

shape of x_train is :(60000, 1, 28, 28)
shape of t_train is :(60000, 10)
shape of x_test is :(10000, 1, 28, 28)
shape of t_test is :(10000, 10)

Displaying an image

index = 0
plt.imshow(X_train[index].reshape((28,28)),cmap = plt.cm.gray)
print("y is:"+str(np.argmax(y_train[index])))
y is:5
[image: the first training image, the digit 5]

Activation functions

def relu(input_X):
    """
    Arguments:
        input_X -- a numpy array
    Return :
        A -- a numpy array of the same shape as input_X, with negative elements set to 0
    """
    
    A = np.where(input_X < 0 ,0,input_X)
    return A
    
def softmax(input_X):
    """
    Arguments:
        input_X -- a numpy array
    Return :
        A -- a numpy array with the same shape as input_X, rows summing to 1
    """
    # subtract the row-wise max before exponentiating for numerical stability
    input_X = input_X - np.max(input_X, axis=1, keepdims=True)
    exp_a = np.exp(input_X)
    sum_exp_a = np.sum(exp_a, axis=1, keepdims=True)
    ret = exp_a/sum_exp_a
    return ret
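
# quick sanity check: each row of softmax's output sums to 1,
# e.g. softmax(np.array([[1.0, 2.0, 3.0]])) is roughly [[0.090, 0.245, 0.665]]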

Loss function

def cross_entropy_error(labels,logits):
    # labels are one-hot targets, logits are softmax outputs; 1e-7 avoids log(0)
    return -np.sum(labels*np.log(logits + 1e-7))

Convolution layer

class Convolution:
    def __init__(self,W,fb,stride = 1,pad = 0):
        """
        W-- 滤波器权重,shape为(FN,NC,FH,FW),FN 为滤波器的个数
        fb -- 滤波器的偏置,shape 为(1,FN) 
        stride -- 步长
        pad -- 填充个数
        """
        self.W = W
        self.fb  = fb  
        self.stride = stride
        self.pad = pad
        
        
        self.col_X = None
        self.X = None
        self.col_W = None
        
        self.dW = None
        self.db = None
        self.out_shape = None
    #    self.out = None
        
    def forward (self ,input_X):
        """
        input_X -- shape (m, nc, height, width)
        """   
        self.X = input_X
        FN,NC,FH,FW = self.W.shape
        
        m,input_nc, input_h,input_w = self.X.shape
    
        # first compute the output height and width
        out_h = int((input_h+2*self.pad-FH)/self.stride + 1)
        out_w = int((input_w+2*self.pad-FW)/self.stride + 1)
    
        # unfold the input into a 2-D array of shape (m*out_h*out_w, FH*FW*C)
        self.col_X = col_X = im2col2(self.X,FH,FW,self.stride,self.pad)
        
        # unfold each filter into a column, giving shape (FH*FW*C, FN)
        self.col_W = col_W = self.W.reshape(FN,-1).T
        out = np.dot(col_X,col_W)+self.fb
        out = out.T
        out = out.reshape(m,FN,out_h,out_w)
        self.out_shape = out.shape
        return out
    
    def backward(self, dz,learning_rate):
        #print("==== Conv backbward ==== ")
        assert(dz.shape == self.out_shape)
    
        FN,NC,FH,FW = self.W.shape
        o_FN,o_NC,o_FH,o_FW = self.out_shape  # out_shape is (m, FN, out_h, out_w), so o_NC is FN here
        
        col_dz  = dz.reshape(o_NC,-1)
        col_dz = col_dz.T
        
        self.dW = np.dot(self.col_X.T,col_dz)  #shape is (FH*FW*C,FN)
        self.db = np.sum(col_dz,axis=0,keepdims=True)

        
        self.dW = self.dW.T.reshape(self.W.shape)
        self.db = self.db.reshape(self.fb.shape)
        
    
        d_col_x = np.dot(col_dz,self.col_W.T) # shape is (m*out_h*out_w, FH*FW*C)
        dx = col2im2(d_col_x,self.X.shape,FH,FW,stride=self.stride)
        
        assert(dx.shape == self.X.shape)
        
        # update W and b
        self.W = self.W - learning_rate*self.dW
        self.fb = self.fb -learning_rate*self.db
        
        return dx
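
As a quick shape check of this layer (this assumes the im2col2/col2im2 helpers from the full repository are on the path; the sizes are made up for illustration):

W = np.random.randn(4, 1, 3, 3)*0.01   # (FN, NC, FH, FW): 4 filters, 1 channel, 3x3
fb = np.zeros((1, 4))
conv = Convolution(W, fb, stride=1, pad=0)
out = conv.forward(np.random.randn(2, 1, 8, 8))   # input (m, nc, height, width)
# expected output shape: (2, 4, 6, 6), since (8 - 3)/1 + 1 = 6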
    
    

Pooling layer

class Pooling:
    def __init__(self,pool_h,pool_w,stride = 1,pad = 0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad 
        self.X = None
        self.arg_max = None
        
    def forward ( self,input_X) :
        """
        前向传播
        input_X-- shape为(m,nc,height,width)
        """  
        self.X = input_X
        N , C, H, W = input_X.shape
        out_h = int(1+(H-self.pool_h)/self.stride)
        out_w = int(1+(W-self.pool_w)/self.stride)
        
        # unfold into rows
        col = im2col2(input_X,self.pool_h,self.pool_w,self.stride,self.pad)
        col = col.reshape(-1,self.pool_h*self.pool_w)
        arg_max = np.argmax(col,axis=1)
        # maximum of each row
        out = np.max(col,axis=1)
        out =out.T.reshape(N,C,out_h,out_w)
        self.arg_max = arg_max
        return out
    
    def backward(self ,dz):
        """
        反向传播
        Arguments:
        dz-- out的导数,shape与out 一致
        
        Return:
        返回前向传播是的input_X的导数
        """ 
        pool_size = self.pool_h*self.pool_w
        dmax = np.zeros((dz.size,pool_size))
        dmax[np.arange(self.arg_max.size),self.arg_max.flatten()] = dz.flatten()
        
        dx = col2im2(dmax,out_shape=self.X.shape,fh=self.pool_h,fw=self.pool_w,stride=self.stride)
        return dx
    
    

ReLU layer

class Relu:
    def __init__(self):
        self.mask = None
        
    def forward(self ,X):
        self.mask = X <= 0
        out = X.copy()  # copy so the input array is not modified in place
        out[self.mask] = 0
        return out
    
    def backward(self,dz):
        dz[self.mask] = 0
        dx = dz 
        return dx

Softmax layer

class SoftMax:
    def __init__ (self):
        self.y_hat = None
        
    def forward(self,X):
        
        self.y_hat = softmax(X)
        return self.y_hat
    
    def backward(self,labels):
        m = labels.shape[0]
        # gradient of the cross-entropy loss w.r.t. the softmax input: y_hat - y
        dx = (self.y_hat - labels)
        
        return dx
    
def compute_cost(logits,label):
    return cross_entropy_error(label,logits)

Affine (fully connected) layer

class Affine:
    def __init__(self,W,b):
        self.W = W # shape is (n_x,n_unit)
        self.b  = b  # shape is(1,n_unit)
        self.X = None
        self.origin_x_shape = None
        
        self.dW = None
        self.db = None
        
        self.out_shape =None
        
    def forward(self,X):
        self.origin_x_shape = X.shape 
        self.X = X.reshape(X.shape[0],-1)#(m,n)
        out =  np.dot(self.X, self.W)+self.b
        self.out_shape = out.shape
        return out
    
    def backward(self,dz,learning_rate):
        """
        dz-- 前面的导数
        """  
#         print("Affine backward")
#         print(self.X.shape)
#         print(dz.shape)
#         print(self.W.shape)
    
        assert(dz.shape == self.out_shape)
        
        m = self.X.shape[0]
        
        self.dW = np.dot(self.X.T,dz)/m
        self.db = np.sum(dz,axis=0,keepdims=True)/m
        
        assert(self.dW.shape == self.W.shape)
        assert(self.db.shape == self.b.shape)
        
        dx = np.dot(dz,self.W.T)
        assert(dx.shape == self.X.shape)
        
        dx = dx.reshape(self.origin_x_shape) # restore the original input shape
        
        # update W and b
        self.W = self.W-learning_rate*self.dW
        self.b = self.b - learning_rate*self.db
        
        return dx
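
# hypothetical shape check for the Affine layer: 2 samples with 4 features -> 3 units
fc = Affine(np.random.randn(4, 3)*0.01, np.zeros((1, 3)))
out = fc.forward(np.random.randn(2, 4))   # out.shape is (2, 3)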
        

The model

class SimpleConvNet:

    def __init__(self):
        self.X = None
        self.Y= None
        self.layers = []

    def add_conv_layer(self,n_filter,n_c , f, stride=1, pad=0):
        """
        添加一层卷积层
        Arguments:
        n_c -- 输入数据通道数,也即卷积层的通道数
        n_filter -- 滤波器的个数
        f --滤波器的长/宽

        Return :
        Conv -- 卷积层
        """

        # initialize W and b
        W = np.random.randn(n_filter, n_c, f, f)*0.01
        fb = np.zeros((1, n_filter))
        # convolution layer
        Conv = Convolution(W, fb, stride=stride, pad=pad)
        return Conv

    def add_maxpool_layer(self, pool_shape, stride=1, pad=0):
        """
        添加一层池化层
        Arguments:
        pool_shape -- 滤波器的shape
        f -- 滤波器大小
        Return :
         Pool -- 初始化的Pool类
        """
        pool_h, pool_w = pool_shape
        pool = Pooling(pool_h, pool_w, stride=stride, pad=pad)
        
        return pool
    
    def add_affine(self,n_x, n_units):
        """
        添加一层全连接层
        Arguments:
        n_x -- 输入个数
        n_units -- 神经元个数
        Return :
        fc_layer -- Affine层对象
        """
        
        W= np.random.randn(n_x, n_units)*0.01
        
        b = np.zeros((1, n_units))
        
        fc_layer = Affine(W,b)
        
        return fc_layer
    
    def add_relu(self):
        relu_layer =  Relu()
        return relu_layer
    
    
    def add_softmax(self):
        softmax_layer = SoftMax()
        return softmax_layer
    
    # compute the output H and W after a convolution or pooling step
    def cacl_out_hw(self,HW,f,stride = 1,pad = 0):
        return (HW+2*pad - f)/stride+1
    

    
    
    def init_model(self,train_X,n_classes):
        """
        Initialize the convolutional network
        """
        N,C,H,W = train_X.shape
        # convolution layer
        n_filter = 4
        f = 7
        
        conv_layer = self.add_conv_layer(n_filter= n_filter,n_c=C,f=f,stride=1)
        
        out_h = self.cacl_out_hw(H,f)
        out_w = self.cacl_out_hw(W,f)
        out_ch = n_filter
        
        self.layers.append(conv_layer)
        
        #Relu
        relu_layer = self.add_relu()
        self.layers.append(relu_layer)
        
        # pooling layer
        f = 2
        pool_layer = self.add_maxpool_layer(pool_shape=(f,f),stride=2)
        out_h = self.cacl_out_hw(out_h,f,stride=2)
        out_w = self.cacl_out_hw(out_w,f,stride=2)
        # out_ch does not change
        self.layers.append(pool_layer)
        

        
        
        # Affine layer
        n_x = int(out_h*out_w*out_ch)
        n_units = 32
        fc_layer = self.add_affine(n_x=n_x,n_units=n_units)
        self.layers.append(fc_layer)
        
        #Relu
        relu_layer = self.add_relu()
        self.layers.append(relu_layer)
        
        #Affine
        fc_layer = self.add_affine(n_x=n_units,n_units=n_classes)
        self.layers.append(fc_layer)
        
        #SoftMax
        softmax_layer = self.add_softmax()
        self.layers.append(softmax_layer)
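
        # For a 28x28 MNIST input the shapes above work out as:
        #   conv, 4 filters of 7x7, stride 1: (28-7)/1+1 = 22 -> (m, 4, 22, 22)
        #   max pool 2x2, stride 2:           (22-2)/2+1 = 11 -> (m, 4, 11, 11)
        #   flatten:                          4*11*11 = 484   -> (m, 484)
        #   Affine 484->32, ReLU, Affine 32->10, Softmax      -> (m, 10)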
        
        
        
    def forward_progation(self,train_X, print_out = False):
        """
        前向传播
        Arguments:
        train_X -- 训练数据
        f -- 滤波器大小

        Return :
         Z-- 前向传播的结果
         loss -- 损失值
        """
    
        
        N,C,H,W = train_X.shape
        index = 0
        # convolution layer
        conv_layer = self.layers[index]
        X = conv_layer.forward(train_X)
        index =index+1
        if print_out:
            print("卷积之后:"+str(X.shape))
        # Relu
        relu_layer =  self.layers[index]
        index =index+1
        X = relu_layer.forward(X)
        if print_out:
            print("Relu:"+str(X.shape))
            
        
        # pooling layer
        pool_layer = self.layers[index]
        index =index+1
        X = pool_layer.forward(X)
        if print_out:
            print("池化:"+str(X.shape))


        # Affine layer
        fc_layer = self.layers[index]
        index =index+1
        X = fc_layer.forward(X)
        if print_out:
            print("Affline 层的X:"+str(X.shape))

        #Relu
        relu_layer = self.layers[index]
        index =index+1
        X = relu_layer.forward(X)
        if print_out:
            print("Relu 层的X:"+str(X.shape))
        
        # Affine layer
        fc_layer = self.layers[index]
        index =index+1
        X = fc_layer.forward(X)
        if print_out:
            print("Affline 层的X:"+str(X.shape))

        # Softmax layer
        sofmax_layer = self.layers[index]
        index =index+1
        A = sofmax_layer.forward(X)
        if print_out:
            print("Softmax 层的X:"+str(A.shape))
            
        return A
        
    def back_progation(self,train_y,learning_rate):
        """
        反向传播
        Arguments:
   
        """
        index = len(self.layers)-1
        sofmax_layer = self.layers[index]
        index -= 1
        dz = sofmax_layer.backward(train_y)
        
        fc_layer = self.layers[index]
        dz = fc_layer.backward(dz,learning_rate=learning_rate)
        index -= 1
        
        relu_layer = self.layers[index]
        dz = relu_layer.backward(dz)
        index -= 1
        
        fc_layer = self.layers[index]
        dz = fc_layer.backward(dz,learning_rate=learning_rate)
        index -= 1
        
        pool_layer = self.layers[index]
        dz = pool_layer.backward(dz)
        index -= 1
        
        relu_layer =  self.layers[index]
        dz = relu_layer.backward(dz)
        index -= 1
        
        conv_layer = self.layers[index]
        conv_layer.backward(dz,learning_rate=learning_rate)
        index -= 1
        
      
    def get_minibatch(self,batch_data,minibatch_size,num):
        m_examples = batch_data.shape[0]
        minibatches = math.ceil( m_examples / minibatch_size)
 
        if(num < minibatches):
            return batch_data[num*minibatch_size:(num+1)*minibatch_size]
        else:
            return batch_data[num*minibatch_size:m_examples]
    
    
    def optimize(self,train_X, train_y,minibatch_size,learning_rate=0.05,num_iters=500):
        """
        优化方法
        Arguments:
        train_X -- 训练数据 
        train_y -- 训练数据的标签
        learning_rate -- 学习率
        num_iters -- 迭代次数
        minibatch_size 
        """
        m = train_X.shape[0]
        num_batches  = math.ceil(m / minibatch_size)
        
        costs = []
        for iteration in range(num_iters):
            iter_cost = 0
            for batch_num in range(num_batches):
                minibatch_X = self.get_minibatch(train_X,minibatch_size,batch_num)
                minibatch_y = self.get_minibatch(train_y,minibatch_size,batch_num)
                
                # forward pass
                A = self.forward_progation(minibatch_X,print_out=False)
                # loss
                cost = compute_cost (A,minibatch_y)
                # backward pass
                self.back_progation(minibatch_y,learning_rate)
                if(iteration%100 == 0):
                    iter_cost += cost/num_batches
                    
            if(iteration%100 == 0):
                print("After %d iters ,cost is :%g" %(iteration,iter_cost))
                costs.append(iter_cost)
            
            
            
            
        # plot the training cost
        plt.plot(costs)
        plt.xlabel("iterations/hundreds")
        plt.ylabel("costs")
        plt.show()
        
       
    def predicate(self, train_X):
        """
        预测
        """
        logits = self.forward_progation(train_X)
        one_hot = np.zeros_like(logits)
        one_hot[range(train_X.shape[0]),np.argmax(logits,axis=1)] = 1
        return one_hot   

    def fit(self,train_X, train_y):
        """
        训练
        """
        self.X = train_X
        self.Y = train_y
        n_y = train_y.shape[1]
        m = train_X.shape[0]
        
        # initialize the model
        self.init_model(train_X,n_classes=n_y)

        self.optimize(train_X, train_y,minibatch_size=10,learning_rate=0.05,num_iters=800)
        
        logits = self.predicate(train_X)
        
        accuracy = np.sum(np.argmax(logits,axis=1) == np.argmax(train_y,axis=1))/m
        print("训练集的准确率为:%g" %(accuracy))
convNet = SimpleConvNet()
# use only the first 10 images as a quick experiment
train_X = X_train[0:10]
train_y = y_train[0:10]
convNet.fit(train_X,train_y)
After 0 iters ,cost is :23.0254
After 100 iters ,cost is :14.5255
After 200 iters ,cost is :6.01782
After 300 iters ,cost is :5.71148
After 400 iters ,cost is :5.63212
After 500 iters ,cost is :5.45006
After 600 iters ,cost is :5.05849
After 700 iters ,cost is :4.29723
[image: training cost curve]
Training accuracy: 0.9

Prediction

logits = convNet.predicate(X_train[0:10])
m = 10
accuracy = np.sum(np.argmax(logits,axis=1) == np.argmax(y_train[0:10],axis=1))/m
print("训练的准确率为:%g" %(accuracy))
训练的准确率为:0.9
index = 0
plt.imshow(X_train[index].reshape((28,28)),cmap = plt.cm.gray)
print("y is:"+str(np.argmax(y_train[index])))
print("your predicate result is :"+str(np.argmax(logits[index])))
y is:5
predicted result is: 5
[image: the first training image, predicted as 5]
logits = convNet.predicate(X_test)
m = X_test.shape[0]
accuracy = np.sum(np.argmax(logits,axis=1) == np.argmax(y_test,axis=1))/m
print("测试的准确率为:%g" %(accuracy))
测试的准确率为:0.1031

Because only 10 training examples were used, the test accuracy is only about 0.1, i.e. chance level.
The goal of this article is to implement a CNN from scratch and understand how it works. A few helper functions are not shown here: they unfold image data into a matrix so the convolution can be done as a matrix product, and fold the matrix back into image form for the following layers. If you are interested, see the full code: https://github.com/huanhuang/SimpleConvNet.git
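
For readers who do not want to open the repository, here is a rough sketch of the im2col idea behind those helpers. It is only an illustration under my own assumptions -- the name im2col_sketch is made up, and the row/column ordering of the repository's actual im2col2/col2im2 may differ -- so treat it as a conceptual aid rather than the author's code:

def im2col_sketch(X, fh, fw, stride=1, pad=0):
    # X has shape (N, C, H, W); each output row holds one fh x fw receptive field
    N, C, H, W = X.shape
    out_h = (H + 2*pad - fh)//stride + 1
    out_w = (W + 2*pad - fw)//stride + 1
    Xp = np.pad(X, [(0, 0), (0, 0), (pad, pad), (pad, pad)], mode='constant')
    col = np.zeros((N, out_h, out_w, C, fh, fw))
    for i in range(out_h):
        for j in range(out_w):
            hs, ws = i*stride, j*stride
            col[:, i, j] = Xp[:, :, hs:hs+fh, ws:ws+fw]
    return col.reshape(N*out_h*out_w, -1)   # (N*out_h*out_w, C*fh*fw)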

The implementation references the convolutional-neural-network chapter of 《深度学习入门》 (Deep Learning from Scratch) and borrows its ideas, but with changes: the image-to-matrix and matrix-to-image conversions were rewritten to be easier to understand.
