Hand-Writing CPU Kernels to Accelerate Neural Network Computation (1): Naive Implementations of Convolution, Pooling, Activation, Fully Connected, and Batch Normalization Layers (in Python)

1 Conv2d

import math
import numpy as np
import torch
import torch.nn as nn

def conv2d(input_numpy, kernel_weight_numpy, kernel_bias_numpy, padding=0):
    B, Ci, Hi, Wi = input_numpy.shape
    if padding > 0:
        input_pad_numpy = np.zeros((B, Ci, Hi + 2*padding, Wi + 2*padding))
        input_pad_numpy[:, :, padding:-padding, padding:-padding] = input_numpy
    else:
        input_pad_numpy = input_numpy
    B, Ci, Hi, Wi = input_pad_numpy.shape
    Co, Ci, Hf, Wf = kernel_weight_numpy.shape
    Ho, Wo = Hi - Hf + 1, Wi - Wf + 1
    # direct convolution: 7 nested loops over batch, output position,
    # output channel, kernel position and input channel
    out = np.zeros((B, Co, Ho, Wo))
    for b in range(B):
        for i in range(Ho):
            for j in range(Wo):
                for k in range(Co):
                    for l in range(Hf):
                        for m in range(Wf):
                            for n in range(Ci):
                                out[b,k,i,j] += input_pad_numpy[b,n,i+l,j+m]*kernel_weight_numpy[k,n,l,m]
    # add the per-output-channel bias
    for b in range(B):
        for i in range(Ho):
            for j in range(Wo):
                for k in range(Co):
                    out[b,k,i,j] += kernel_bias_numpy[k]
    return out
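
As a quick sanity check (not part of the original post), the naive kernel can be compared against PyTorch's functional convolution on random data; the array names below are ad hoc:

import torch.nn.functional as F

x = np.random.randn(1, 3, 8, 8).astype(np.float32)
w = np.random.randn(4, 3, 3, 3).astype(np.float32)
b = np.random.randn(4).astype(np.float32)
ref = F.conv2d(torch.from_numpy(x), torch.from_numpy(w), torch.from_numpy(b), padding=1)
print(np.allclose(conv2d(x, w, b, padding=1), ref.numpy(), atol=1e-5))  # expected: True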

2 ReLU

def relu(input_numpy):
    # 4-D input comes from conv layers, 2-D input from fully connected layers
    isConv = len(input_numpy.shape) == 4
    if isConv:
        B, C, H, W = input_numpy.shape
        out = np.zeros((B,C,H,W))
        for b in range(B):
            for c in range(C):
                for h in range(H):
                    for w in range(W):
                        out[b,c,h,w] = max(0, input_numpy[b,c,h,w])
    else:
        B, E = input_numpy.shape
        out = np.zeros((B,E))
        for b in range(B):
            for e in range(E):
                out[b,e] = max(0, input_numpy[b,e])     
    return out
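
The same kind of spot check (again a sketch of mine, not from the post) works for the activation:

x = np.random.randn(2, 3, 4, 4).astype(np.float32)
print(np.allclose(relu(x), torch.relu(torch.from_numpy(x)).numpy()))  # expected: True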

3 MaxPool2d

def maxpool(input_numpy,size=2,stride=2):
    B, C, Hi, Wi = input_numpy.shape
    Ho, Wo = Hi // stride, Wi // stride
    row_remainder, col_remainder = Hi % stride, Wi % stride
    # if the input size is not divisible by the stride, emit one extra output
    # row/column and zero-pad the border (safe here because the pooled input is
    # non-negative after ReLU)
    Ho += int(row_remainder != 0)
    Wo += int(col_remainder != 0)
    temp_map = np.zeros((B, C, Hi + size - row_remainder, Wi + size - col_remainder))
    temp_map[:, :, :Hi, :Wi] = input_numpy
    out = np.zeros((B,C,Ho,Wo))
    for b in range(B):
        for c in range(C):
            for h in range(Ho):
                for w in range(Wo):
                    startX, startY = w*stride, h*stride
                    out[b,c,h,w] = np.max(temp_map[b,c,startY:startY+size, startX:startX+size])
    return out
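
Because the ragged border is zero-padded, this matches PyTorch's ceil_mode=True pooling only for non-negative inputs (which is the case after ReLU). A hypothetical check under that assumption:

x = np.abs(np.random.randn(1, 2, 7, 7)).astype(np.float32)
ref = F.max_pool2d(torch.from_numpy(x), kernel_size=2, stride=2, ceil_mode=True)
print(np.allclose(maxpool(x, size=2, stride=2), ref.numpy()))  # expected: True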

4 BatchNorm2d

def batchnorm2d(input_numpy, eps, weight, bias):
    # per-sample, per-channel statistics over (H, W); this matches
    # training-mode BatchNorm2d only when the batch size is 1
    mean = input_numpy.mean(axis=(2,3))
    var = input_numpy.std(axis=(2,3))**2   # biased variance, as PyTorch uses for normalization
    B, C, H, W = input_numpy.shape
    out = np.zeros((B,C,H,W))
    for b in range(B):
        for c in range(C):
            for h in range(H):
                for w in range(W):
                    out[b,c,h,w] = ((input_numpy[b,c,h,w]-mean[b,c])/(math.sqrt(var[b,c]+eps)))*weight[c]+bias[c]
    return out
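
This normalizes with the batch statistics of the current input, i.e. training-mode behaviour. For a model in eval() mode, PyTorch normalizes with the layer's running statistics instead; below is a minimal sketch of that variant (my assumption, vectorized for brevity, with running_mean/running_var passed in as numpy arrays):

def batchnorm2d_eval(input_numpy, running_mean, running_var, eps, weight, bias):
    # broadcast the per-channel running statistics over (B, H, W)
    mean = running_mean.reshape(1, -1, 1, 1)
    var = running_var.reshape(1, -1, 1, 1)
    gamma = weight.reshape(1, -1, 1, 1)
    beta = bias.reshape(1, -1, 1, 1)
    return (input_numpy - mean) / np.sqrt(var + eps) * gamma + beta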

5 Linear

def linear(input_numpy, weight_numpy, bias_numpy):
    # PyTorch stores Linear weights as (out_features, in_features); transpose so
    # the loops below compute input @ weight.T + bias
    weight_numpy = weight_numpy.T
    n11, n12 = input_numpy.shape
    n21, n22 = weight_numpy.shape
    out = np.zeros([n11, n22])
    for i in range(n11):
        for j in range(n22):
            for k in range(n12):
                out[i][j]+= input_numpy[i][k]*weight_numpy[k][j]
    for i in range(n11):
        for j in range(n22):
            out[i][j] += bias_numpy[j]
    return out
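
A quick check against F.linear (my own sketch, not from the post); note that nn.Linear stores its weight as (out_features, in_features):

x = np.random.randn(2, 10).astype(np.float32)
w = np.random.randn(5, 10).astype(np.float32)
b = np.random.randn(5).astype(np.float32)
ref = F.linear(torch.from_numpy(x), torch.from_numpy(w), torch.from_numpy(b))
print(np.allclose(linear(x, w, b), ref.numpy(), atol=1e-5))  # expected: True
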
6 LeNet Test

Let's bring in a real network to test against. Here we use the simplest one, LeNet:

class LeNet(nn.Module):
    def __init__(self, num_classes):
        super(LeNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(6),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1 ),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fclayer = nn.Sequential(
            nn.Linear(16*5*5, 120),
            nn.ReLU(),
            nn.Linear(120,84),
            nn.ReLU(),
            nn.Linear(84,num_classes)
        ) 
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = x.view(-1, 16*5*5)
        x = self.fclayer(x)
        return x

LeNet implemented with the for-loop kernels above:

def numpy_LeNet(model, input_numpy):
    
    layer1_Conv2d = model.layer1[0]
    layer1_BatchNorm2d = model.layer1[1]
    layer1_ReLU = model.layer1[2]
    layer1_MaxPool2d = model.layer1[3]

    layer2_Conv2d = model.layer2[0]
    layer2_BatchNorm2d = model.layer2[1]
    layer2_ReLU = model.layer2[2]
    layer2_MaxPool2d = model.layer2[3]

    fclayer_Linear_1 = model.fclayer[0]
    fclayer_ReLU_1 = model.fclayer[1]
    fclayer_Linear_2 = model.fclayer[2]
    fclayer_ReLU_2 = model.fclayer[3]
    fclayer_Linear_3 = model.fclayer[4]


    # detach the torch Parameters and convert them to numpy before feeding the naive kernels
    layer1_Conv2d_numpy_out = conv2d(input_numpy, layer1_Conv2d.weight.detach().numpy(), layer1_Conv2d.bias.detach().numpy(), padding = 2)
    layer1_BatchNorm2d_numpy_out = batchnorm2d(layer1_Conv2d_numpy_out, layer1_BatchNorm2d.eps, layer1_BatchNorm2d.weight.detach().numpy(), layer1_BatchNorm2d.bias.detach().numpy())
    layer1_ReLU_numpy_out = relu(layer1_BatchNorm2d_numpy_out)
    layer1_MaxPool2d_numpy_out = maxpool(layer1_ReLU_numpy_out, 2)

    layer2_Conv2d_numpy_out = conv2d(layer1_MaxPool2d_numpy_out, layer2_Conv2d.weight.detach().numpy(), layer2_Conv2d.bias.detach().numpy(), padding = 0)
    layer2_BatchNorm2d_numpy_out = batchnorm2d(layer2_Conv2d_numpy_out, layer2_BatchNorm2d.eps, layer2_BatchNorm2d.weight.detach().numpy(), layer2_BatchNorm2d.bias.detach().numpy())
    layer2_ReLU_numpy_out = relu(layer2_BatchNorm2d_numpy_out)
    layer2_MaxPool2d_numpy_out = maxpool(layer2_ReLU_numpy_out, 2)

    # flatten to (B, 16*5*5) before the fully connected layers
    layer2_MaxPool2d_numpy_out = layer2_MaxPool2d_numpy_out.reshape(-1, 16*5*5)

    fclayer_Linear_1_numpy_out = linear(layer2_MaxPool2d_numpy_out, fclayer_Linear_1.weight.detach().numpy(), fclayer_Linear_1.bias.detach().numpy())
    fclayer_ReLU_1_numpy_out = relu(fclayer_Linear_1_numpy_out)
    fclayer_Linear_2_numpy_out = linear(fclayer_ReLU_1_numpy_out, fclayer_Linear_2.weight.detach().numpy(), fclayer_Linear_2.bias.detach().numpy())
    fclayer_ReLU_2_numpy_out = relu(fclayer_Linear_2_numpy_out)
    fclayer_Linear_3_numpy_out = linear(fclayer_ReLU_2_numpy_out, fclayer_Linear_3.weight.detach().numpy(), fclayer_Linear_3.bias.detach().numpy())

    
    return fclayer_Linear_3_numpy_out
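
An end-to-end comparison might look like the sketch below (hypothetical, not from the original post). The model is left in train() mode so that the batch-statistics batchnorm2d above matches PyTorch's behaviour for a single-sample batch:

model = LeNet(num_classes=10)
model.train()
x = torch.randn(1, 1, 28, 28)
with torch.no_grad():
    ref = model(x).numpy()
print(np.allclose(numpy_LeNet(model, x.numpy()), ref, atol=1e-4))  # expected: True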
