import numpy as np
# 9x9 input image: an "X" drawn with +1 pixels on a -1 background.
x = np.array([[-1, -1, -1, -1, -1, -1, -1, -1, -1],
              [-1, 1, -1, -1, -1, -1, -1, 1, -1],
              [-1, -1, 1, -1, -1, -1, 1, -1, -1],
              [-1, -1, -1, 1, -1, 1, -1, -1, -1],
              [-1, -1, -1, -1, 1, -1, -1, -1, -1],
              [-1, -1, -1, 1, -1, 1, -1, -1, -1],
              [-1, -1, 1, -1, -1, -1, 1, -1, -1],
              [-1, 1, -1, -1, -1, -1, -1, 1, -1],
              [-1, -1, -1, -1, -1, -1, -1, -1, -1]])
print("x=\n", x)

# The three 3x3 convolution kernels, stored directly as a list of arrays
# (no throw-away placeholder list that is overwritten afterwards).
Kernel = [
    np.array([[1, -1, -1],
              [-1, 1, -1],
              [-1, -1, 1]]),
    np.array([[1, -1, 1],
              [-1, 1, -1],
              [1, -1, 1]]),
    np.array([[-1, -1, 1],
              [-1, 1, -1],
              [1, -1, -1]]),
]

# --------------- Convolution ---------------
stride = 1  # step of the sliding window
kernel_h, kernel_w = Kernel[0].shape
# "Valid" convolution output size, derived from the shapes: (9 - 3) // 1 + 1 = 7.
feature_map_h = (x.shape[0] - kernel_h) // stride + 1  # feature-map height
feature_map_w = (x.shape[1] - kernel_w) // stride + 1  # feature-map width
feature_map = [np.zeros((feature_map_h, feature_map_w)) for _ in Kernel]  # one map per kernel
for h in range(feature_map_h):  # slide down: output row
    for w in range(feature_map_w):  # slide right: output column
        v_start = h * stride  # first row of the window
        v_end = v_start + kernel_h  # one past the last row of the window
        h_start = w * stride  # first column of the window
        h_end = h_start + kernel_w  # one past the last column of the window
        window = x[v_start:v_end, h_start:h_end]  # cut the window out of the image
        for i, kernel in enumerate(Kernel):
            # Dividing by the number of kernel elements (9) averages the
            # element-wise products, so the response lies in [-1, 1].
            feature_map[i][h, w] = np.sum(window * kernel) / kernel.size
print("feature_map:\n", np.around(feature_map, decimals=2))

# --------------- Pooling ---------------
pooling_stride = 2  # step of the pooling window
pooling_size = 2  # side length of the pooling window
# Ceil division so that every row/column of the feature map is covered: ceil(7 / 2) = 4.
pooling_h = -(-feature_map_h // pooling_stride)  # pooled-map height
pooling_w = -(-feature_map_w // pooling_stride)  # pooled-map width
# Zero rows/columns to append at the bottom/right so the last window is complete
# (1 each here, because the 7x7 map is odd-sized and the window is 2x2).
pad_h = max((pooling_h - 1) * pooling_stride + pooling_size - feature_map_h, 0)
pad_w = max((pooling_w - 1) * pooling_stride + pooling_size - feature_map_w, 0)
feature_map_pad_0 = [
    np.pad(fm, ((0, pad_h), (0, pad_w)), 'constant', constant_values=(0, 0))
    for fm in feature_map
]
# print("feature_map_pad_0 0:\n", np.around(feature_map_pad_0[0], decimals=2))
pooling = [np.zeros((pooling_h, pooling_w)) for _ in feature_map]  # one pooled map per kernel
for h in range(pooling_h):  # slide down: output row
    for w in range(pooling_w):  # slide right: output column
        v_start = h * pooling_stride  # first row of the window
        v_end = v_start + pooling_size  # one past the last row of the window
        h_start = w * pooling_stride  # first column of the window
        h_end = h_start + pooling_size  # one past the last column of the window
        for i, padded in enumerate(feature_map_pad_0):
            # Max pooling: keep the strongest response inside the window.
            pooling[i][h, w] = np.max(padded[v_start:v_end, h_start:h_end])
print("pooling:\n", np.around(pooling[0], decimals=2))
print("pooling:\n", np.around(pooling[1], decimals=2))
print("pooling:\n", np.around(pooling[2], decimals=2))
# --------------- 激活 ---------------
def relu(x):
    """Rectified linear unit: keep positive values, replace negatives with 0.

    Computed as (|x| + x) / 2, which works element-wise on NumPy arrays as
    well as on plain numbers. Because of the true division the result is
    floating point even for integer input.
    """
    return (abs(x) + x) / 2
# ReLU keeps the shape of its input, so the activation maps have the same
# size as the convolution feature maps.
relu_map_h, relu_map_w = feature_map[0].shape  # height and width of each activation map
# Apply ReLU to each of the feature maps; no zero-initialisation is needed
# because every entry is produced directly by relu().
relu_map = [relu(fm) for fm in feature_map]
print("relu map :\n",np.around(relu_map[0], decimals=2))
print("relu map :\n",np.around(relu_map[1], decimals=2))
print("relu map :\n",np.around(relu_map[2], decimals=2))
运行结果:
x=
[[-1 -1 -1 -1 -1 -1 -1 -1 -1]
[-1 1 -1 -1 -1 -1 -1 1 -1]
[-1 -1 1 -1 -1 -1 1 -1 -1]
[-1 -1 -1 1 -1 1 -1 -1 -1]
[-1 -1 -1 -1 1 -1 -1 -1 -1]
[-1 -1 -1 1 -1 1 -1 -1 -1]
[-1 -1 1 -1 -1 -1 1 -1 -1]
[-1 1 -1 -1 -1 -1 -1 1 -1]
[-1 -1 -1 -1 -1 -1 -1 -1 -1]]
feature_map:
[[[ 0.78 -0.11 0.11 0.33 0.56 -0.11 0.33]
[-0.11 1. -0.11 0.33 -0.11 0.11 -0.11]
[ 0.11 -0.11 1. -0.33 0.11 -0.11 0.56]
[ 0.33 0.33 -0.33 0.56 -0.33 0.33 0.33]
[ 0.56 -0.11 0.11 -0.33 1. -0.11 0.11]
[-0.11 0.11 -0.11 0.33 -0.11 1. -0.11]
[ 0.33 -0.11 0.56 0.33 0.11 -0.11 0.78]]
[[ 0.33 -0.56 0.11 -0.11 0.11 -0.56 0.33]
[-0.56 0.56 -0.56 0.33 -0.56 0.56 -0.56]
[ 0.11 -0.56 0.56 -0.78 0.56 -0.56 0.11]
[-0.11 0.33 -0.78 1. -0.78 0.33 -0.11]
[ 0.11 -0.56 0.56 -0.78 0.56 -0.56 0.11]
[-0.56 0.56 -0.56 0.33 -0.56 0.56 -0.56]
[ 0.33 -0.56 0.11 -0.11 0.11 -0.56 0.33]]
[[ 0.33 -0.11 0.56 0.33 0.11 -0.11 0.78]
[-0.11 0.11 -0.11 0.33 -0.11 1. -0.11]
[ 0.56 -0.11 0.11 -0.33 1. -0.11 0.11]
[ 0.33 0.33 -0.33 0.56 -0.33 0.33 0.33]
[ 0.11 -0.11 1. -0.33 0.11 -0.11 0.56]
[-0.11 1. -0.11 0.33 -0.11 0.11 -0.11]
[ 0.78 -0.11 0.11 0.33 0.56 -0.11 0.33]]]
pooling:
[[1. 0.33 0.56 0.33]
[0.33 1. 0.33 0.56]
[0.56 0.33 1. 0.11]
[0.33 0.56 0.11 0.78]]
pooling:
[[0.56 0.33 0.56 0.33]
[0.33 1. 0.56 0.11]
[0.56 0.56 0.56 0.11]
[0.33 0.11 0.11 0.33]]
pooling:
[[0.33 0.56 1. 0.78]
[0.56 0.56 1. 0.33]
[1. 1. 0.11 0.56]
[0.78 0.33 0.56 0.33]]
relu map :
[[0.78 0. 0.11 0.33 0.56 0. 0.33]
[0. 1. 0. 0.33 0. 0.11 0. ]
[0.11 0. 1. 0. 0.11 0. 0.56]
[0.33 0.33 0. 0.56 0. 0.33 0.33]
[0.56 0. 0.11 0. 1. 0. 0.11]
[0. 0.11 0. 0.33 0. 1. 0. ]
[0.33 0. 0.56 0.33 0.11 0. 0.78]]
relu map :
[[0.33 0. 0.11 0. 0.11 0. 0.33]
[0. 0.56 0. 0.33 0. 0.56 0. ]
[0.11 0. 0.56 0. 0.56 0. 0.11]
[0. 0.33 0. 1. 0. 0.33 0. ]
[0.11 0. 0.56 0. 0.56 0. 0.11]
[0. 0.56 0. 0.33 0. 0.56 0. ]
[0.33 0. 0.11 0. 0.11 0. 0.33]]
relu map :
[[0.33 0. 0.56 0.33 0.11 0. 0.78]
[0. 0.11 0. 0.33 0. 1. 0. ]
[0.56 0. 0.11 0. 1. 0. 0.11]
[0.33 0.33 0. 0.56 0. 0.33 0.33]
[0.11 0. 1. 0. 0.11 0. 0.56]
[0. 1. 0. 0.33 0. 0.11 0. ]
[0.78 0. 0.11 0.33 0.56 0. 0.33]]
形象化解释为:
首先,设置了一个9*9的X矩阵作为被卷积图像,并设置3*3的卷积核,得到一个7*7(9-3+1)的卷积结果,输出如下图所示:
feature_map[i][h, w] = np.divide(np.sum(np.multiply(window, Kernel[i][:, :])), 9)
这里使用np.divide将卷积和除以9(卷积核的元素个数),相当于对9个乘积取平均,使结果落在[-1, 1]之间,窗口与卷积核完全匹配时为1。
对得到的三个矩阵进行池化,因为上一层是7*7奇数行列,池化窗口需要偶数行列,所以特征图补0,行列都要加1,生成三个池化矩阵后输出;
使用relu函数将得到矩阵中的负数全部替换成0,得到处理后的矩阵
代码中的注释更加全面,详情可参考上述代码。
# https://blog.csdn.net/qq_26369907/article/details/88366147
# https://zhuanlan.zhihu.com/p/405242579
import numpy as np
import torch
import torch.nn as nn
def make_fixed_conv(kernel, stride=1):
    """Build a single-channel Conv2d layer whose weights are exactly `kernel`.

    kernel: nested list (rows of numbers) giving the 2-D filter.
    stride: step of the sliding window.

    The layer is created with bias=False. nn.Conv2d otherwise adds a randomly
    initialised bias to every output value, which shifts the whole feature
    map by a different amount on every run. The weight is stored as a
    non-trainable parameter, so the outputs carry no autograd graph.
    """
    weight = torch.tensor(kernel, dtype=torch.float)
    conv = nn.Conv2d(1, 1, tuple(weight.shape), stride, bias=False)  # in_channel, out_channel, kernel_size, stride
    conv.weight = nn.Parameter(weight.reshape(1, 1, *weight.shape), requires_grad=False)
    return conv


# Input image in (batch, channel, height, width) layout: a 9x9 "X" of +1 on -1.
x = torch.tensor([[[[-1, -1, -1, -1, -1, -1, -1, -1, -1],
                    [-1, 1, -1, -1, -1, -1, -1, 1, -1],
                    [-1, -1, 1, -1, -1, -1, 1, -1, -1],
                    [-1, -1, -1, 1, -1, 1, -1, -1, -1],
                    [-1, -1, -1, -1, 1, -1, -1, -1, -1],
                    [-1, -1, -1, 1, -1, 1, -1, -1, -1],
                    [-1, -1, 1, -1, -1, -1, 1, -1, -1],
                    [-1, 1, -1, -1, -1, -1, -1, 1, -1],
                    [-1, -1, -1, -1, -1, -1, -1, -1, -1]]]], dtype=torch.float)
print(x.shape)
print(x)

print("--------------- 卷积 ---------------")
conv1 = make_fixed_conv([[1, -1, -1],
                         [-1, 1, -1],
                         [-1, -1, 1]])
conv2 = make_fixed_conv([[1, -1, 1],
                         [-1, 1, -1],
                         [1, -1, 1]])
conv3 = make_fixed_conv([[-1, -1, 1],
                         [-1, 1, -1],
                         [1, -1, -1]])
feature_map1 = conv1(x)
feature_map2 = conv2(x)
feature_map3 = conv3(x)
# Divide by 9 (the number of kernel elements) only for display, so that a
# perfect match prints as 1.
print(feature_map1 / 9)
print(feature_map2 / 9)
print(feature_map3 / 9)

print("--------------- 池化 ---------------")
max_pool = nn.MaxPool2d(2, padding=0, stride=2)  # 2x2 max pooling with stride 2
# ZeroPad2d padding order is (left, right, top, bottom): append one zero
# column on the right and one zero row at the bottom, turning 7x7 into 8x8.
zeroPad = nn.ZeroPad2d(padding=(0, 1, 0, 1))
feature_map_pad_0_1 = zeroPad(feature_map1)
feature_pool_1 = max_pool(feature_map_pad_0_1)
feature_map_pad_0_2 = zeroPad(feature_map2)
feature_pool_2 = max_pool(feature_map_pad_0_2)
feature_map_pad_0_3 = zeroPad(feature_map3)
feature_pool_3 = max_pool(feature_map_pad_0_3)
print(feature_pool_1.size())
print(feature_pool_1 / 9)
print(feature_pool_2 / 9)
print(feature_pool_3 / 9)

print("--------------- 激活 ---------------")
activation_function = nn.ReLU()
# ReLU is applied to the convolution output (not to the pooled maps).
feature_relu1 = activation_function(feature_map1)
feature_relu2 = activation_function(feature_map2)
feature_relu3 = activation_function(feature_map3)
print(feature_relu1 / 9)
print(feature_relu2 / 9)
print(feature_relu3 / 9)
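运行结果(注意:这组输出来自 nn.Conv2d 带默认随机偏置的一次运行,每个特征图整体偏移了 bias/9,因此与上面 NumPy 版本的结果略有出入,例如 0.7804 对应 0.78、1.0026 对应 1):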
torch.Size([1, 1, 9, 9])
tensor([[[[-1., -1., -1., -1., -1., -1., -1., -1., -1.],
[-1., 1., -1., -1., -1., -1., -1., 1., -1.],
[-1., -1., 1., -1., -1., -1., 1., -1., -1.],
[-1., -1., -1., 1., -1., 1., -1., -1., -1.],
[-1., -1., -1., -1., 1., -1., -1., -1., -1.],
[-1., -1., -1., 1., -1., 1., -1., -1., -1.],
[-1., -1., 1., -1., -1., -1., 1., -1., -1.],
[-1., 1., -1., -1., -1., -1., -1., 1., -1.],
[-1., -1., -1., -1., -1., -1., -1., -1., -1.]]]])
--------------- 卷积 ---------------
tensor([[[[ 0.7804, -0.1085, 0.1137, 0.3359, 0.5581, -0.1085, 0.3359],
[-0.1085, 1.0026, -0.1085, 0.3359, -0.1085, 0.1137, -0.1085],
[ 0.1137, -0.1085, 1.0026, -0.3308, 0.1137, -0.1085, 0.5581],
[ 0.3359, 0.3359, -0.3308, 0.5581, -0.3308, 0.3359, 0.3359],
[ 0.5581, -0.1085, 0.1137, -0.3308, 1.0026, -0.1085, 0.1137],
[-0.1085, 0.1137, -0.1085, 0.3359, -0.1085, 1.0026, -0.1085],
[ 0.3359, -0.1085, 0.5581, 0.3359, 0.1137, -0.1085, 0.7804]]]],
grad_fn=<DivBackward0>)
tensor([[[[ 0.3508, -0.5381, 0.1286, -0.0937, 0.1286, -0.5381, 0.3508],
[-0.5381, 0.5730, -0.5381, 0.3508, -0.5381, 0.5730, -0.5381],
[ 0.1286, -0.5381, 0.5730, -0.7603, 0.5730, -0.5381, 0.1286],
[-0.0937, 0.3508, -0.7603, 1.0174, -0.7603, 0.3508, -0.0937],
[ 0.1286, -0.5381, 0.5730, -0.7603, 0.5730, -0.5381, 0.1286],
[-0.5381, 0.5730, -0.5381, 0.3508, -0.5381, 0.5730, -0.5381],
[ 0.3508, -0.5381, 0.1286, -0.0937, 0.1286, -0.5381, 0.3508]]]],
grad_fn=<DivBackward0>)
tensor([[[[ 0.3155, -0.1290, 0.5377, 0.3155, 0.0932, -0.1290, 0.7599],
[-0.1290, 0.0932, -0.1290, 0.3155, -0.1290, 0.9821, -0.1290],
[ 0.5377, -0.1290, 0.0932, -0.3512, 0.9821, -0.1290, 0.0932],
[ 0.3155, 0.3155, -0.3512, 0.5377, -0.3512, 0.3155, 0.3155],
[ 0.0932, -0.1290, 0.9821, -0.3512, 0.0932, -0.1290, 0.5377],
[-0.1290, 0.9821, -0.1290, 0.3155, -0.1290, 0.0932, -0.1290],
[ 0.7599, -0.1290, 0.0932, 0.3155, 0.5377, -0.1290, 0.3155]]]],
grad_fn=<DivBackward0>)
--------------- 池化 ---------------
torch.Size([1, 1, 4, 4])
tensor([[[[1.0026, 0.3359, 0.5581, 0.3359],
[0.3359, 1.0026, 0.3359, 0.5581],
[0.5581, 0.3359, 1.0026, 0.1137],
[0.3359, 0.5581, 0.1137, 0.7804]]]], grad_fn=<DivBackward0>)
tensor([[[[0.5730, 0.3508, 0.5730, 0.3508],
[0.3508, 1.0174, 0.5730, 0.1286],
[0.5730, 0.5730, 0.5730, 0.1286],
[0.3508, 0.1286, 0.1286, 0.3508]]]], grad_fn=<DivBackward0>)
tensor([[[[0.3155, 0.5377, 0.9821, 0.7599],
[0.5377, 0.5377, 0.9821, 0.3155],
[0.9821, 0.9821, 0.0932, 0.5377],
[0.7599, 0.3155, 0.5377, 0.3155]]]], grad_fn=<DivBackward0>)
--------------- 激活 ---------------
tensor([[[[0.7804, 0.0000, 0.1137, 0.3359, 0.5581, 0.0000, 0.3359],
[0.0000, 1.0026, 0.0000, 0.3359, 0.0000, 0.1137, 0.0000],
[0.1137, 0.0000, 1.0026, 0.0000, 0.1137, 0.0000, 0.5581],
[0.3359, 0.3359, 0.0000, 0.5581, 0.0000, 0.3359, 0.3359],
[0.5581, 0.0000, 0.1137, 0.0000, 1.0026, 0.0000, 0.1137],
[0.0000, 0.1137, 0.0000, 0.3359, 0.0000, 1.0026, 0.0000],
[0.3359, 0.0000, 0.5581, 0.3359, 0.1137, 0.0000, 0.7804]]]],
grad_fn=<DivBackward0>)
tensor([[[[0.3508, 0.0000, 0.1286, 0.0000, 0.1286, 0.0000, 0.3508],
[0.0000, 0.5730, 0.0000, 0.3508, 0.0000, 0.5730, 0.0000],
[0.1286, 0.0000, 0.5730, 0.0000, 0.5730, 0.0000, 0.1286],
[0.0000, 0.3508, 0.0000, 1.0174, 0.0000, 0.3508, 0.0000],
[0.1286, 0.0000, 0.5730, 0.0000, 0.5730, 0.0000, 0.1286],
[0.0000, 0.5730, 0.0000, 0.3508, 0.0000, 0.5730, 0.0000],
[0.3508, 0.0000, 0.1286, 0.0000, 0.1286, 0.0000, 0.3508]]]],
grad_fn=<DivBackward0>)
tensor([[[[0.3155, 0.0000, 0.5377, 0.3155, 0.0932, 0.0000, 0.7599],
[0.0000, 0.0932, 0.0000, 0.3155, 0.0000, 0.9821, 0.0000],
[0.5377, 0.0000, 0.0932, 0.0000, 0.9821, 0.0000, 0.0932],
[0.3155, 0.3155, 0.0000, 0.5377, 0.0000, 0.3155, 0.3155],
[0.0932, 0.0000, 0.9821, 0.0000, 0.0932, 0.0000, 0.5377],
[0.0000, 0.9821, 0.0000, 0.3155, 0.0000, 0.0932, 0.0000],
[0.7599, 0.0000, 0.0932, 0.3155, 0.5377, 0.0000, 0.3155]]]],
grad_fn=<DivBackward0>)
# https://blog.csdn.net/qq_26369907/article/details/88366147
# https://zhuanlan.zhihu.com/p/405242579
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif']=['SimHei'] # font that can render the Chinese titles
plt.rcParams['axes.unicode_minus']=False # render the minus sign correctly with that font


def make_fixed_conv(kernel, stride=1):
    """Build a single-channel Conv2d layer whose weights are exactly `kernel`.

    kernel: nested list (rows of numbers) giving the 2-D filter.
    stride: step of the sliding window.

    The layer is created with bias=False. nn.Conv2d otherwise adds a randomly
    initialised bias to every output value, which shifts the whole feature
    map by a different amount on every run. The weight is stored as a
    non-trainable parameter, so the outputs carry no autograd graph.
    """
    weight = torch.tensor(kernel, dtype=torch.float)
    conv = nn.Conv2d(1, 1, tuple(weight.shape), stride, bias=False)  # in_channel, out_channel, kernel_size, stride
    conv.weight = nn.Parameter(weight.reshape(1, 1, *weight.shape), requires_grad=False)
    return conv


def show_gray(tensor, title):
    """Display a tensor as a grayscale image with the given title.

    Size-1 dimensions (batch and channel) are squeezed away so that a
    (1, 1, H, W) tensor is drawn as an H x W image.
    """
    img = tensor.detach().squeeze().numpy()  # convert to a plain 2-D array for imshow
    plt.imshow(img, cmap='gray')
    plt.title(title)
    plt.show()


# Input image in (batch, channel, height, width) layout: a 9x9 "X" of +1 on -1.
x = torch.tensor([[[[-1, -1, -1, -1, -1, -1, -1, -1, -1],
                    [-1, 1, -1, -1, -1, -1, -1, 1, -1],
                    [-1, -1, 1, -1, -1, -1, 1, -1, -1],
                    [-1, -1, -1, 1, -1, 1, -1, -1, -1],
                    [-1, -1, -1, -1, 1, -1, -1, -1, -1],
                    [-1, -1, -1, 1, -1, 1, -1, -1, -1],
                    [-1, -1, 1, -1, -1, -1, 1, -1, -1],
                    [-1, 1, -1, -1, -1, -1, -1, 1, -1],
                    [-1, -1, -1, -1, -1, -1, -1, -1, -1]]]], dtype=torch.float)
print(x.shape)
print(x)
show_gray(x, '原图')

print("--------------- 卷积 ---------------")
conv1 = make_fixed_conv([[1, -1, -1],
                         [-1, 1, -1],
                         [-1, -1, 1]])
show_gray(conv1.weight, 'Kernel 1')
conv2 = make_fixed_conv([[1, -1, 1],
                         [-1, 1, -1],
                         [1, -1, 1]])
show_gray(conv2.weight, 'Kernel 2')
conv3 = make_fixed_conv([[-1, -1, 1],
                         [-1, 1, -1],
                         [1, -1, -1]])
show_gray(conv3.weight, 'Kernel 3')
feature_map1 = conv1(x)
feature_map2 = conv2(x)
feature_map3 = conv3(x)
# Divide by 9 (the number of kernel elements) only for display, so that a
# perfect match prints as 1.
print(feature_map1 / 9)
print(feature_map2 / 9)
print(feature_map3 / 9)
show_gray(feature_map1, '卷积后的特征图1')

print("--------------- 池化 ---------------")
max_pool = nn.MaxPool2d(2, padding=0, stride=2)  # 2x2 max pooling with stride 2
# ZeroPad2d padding order is (left, right, top, bottom): append one zero
# column on the right and one zero row at the bottom, turning 7x7 into 8x8.
zeroPad = nn.ZeroPad2d(padding=(0, 1, 0, 1))
feature_map_pad_0_1 = zeroPad(feature_map1)
feature_pool_1 = max_pool(feature_map_pad_0_1)
feature_map_pad_0_2 = zeroPad(feature_map2)
feature_pool_2 = max_pool(feature_map_pad_0_2)
feature_map_pad_0_3 = zeroPad(feature_map3)
feature_pool_3 = max_pool(feature_map_pad_0_3)
print(feature_pool_1.size())
print(feature_pool_1 / 9)
print(feature_pool_2 / 9)
print(feature_pool_3 / 9)
show_gray(feature_pool_1, '卷积池化后的特征图1')

print("--------------- 激活 ---------------")
activation_function = nn.ReLU()
# ReLU is applied to the convolution output (not to the pooled maps).
feature_relu1 = activation_function(feature_map1)
feature_relu2 = activation_function(feature_map2)
feature_relu3 = activation_function(feature_map3)
print(feature_relu1 / 9)
print(feature_relu2 / 9)
print(feature_relu3 / 9)
show_gray(feature_relu1, '卷积 + relu 后的特征图1')
由上图可以看到,经过池化后特征结果更加明显,可作为判别标准。
【2021-2022 春学期】人工智能-作业5:卷积-池化-激活