For the convolution layers we build on the LayerBase wheel we wrote for the FNN assignment. The implementation was developed in earlier experiments; here we tidy it up:
import copy
import numpy as np

class Conv2D(LayerBase):
    def __init__(self, n_filters, filter_shape, input_shape=None, padding='same', stride=1):
        self.n_filters = n_filters
        self.filter_shape = filter_shape
        self.padding = padding
        self.stride = stride
        self.input_shape = input_shape

    def initialize(self, optimizer):
        # Uniform initialization scaled by the fan-in of one filter
        filter_height, filter_width = self.filter_shape
        channels = self.input_shape[0]
        limit = 1 / np.sqrt(np.prod(self.filter_shape))
        self.W = np.random.uniform(-limit, limit,
                                   size=(self.n_filters, channels, filter_height, filter_width))
        self.w0 = np.zeros((self.n_filters, 1))
        # Each parameter tensor gets its own optimizer copy, so state is kept separately
        self.W_opt = copy.copy(optimizer)
        self.w0_opt = copy.copy(optimizer)

    def parameters(self):
        return np.prod(self.W.shape) + np.prod(self.w0.shape)

    def output_shape(self):
        # (n_filters, out_height, out_width) from the standard convolution size formula
        channels, height, width = self.input_shape
        pad_h, pad_w = determine_padding(self.filter_shape, output_shape=self.padding)
        output_height = (height + np.sum(pad_h) - self.filter_shape[0]) / self.stride + 1
        output_width = (width + np.sum(pad_w) - self.filter_shape[1]) / self.stride + 1
        return self.n_filters, int(output_height), int(output_width)

    def forward(self, X):
        batch_size, channels, height, width = X.shape
        self.layer_input = X
        # im2col turns the convolution into a single matrix multiplication
        self.X_col = image2column(X, self.filter_shape, stride=self.stride, output_shape=self.padding)
        self.W_col = self.W.reshape((self.n_filters, -1))
        output = self.W_col.dot(self.X_col) + self.w0
        output = output.reshape(self.output_shape() + (batch_size, ))
        # Back to (batch_size, n_filters, out_height, out_width)
        return output.transpose(3, 0, 1, 2)

    def __call__(self, X):
        return self.forward(X)

    def backward(self, _grad_sum):
        _grad_sum = _grad_sum.transpose(1, 2, 3, 0).reshape(self.n_filters, -1)
        # Gradients w.r.t. the filters and the bias
        grad_w = _grad_sum.dot(self.X_col.T).reshape(self.W.shape)
        grad_w0 = np.sum(_grad_sum, axis=1, keepdims=True)
        self.W = self.W_opt.update(self.W, grad_w)
        self.w0 = self.w0_opt.update(self.w0, grad_w0)
        # Map the gradient back through the filters first, then from columns to image layout
        _grad_sum = self.W_col.T.dot(_grad_sum)
        _grad_sum = column2image(_grad_sum, self.layer_input.shape, self.filter_shape,
                                 stride=self.stride, output_shape=self.padding)
        return _grad_sum
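Note that initialize() only requires the optimizer to expose an update(w, grad_w) method returning the new weights. As a reference, a minimal hypothetical SGD satisfying that contract (an illustration, not the wheel's actual optimizer) looks like:
class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr
    def update(self, w, grad_w):
        # Plain gradient-descent step
        return w - self.lr * grad_w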
The small helper functions it relies on are listed below; they live in the utils folder of the wheel:
def determine_padding(filter_shape, output_shape="same"):
    # "same" keeps the spatial size (for stride 1); anything else ("valid", 0, ...)
    # means no padding, so the fall-through returns zeros instead of None.
    if output_shape == "same":
        filter_height, filter_width = filter_shape
        pad_h1 = int(np.floor((filter_height - 1) / 2))
        pad_h2 = int(np.ceil((filter_height - 1) / 2))
        pad_w1 = int(np.floor((filter_width - 1) / 2))
        pad_w2 = int(np.ceil((filter_width - 1) / 2))
        return (pad_h1, pad_h2), (pad_w1, pad_w2)
    return (0, 0), (0, 0)
def get_im2col_indices(images_shape, filter_shape, padding, stride=1):
    batch_size, channels, height, width = images_shape
    filter_height, filter_width = filter_shape
    pad_h, pad_w = padding
    out_height = int((height + np.sum(pad_h) - filter_height) / stride + 1)
    out_width = int((width + np.sum(pad_w) - filter_width) / stride + 1)
    # Row index of each element within a patch, plus each patch's row offset
    i0 = np.repeat(np.arange(filter_height), filter_width)
    i0 = np.tile(i0, channels)
    i1 = stride * np.repeat(np.arange(out_height), out_width)
    # Column index of each element within a patch, plus each patch's column offset
    j0 = np.tile(np.arange(filter_width), filter_height * channels)
    j1 = stride * np.tile(np.arange(out_width), out_height)
    i = i0.reshape(-1, 1) + i1.reshape(1, -1)
    j = j0.reshape(-1, 1) + j1.reshape(1, -1)
    # Channel index of each patch element
    k = np.repeat(np.arange(channels), filter_height * filter_width).reshape(-1, 1)
    return (k, i, j)
def image2column(images, filter_shape, stride, output_shape='same'):
    filter_height, filter_width = filter_shape
    pad_h, pad_w = determine_padding(filter_shape, output_shape)
    images_padded = np.pad(images, ((0, 0), (0, 0), pad_h, pad_w), mode='constant')
    k, i, j = get_im2col_indices(images.shape, filter_shape, (pad_h, pad_w), stride)
    # Gather every patch; fancy indexing yields (batch, patch_size, n_patches)
    cols = images_padded[:, k, i, j]
    channels = images.shape[1]
    cols = cols.transpose(1, 2, 0).reshape(filter_height * filter_width * channels, -1)
    return cols

def column2image(cols, images_shape, filter_shape, stride, output_shape='same'):
    batch_size, channels, height, width = images_shape
    pad_h, pad_w = determine_padding(filter_shape, output_shape)
    height_padded = height + np.sum(pad_h)
    width_padded = width + np.sum(pad_w)
    images_padded = np.zeros((batch_size, channels, height_padded, width_padded))
    k, i, j = get_im2col_indices(images_shape, filter_shape, (pad_h, pad_w), stride)
    cols = cols.reshape(channels * np.prod(filter_shape), -1, batch_size)
    cols = cols.transpose(2, 0, 1)
    # Scatter-add: overlapping patches accumulate their contributions
    np.add.at(images_padded, (slice(None), k, i, j), cols)
    # Strip the padding before returning
    return images_padded[:, :, pad_h[0]:height + pad_h[0], pad_w[0]:width + pad_w[0]]
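As a quick standalone sanity check (values chosen purely for illustration): a 4x4 single-channel image with a 3x3 filter and 'same' padding should produce 3*3 = 9 rows and 4*4 = 16 columns, and column2image should map the columns back to the original image shape:
X = np.arange(16, dtype=float).reshape(1, 1, 4, 4)
cols = image2column(X, (3, 3), stride=1, output_shape='same')
print(cols.shape)   # (9, 16)
back = column2image(cols, X.shape, (3, 3), stride=1, output_shape='same')
print(back.shape)   # (1, 1, 4, 4); overlapping windows are summed, not averaged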
Pooling comes in two variants, average pooling and max pooling. Since the two pooling methods differ in their backward passes, we first define a base class:
class PoolingBase(LayerBase):
    def __init__(self, pool_shape=(2, 2), stride=1, padding=0):
        self.pool_shape = pool_shape
        self.stride = stride
        self.padding = padding

    def output_shape(self):
        # (channels, out_height, out_width), assuming the wheel sets input_shape
        # when the layers are chained together
        channels, height, width = self.input_shape
        out_height = (height - self.pool_shape[0]) / self.stride + 1
        out_width = (width - self.pool_shape[1]) / self.stride + 1
        return channels, int(out_height), int(out_width)

    def forward(self, X):
        self.layer_input = X
        batch_size, channels, height, width = X.shape
        _, out_height, out_width = self.output_shape()
        # Treat every channel as an independent single-channel image
        X = X.reshape(batch_size * channels, 1, height, width)
        X_col = image2column(X, self.pool_shape, self.stride, self.padding)
        # Subclasses decide how each column (one pooling window) is reduced
        output = self._pool_forward(X_col)
        output = output.reshape(out_height, out_width, batch_size, channels)
        output = output.transpose(2, 3, 0, 1)
        return output

    def __call__(self, X):
        return self.forward(X)

    def backward(self, _grad_sum):
        batch_size, _, _, _ = _grad_sum.shape
        channels, height, width = self.input_shape
        _grad_sum = _grad_sum.transpose(2, 3, 0, 1).ravel()
        # Subclasses route the gradient back into the pooling windows
        _grad_sum_col = self._pool_backward(_grad_sum)
        _grad_sum = column2image(_grad_sum_col, (batch_size * channels, 1, height, width),
                                 self.pool_shape, self.stride, 0)
        _grad_sum = _grad_sum.reshape((batch_size,) + self.input_shape)
        return _grad_sum
Building on this base class, we can construct both pooling layers:
class MaxPooling2D(PoolingBase):
    def _pool_forward(self, X_col):
        # Remember which element of each window was the maximum
        arg_max = np.argmax(X_col, axis=0).flatten()
        output = X_col[arg_max, range(arg_max.size)]
        self.cache = arg_max
        return output

    def _pool_backward(self, _grad_sum):
        accum_grad_col = np.zeros((np.prod(self.pool_shape), _grad_sum.size))
        arg_max = self.cache
        # The gradient flows only to the element that won the max
        accum_grad_col[arg_max, range(_grad_sum.size)] = _grad_sum
        return accum_grad_col

class AveragePooling2D(PoolingBase):
    def _pool_forward(self, X_col):
        return np.mean(X_col, axis=0)

    def _pool_backward(self, _grad_sum):
        accum_grad_col = np.zeros((np.prod(self.pool_shape), _grad_sum.size))
        # The gradient is spread evenly over the whole window
        accum_grad_col[:, range(_grad_sum.size)] = 1. / accum_grad_col.shape[0] * _grad_sum
        return accum_grad_col
The activation functions are built on the OperatorBase wheel:
class Sigmoid(OperatorBase):
    def __call__(self, x):
        return self.operate(x)

    def operate(self, x):
        return 1 / (1 + np.exp(-x))

    def gradient(self, x, _grad_sum=1):
        # sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))
        f = self.operate(x)
        return _grad_sum * f * (1 - f)

class ReLU(OperatorBase):
    def __call__(self, x):
        return self.operate(x)

    def operate(self, x):
        return np.where(x >= 0, x, 0)

    def gradient(self, x, _grad_sum=1):
        # Subgradient: 1 for x >= 0, 0 otherwise
        return _grad_sum * np.where(x >= 0, 1, 0)
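A quick check of the gradients on a few hand-picked points (illustrative values):
relu = ReLU()
x = np.array([-2., 0., 3.])
print(relu(x))            # [0. 0. 3.]
print(relu.gradient(x))   # [0. 1. 1.]; we take the subgradient 1 at x = 0
sig = Sigmoid()
print(sig.gradient(np.array([0.])))   # [0.25], the maximum of sigmoid'(x)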
The flatten operation is trivial:
class Flatten(LayerBase):
    def __init__(self, input_shape=None):
        self.prev_shape = None
        self.input_shape = input_shape

    def forward(self, X):
        # Remember the input shape so backward can undo the reshape
        self.prev_shape = X.shape
        return X.reshape((X.shape[0], -1))

    def backward(self, _grad_sum):
        return _grad_sum.reshape(self.prev_shape)
We test the implementation by running it over an image of the letter X, using the same kernel values as the in-class example:
img = np.array([[0,   0,   0,   0,   0,   0,   0,   0,   0],
                [0, 225,   0,   0,   0,   0,   0, 225,   0],
                [0,   0, 225,   0,   0,   0, 225,   0,   0],
                [0,   0,   0, 225,   0, 225,   0,   0,   0],
                [0,   0,   0,   0, 225,   0,   0,   0,   0],
                [0,   0,   0, 225,   0, 225,   0,   0,   0],
                [0,   0, 225,   0,   0,   0, 225,   0,   0],
                [0, 225,   0,   0,   0,   0,   0, 225,   0],
                [0,   0,   0,   0,   0,   0,   0,   0,   0]])
kernel1 = np.array([[ 1, -1, -1],
                    [-1,  1, -1],
                    [-1, -1,  1]])
kernel2 = np.array([[ 1, -1,  1],
                    [-1,  1, -1],
                    [ 1, -1,  1]])
kernel3 = np.array([[-1, -1,  1],
                    [-1,  1, -1],
                    [ 1, -1, -1]])
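With the helpers above, the convolution can also be run by hand. A minimal sketch (assuming stride 1 and 'valid' padding, so the 9x9 image yields a 7x7 feature map):
img4d = img[np.newaxis, np.newaxis, :, :].astype(float)  # (batch, channels, H, W)
cols = image2column(img4d, (3, 3), stride=1, output_shape='valid')
out1 = kernel1.reshape(1, -1).dot(cols).reshape(7, 7)
out2 = kernel2.reshape(1, -1).dot(cols).reshape(7, 7)
out3 = kernel3.reshape(1, -1).dot(cols).reshape(7, 7)
print(out1)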
The test code for the torch comparison is almost identical to the above; we only swap our hand-rolled operator layers for the corresponding torch layers, so the code is not repeated here (the kernels are the same as above). The results turn out to be exactly identical.
The mapping from our hand-rolled wheels to the corresponding torch operators is:
model = [...] --> torch.nn.Sequential(...)
Conv2D --> torch.nn.Conv2d
MaxPooling2D --> torch.nn.MaxPool2d
ReLU --> torch.nn.ReLU
Flatten --> torch.nn.Flatten (or x.view(x.size(0), -1))
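For example, a convolution-pooling stack can be expressed as a Sequential (a sketch with illustrative layer sizes, not the exact model used below):
import torch.nn as nn
seq = nn.Sequential(
    nn.Conv2d(1, 9, 3),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    nn.Flatten(),
)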
For data we use the ready-made XO dataset: the images are placed in a directory and loaded with the DatasetsGenerator we built earlier, combined with DataLoader. Since this was covered in a previous lab report, it is not repeated here.
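Because the code below references transforms and data_loader, here is a minimal hypothetical version of that pipeline for completeness (the path and batch size are assumptions, and the 114x114 resize is inferred from fc1 = 27 * 27 * 5 in the model below):
from torchvision import datasets, transforms as T
from torch.utils.data import DataLoader

# Grayscale 114x114 inputs match nn.Linear(27 * 27 * 5, ...) in the model below
transforms = T.Compose([T.Grayscale(), T.Resize((114, 114)), T.ToTensor()])
path_train = r'training_data_sm'  # hypothetical path
data_train = datasets.ImageFolder(path_train, transform=transforms)
data_loader = DataLoader(data_train, batch_size=64, shuffle=True)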
import torch
import torch.nn as nn
import torch.optim as optim

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 9, 3)
        self.maxpool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(9, 5, 3)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(27 * 27 * 5, 1200)
        self.fc2 = nn.Linear(1200, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        x = self.maxpool(self.relu(self.conv1(x)))
        x = self.maxpool(self.relu(self.conv2(x)))
        # Flatten per sample; x.view(-1) would also collapse the batch dimension
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x
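A quick shape check confirms the architecture (the 114x114 input size is inferred from fc1 = 27 * 27 * 5: 114 -> conv 112 -> pool 56 -> conv 54 -> pool 27):
x = torch.randn(4, 1, 114, 114)
print(Net()(x).shape)   # torch.Size([4, 2])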
For training we could use the Runner class directly, but here we write the loop out by hand instead, to show the training procedure more explicitly:
model = Net()
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)
epochs = 10
for epoch in range(epochs):
    accum_loss = 0.
    for i, data in enumerate(data_loader):
        images, label = data
        out = model(images)
        loss = criterion(out, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        accum_loss += loss.item()
        if (i + 1) % 10 == 0:
            # Report the average loss over the last 10 batches
            print('[%d/%d, %d] loss: %f' % (epoch + 1, epochs, i + 1, accum_loss / 10))
            accum_loss = 0.0
The test code is as follows:
path_test = r'training_data_sm'
data_test = datasets.ImageFolder(path_test, transform=transforms)
print("size of test_data:", len(data_test))
data_loader_test = DataLoader(data_test, batch_size=64, shuffle=True)
print(len(data_loader))        # number of training batches
print(len(data_loader_test))   # number of test batches
correct = 0
total = 0
model.eval()
with torch.no_grad():  # no gradients needed for evaluation
    for data in data_loader_test:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy: %f %%' % (100. * correct / total))
Through this experiment, we have learned: