使用CIFAR10数据集,用三种框架构建Residual_Network作为例子,比较框架间的异同。
import torch
import torch.nn as nn
import torchvision
# Download and construct the CIFAR-10 training set.
# ToTensor() is required: without a transform every sample is a PIL image, so
# `image.size()` below raises TypeError (PIL's `size` is a tuple attribute) and
# the DataLoader's default collate function cannot stack the samples.
train_dataset = torchvision.datasets.CIFAR10(
    root='../../data/',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True)

# Fetch one data pair (read data from disk).
image, label = train_dataset[0]
print(image.size())  # torch.Size([3, 32, 32])
print(label)  # 6
print(train_dataset.data.shape)  # (50000, 32, 32, 3)
# train_dataset.targets is a plain Python list.
print(len(train_dataset.targets))  # 50000

# Data loader (this provides queues and threads in a very simple way).
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=64,
                                           shuffle=True)

"""
# Demonstrates the structure of what the DataLoader yields.
# When iteration starts, queue and thread start to load data from files.
data_iter = iter(train_loader)
# Mini-batch images and labels (use the built-in next(); iterators have no .next()).
images, labels = next(data_iter)
print(images.shape)  # torch.Size([64, 3, 32, 32])
print(labels.shape)
# torch.Size([64]): after the DataLoader, labels are a torch tensor instead of a list.
"""

# Actual usage of the data loader is as below.
for images, labels in train_loader:
    # Training code should be written here.
    pass
import keras
from keras.datasets import cifar10
# Load CIFAR-10 through Keras; each split is an (images, labels) pair.
train_split, test_split = cifar10.load_data()
train_x, train_y = train_split
test_x, test_y = test_split
# Per the original notes: images are an ndarray of shape (50000, 32, 32, 3)
# and labels have shape (50000, 1).
for split_array in (train_x, train_y):
    print(split_array.shape)
"""
1: pytorch 都是内置torch.xxTensor输入网络,而keras的则是原生ndarray类型
2: 对于multi-class的其中一种loss,即cross-entropy loss 而言,
pytorch的api为 CrossEntropyLoss, 但y_true不能用one-hot编码!这与keras,tensorflow 都不同。tensorflow相应的api为softmax_cross_entropy
他们的api都仅限于multi-class classification
3*: 其实上面提到的api都属于categorical cross-entropy loss,
又叫 softmax loss,是函数内部先进行了 softmax 激活,再经过cross-entropy loss。
这个loss是cross-entropy loss的变种,
cross-entropy loss又叫logistic loss 或 multinomial logistic loss。
实现这种loss的函数不包括激活函数,需要自定义。
pytorch对应的api为BCEloss(仅限于 binary classification),
tensorflow 对应的api为 log_loss。
cross-entropy loss的第二个变种是 binary cross-entropy loss 又叫 sigmoid cross- entropy loss。
函数内部先进行了sigmoid激活,再经过cross-entropy loss。
pytorch对应的api为BCEWithLogitsLoss,
tensorflow对应的api为sigmoid_cross_entropy
"""
# PyTorch: sketch of a training loop using the multi-class cross-entropy loss.
# The `...` lines stand for code omitted from this excerpt.
criterion = nn.CrossEntropyLoss()
...
for epoch in range(num_epochs):
    for i, batch in enumerate(train_loader):
        # Unpack the mini-batch and move both tensors to the training device.
        images, labels = batch
        images, labels = images.to(device), labels.to(device)
        # Forward pass
        outputs = model(images)
        # For the multi-class cross-entropy loss, y_true is passed as class
        # indices; it must NOT be one-hot encoded.
        loss = criterion(outputs, labels)
...
# Keras: sketch of training with the multi-class cross-entropy loss.
# The `...` lines stand for code omitted from this excerpt.
# With loss="categorical_crossentropy", y_true must be one-hot encoded.
train_y = keras.utils.to_categorical(train_y, 10)
# The test labels go through the same loss (validation_data / evaluate), so
# they need the same encoding; otherwise their (N, 1) shape does not match the
# (N, 10) model output.
test_y = keras.utils.to_categorical(test_y, 10)
...
model.fit_generator(datagen.flow(train_x, train_y, batch_size=128),
                    # validation_data is documented as a tuple (x_val, y_val).
                    validation_data=(test_x, test_y),
                    epochs=epochs, steps_per_epoch=steps_per_epoch, verbose=1)
...
model = myModel()
model.compile(optimizer=Adam(0.001), loss="categorical_crossentropy", metrics=["accuracy"])
model.fit_generator(datagen.flow(train_x, train_y, batch_size=128),
                    validation_data=(test_x, test_y),
                    epochs=epochs, steps_per_epoch=steps_per_epoch, verbose=1, workers=4)
# Evaluate on the test dataset (labels already one-hot encoded above).
accuracy = model.evaluate(x=test_x, y=test_y, batch_size=128)
# Save the whole network.
model.save("cifar10model.h5")
"""
# https://blog.csdn.net/jiandanjinxin/article/details/77152530
# 使用
# keras.models.load_model("cifar10model.h5")
# 只保存architecture
# json_string = model.to_json()
# open('my_model_architecture.json','w').write(json_string)
# 使用
# from keras.models import model_from_json
#model = model_from_json(open('my_model_architecture.json').read())
# 只保存weights
# model.save_weights('my_model_weights.h5')
#需要在代码中初始化一个完全相同的模型
# model.load_weights('my_model_weights.h5')
#需要加载权重到不同的网络结构(有些层一样)中,例如fine-tune或transfer-learning,可以通过层名字来加载模型
# model.load_weights('my_model_weights.h5', by_name=True)
"""
# PyTorch: sketch of the full train / evaluate / save cycle.
# `optimizer`, `device`, `num_epochs` and the loaders are expected to exist already.
model = myModel()

# Loss and optimizer
criterion = nn.CrossEntropyLoss()

for epoch in range(num_epochs):
    for i, batch in enumerate(train_loader):
        images, labels = batch
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()  # clear the gradients left over from the previous iteration
        loss.backward()        # back-propagate to compute fresh gradients
        optimizer.step()       # update the weights

# model.eval() switches the model to test mode: dropout and batch normalization
# behave differently during training and testing. In eval mode batch-norm uses
# the statistics learned during training instead of per-batch statistics, which
# matters when the test batch size is small.
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Index of the largest logit along the class dimension.
        predicted = torch.max(outputs.data, 1)[1]
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))

# Save the model checkpoint (weights only, via the state dict).
torch.save(model.state_dict(), 'resnet.ckpt')
"""
# 使用
# model.load_state_dict(torch.load('resnet.ckpt'))
# 若想保存整个网络(architecture + weights)
# torch.save(model, 'model.ckpt')
# 使用
#model = torch.load('model.ckpt')
"""
#https://blog.csdn.net/dss_dssssd/article/details/83892824
"""
1: 准备数据(注意数据格式不同)
2: 定义网络结构model
3: 定义损失函数
4: 定义优化算法 optimizer
5: 训练-keras
5.1:编译模型(传入loss function和optimizer等)
5.2:训练模型(fit or fit_generator,传入数据)
5: 训练-pytorch
迭代训练:
5.1:准备好tensor形式的输入数据和标签(可选)
5.2:前向传播计算网络输出output和计算损失函数loss
5.3:反向传播更新参数
以下三句话一句也不能少:
5.3.1:将上次迭代计算的梯度值清0
optimizer.zero_grad()
5.3.2:反向传播,计算梯度值
loss.backward()
5.3.3:更新权值参数
optimizer.step()
6: 在测试集上测试-keras
model.evaluate
6: 在测试集上测试-pytorch
遍历测试集,自定义metric
7: 保存网络(可选) 具体实现参考上面代码
"""
1、对于keras,不需要input_channels,函数内部会自动获得,而pytorch则需要显式声明input_channels
2、对于pytorch Conv2d需要指定padding,而keras的则是same和valid两种选项(valid即padding=0)
3、keras的Flatten操作可以视作pytorch中的view
4、keras的dimension一般顺序是(H, W, C) (tensorflow 为backend的话),
而pytorch的顺序则是( C, H, W)
5、具体的变换可以参照下方,但由于没有学过pytorch,keras也刚入门,不能保证正确,日后学的更深入了之后再来看看。
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters
num_epochs = 80
learning_rate = 0.001

# Image preprocessing for training: pad by 4 pixels, random horizontal flip,
# random 32x32 crop, then convert to a tensor.
transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor()])

# CIFAR-10 dataset
# train_dataset.data.shape == (50000, 32, 32, 3)
# train_dataset.targets is a list with len == 50000
# Both splits share one root directory so the archive is downloaded only once,
# and the test split also passes download=True so it does not fail when the
# data has not been fetched yet.
train_dataset = torchvision.datasets.CIFAR10(root='../../data/',
                                             train=True,
                                             transform=transform,
                                             download=True)

test_dataset = torchvision.datasets.CIFAR10(root='../../data/',
                                            train=False,
                                            transform=transforms.ToTensor(),
                                            download=True)

# Data loaders: shuffle only the training data.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=100,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=100,
                                          shuffle=False)
# 3x3 convolution
def conv3x3(in_channels, out_channels, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        stride: convolution stride (default 1).
    """
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_channels, out_channels, **conv_kwargs)
# Residual block
class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers with a skip connection.

    Args:
        in_channels: channels of the block input.
        out_channels: channels produced by both convolutions.
        stride: stride of the first convolution (default 1).
        downsample: optional module applied to the input before it is added
            to the main path; ``None`` means the input is added unchanged.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        # Compare against None explicitly: the truthiness of an nn.Module
        # depends on whether it defines __len__, which is not what is meant.
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out
# ResNet
class ResNet(nn.Module):
    """Small ResNet: a 3->16 stem, three residual stages (16/32/64), pooling, linear head.

    Args:
        block: callable building one residual block, called as
            ``block(in_channels, out_channels, stride, downsample)``.
        layers: sequence whose first three entries give the number of blocks
            in each stage.
        num_classes: size of the final linear layer's output (default 10).
    """

    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        # Running channel count, updated by make_layer as stages are built.
        self.in_channels = 16
        self.conv = conv3x3(3, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[1], 2)
        self.layer3 = self.make_layer(block, 64, layers[2], 2)
        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks as an ``nn.Sequential``.

        Only the first block receives ``stride`` and, when the stride or the
        channel count changes, a conv3x3 + batch-norm projection for the
        shortcut. ``self.in_channels`` is set to ``out_channels`` afterwards.
        """
        needs_projection = (stride != 1) or (self.in_channels != out_channels)
        downsample = None
        if needs_projection:
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        first_block = block(self.in_channels, out_channels, stride, downsample)
        self.in_channels = out_channels
        remaining = [block(out_channels, out_channels) for _ in range(1, blocks)]
        return nn.Sequential(first_block, *remaining)

    def forward(self, x):
        """Run the stem, the three stages, pooling, flatten and the linear head."""
        # Per the original note, for a batch of 100 CIFAR images the stem
        # output has shape torch.Size([100, 16, 32, 32]).
        out = self.relu(self.bn(self.conv(x)))
        for stage in (self.layer1, self.layer2, self.layer3):
            out = stage(out)
        out = self.avg_pool(out)
        # Flatten everything except the batch dimension.
        out = out.view(out.size(0), -1)
        return self.fc(out)
model = ResNet(ResidualBlock, [2, 2, 2]).to(device)

# pip install torchsummary or
# git clone https://github.com/sksq96/pytorch-summary
from torchsummary import summary

# input_size=(C,H,W)
summary(model, input_size=(3, 32, 32))

# Sanity-check a single forward pass on one mini-batch.
# Use the built-in next(): the Python-2 style `.next()` alias is not available
# on DataLoader iterators in recent PyTorch releases.
images, labels = next(iter(train_loader))
# The model lives on `device`, so the batch must be moved there as well.
outputs = model(images.to(device))

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# For updating learning rate
def update_lr(optimizer, lr):
    """Set the ``'lr'`` entry of every parameter group of ``optimizer`` to ``lr``."""
    for group in optimizer.param_groups:
        group.update(lr=lr)
# Train the model
total_step = len(train_loader)
curr_lr = learning_rate
for epoch in range(num_epochs):
    # `step` is 1-based so it can be used directly for logging.
    for step, (images, labels) in enumerate(train_loader, start=1):
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Log every 100 steps.
        if step % 100 == 0:
            print("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
                  .format(epoch + 1, num_epochs, step, total_step, loss.item()))

    # Decay learning rate: divide by 3 after every 20th epoch.
    if (epoch + 1) % 20 == 0:
        curr_lr = curr_lr / 3
        update_lr(optimizer, curr_lr)

# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Index of the largest logit along the class dimension.
        predicted = torch.max(outputs.data, 1)[1]
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), 'resnet.ckpt')
"""
#pytorch
def conv3x3(in_channels, out_channels, stride=1):
return nn.Conv2d(in_channels, out_channels, kernel_size=3,
stride=stride, padding=1, bias=False)
"""
def conv3x3(x, out_channels, stride=1):
    """Apply a 3x3, "same"-padded ``Conv2D`` with ``out_channels`` filters to ``x``.

    Keras counterpart of the PyTorch ``conv3x3`` helper: it takes the input
    tensor itself rather than an input-channel count, and returns the output
    tensor rather than a layer.
    """
    conv_layer = Conv2D(filters=out_channels,
                        kernel_size=[3, 3],
                        strides=(stride, stride),
                        padding="same")
    return conv_layer(x)
"""
# pytorch
# Residual block
class ResidualBlock(nn.Module):
def __init__(self, in_channels, out_channels, stride=1, downsample=None):
super(ResidualBlock, self).__init__()
self.conv1 = conv3x3(in_channels, out_channels, stride)
self.bn1 = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(out_channels, out_channels)
self.bn2 = nn.BatchNorm2d(out_channels)
self.downsample = downsample
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
"""
def ResidualBlock(x, out_channels, stride=1, downsample=False):
    """Functional-API residual block: two conv3x3 + batch-norm layers plus a shortcut.

    Args:
        x: input tensor.
        out_channels: number of filters of both convolutions.
        stride: stride of the first convolution (and of the shortcut projection).
        downsample: when truthy, the shortcut is projected with
            conv3x3 + batch-norm before it is added to the main path.
    """
    # Main path: conv -> BN -> ReLU -> conv -> BN.
    main = conv3x3(x, out_channels, stride)
    main = BatchNormalization()(main)
    main = Activation("relu")(main)
    main = conv3x3(main, out_channels)
    main = BatchNormalization()(main)

    # Shortcut path: the input itself, or its projection.
    shortcut = x
    if downsample:
        shortcut = conv3x3(shortcut, out_channels, stride=stride)
        shortcut = BatchNormalization()(shortcut)

    merged = keras.layers.add([shortcut, main])
    return Activation("relu")(merged)
"""
#pytorch
def make_layer(self, block, out_channels, blocks, stride=1):
downsample = None
if (stride != 1) or (self.in_channels != out_channels):
downsample = nn.Sequential(
conv3x3(self.in_channels, out_channels, stride=stride),
nn.BatchNorm2d(out_channels))
layers = []
layers.append(block(self.in_channels, out_channels, stride, downsample))
self.in_channels = out_channels
for i in range(1, blocks):
layers.append(block(out_channels, out_channels))
# [*[1,2,3]]
# Out[96]: [1, 2, 3]
return nn.Sequential(*layers)
"""
def make_layer(x, out_channels, blocks, stride=1):
    """Stack ``blocks`` residual blocks on ``x`` and return the resulting tensor.

    Only the first block receives ``stride``; it also projects its shortcut
    when the stride is not 1 or the last dimension of ``x`` differs from
    ``out_channels``.
    """
    # The channel count is read from the last axis of x.shape
    # (the original note refers to the TensorFlow backend).
    downsample = bool((stride != 1) or (out_channels != x.shape[-1]))
    out = ResidualBlock(x, out_channels, stride, downsample)
    for _ in range(blocks - 1):
        out = ResidualBlock(out, out_channels)
    return out
def KerasResidual(input_shape):
    """Build the Keras functional-API version of the small ResNet.

    Args:
        input_shape: shape passed to ``Input`` (batch dimension excluded).

    Returns:
        A ``Model`` mapping the input images to 10 softmax probabilities.

    Note:
        The number of blocks per stage is read from the module-level ``layers``
        sequence, which must be defined before this function is called.
    """
    images = Input(input_shape)

    # Stem; the original note gives its output shape as (None, 32, 32, 16).
    stem = conv3x3(images, 16)
    stem = BatchNormalization()(stem)
    stem = Activation("relu")(stem)

    # (filters, number of blocks, stride) for the three residual stages.
    stage_plan = ((16, layers[0], 1), (32, layers[1], 2), (64, layers[2], 2))
    features = stem
    for filters, depth, stride in stage_plan:
        features = make_layer(features, filters, depth, stride)

    pooled = AveragePooling2D(pool_size=(8, 8))(features)
    flat = Flatten()(pooled)
    # PyTorch's nn.CrossEntropyLoss applies softmax internally. Keras'
    # categorical_crossentropy is used here without any built-in activation,
    # so the softmax has to be part of the Dense layer.
    probabilities = Dense(units=10, activation="softmax")(flat)
    return Model(inputs=images, outputs=probabilities)
# Build the Keras model for 32x32 RGB inputs with two residual blocks per
# stage, then print its layer summary.
layers = [2] * 3
input_shape = (32, 32, 3)
mymodel = KerasResidual(input_shape)
mymodel.summary()
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 16, 32, 32] 432
BatchNorm2d-2 [-1, 16, 32, 32] 32
ReLU-3 [-1, 16, 32, 32] 0
Conv2d-4 [-1, 16, 32, 32] 2,304
BatchNorm2d-5 [-1, 16, 32, 32] 32
ReLU-6 [-1, 16, 32, 32] 0
Conv2d-7 [-1, 16, 32, 32] 2,304
BatchNorm2d-8 [-1, 16, 32, 32] 32
ReLU-9 [-1, 16, 32, 32] 0
ResidualBlock-10 [-1, 16, 32, 32] 0
Conv2d-11 [-1, 16, 32, 32] 2,304
BatchNorm2d-12 [-1, 16, 32, 32] 32
ReLU-13 [-1, 16, 32, 32] 0
Conv2d-14 [-1, 16, 32, 32] 2,304
BatchNorm2d-15 [-1, 16, 32, 32] 32
ReLU-16 [-1, 16, 32, 32] 0
ResidualBlock-17 [-1, 16, 32, 32] 0
Conv2d-18 [-1, 32, 16, 16] 4,608
BatchNorm2d-19 [-1, 32, 16, 16] 64
ReLU-20 [-1, 32, 16, 16] 0
Conv2d-21 [-1, 32, 16, 16] 9,216
BatchNorm2d-22 [-1, 32, 16, 16] 64
Conv2d-23 [-1, 32, 16, 16] 4,608
BatchNorm2d-24 [-1, 32, 16, 16] 64
ReLU-25 [-1, 32, 16, 16] 0
ResidualBlock-26 [-1, 32, 16, 16] 0
Conv2d-27 [-1, 32, 16, 16] 9,216
BatchNorm2d-28 [-1, 32, 16, 16] 64
ReLU-29 [-1, 32, 16, 16] 0
Conv2d-30 [-1, 32, 16, 16] 9,216
BatchNorm2d-31 [-1, 32, 16, 16] 64
ReLU-32 [-1, 32, 16, 16] 0
ResidualBlock-33 [-1, 32, 16, 16] 0
Conv2d-34 [-1, 64, 8, 8] 18,432
BatchNorm2d-35 [-1, 64, 8, 8] 128
ReLU-36 [-1, 64, 8, 8] 0
Conv2d-37 [-1, 64, 8, 8] 36,864
BatchNorm2d-38 [-1, 64, 8, 8] 128
Conv2d-39 [-1, 64, 8, 8] 18,432
BatchNorm2d-40 [-1, 64, 8, 8] 128
ReLU-41 [-1, 64, 8, 8] 0
ResidualBlock-42 [-1, 64, 8, 8] 0
Conv2d-43 [-1, 64, 8, 8] 36,864
BatchNorm2d-44 [-1, 64, 8, 8] 128
ReLU-45 [-1, 64, 8, 8] 0
Conv2d-46 [-1, 64, 8, 8] 36,864
BatchNorm2d-47 [-1, 64, 8, 8] 128
ReLU-48 [-1, 64, 8, 8] 0
ResidualBlock-49 [-1, 64, 8, 8] 0
AvgPool2d-50 [-1, 64, 1, 1] 0
Linear-51 [-1, 10] 650
================================================================
Total params: 195,738
Trainable params: 195,738
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 3.63
Params size (MB): 0.75
Estimated Total Size (MB): 4.38
----------------------------------------------------------------
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_26 (InputLayer) (None, 32, 32, 3) 0
__________________________________________________________________________________________________
conv2d_103 (Conv2D) (None, 32, 32, 16) 448 input_26[0][0]
__________________________________________________________________________________________________
batch_normalization_99 (BatchNo (None, 32, 32, 16) 64 conv2d_103[0][0]
__________________________________________________________________________________________________
activation_87 (Activation) (None, 32, 32, 16) 0 batch_normalization_99[0][0]
__________________________________________________________________________________________________
conv2d_104 (Conv2D) (None, 32, 32, 16) 2320 activation_87[0][0]
__________________________________________________________________________________________________
batch_normalization_100 (BatchN (None, 32, 32, 16) 64 conv2d_104[0][0]
__________________________________________________________________________________________________
activation_88 (Activation) (None, 32, 32, 16) 0 batch_normalization_100[0][0]
__________________________________________________________________________________________________
conv2d_105 (Conv2D) (None, 32, 32, 16) 2320 activation_88[0][0]
__________________________________________________________________________________________________
batch_normalization_101 (BatchN (None, 32, 32, 16) 64 conv2d_105[0][0]
__________________________________________________________________________________________________
add_34 (Add) (None, 32, 32, 16) 0 activation_87[0][0]
batch_normalization_101[0][0]
__________________________________________________________________________________________________
activation_89 (Activation) (None, 32, 32, 16) 0 add_34[0][0]
__________________________________________________________________________________________________
conv2d_106 (Conv2D) (None, 32, 32, 16) 2320 activation_89[0][0]
__________________________________________________________________________________________________
batch_normalization_102 (BatchN (None, 32, 32, 16) 64 conv2d_106[0][0]
__________________________________________________________________________________________________
activation_90 (Activation) (None, 32, 32, 16) 0 batch_normalization_102[0][0]
__________________________________________________________________________________________________
conv2d_107 (Conv2D) (None, 32, 32, 16) 2320 activation_90[0][0]
__________________________________________________________________________________________________
batch_normalization_103 (BatchN (None, 32, 32, 16) 64 conv2d_107[0][0]
__________________________________________________________________________________________________
add_35 (Add) (None, 32, 32, 16) 0 activation_89[0][0]
batch_normalization_103[0][0]
__________________________________________________________________________________________________
activation_91 (Activation) (None, 32, 32, 16) 0 add_35[0][0]
__________________________________________________________________________________________________
conv2d_108 (Conv2D) (None, 16, 16, 32) 4640 activation_91[0][0]
__________________________________________________________________________________________________
batch_normalization_104 (BatchN (None, 16, 16, 32) 128 conv2d_108[0][0]
__________________________________________________________________________________________________
activation_92 (Activation) (None, 16, 16, 32) 0 batch_normalization_104[0][0]
__________________________________________________________________________________________________
conv2d_110 (Conv2D) (None, 16, 16, 32) 4640 activation_91[0][0]
__________________________________________________________________________________________________
conv2d_109 (Conv2D) (None, 16, 16, 32) 9248 activation_92[0][0]
__________________________________________________________________________________________________
batch_normalization_106 (BatchN (None, 16, 16, 32) 128 conv2d_110[0][0]
__________________________________________________________________________________________________
batch_normalization_105 (BatchN (None, 16, 16, 32) 128 conv2d_109[0][0]
__________________________________________________________________________________________________
add_36 (Add) (None, 16, 16, 32) 0 batch_normalization_106[0][0]
batch_normalization_105[0][0]
__________________________________________________________________________________________________
activation_93 (Activation) (None, 16, 16, 32) 0 add_36[0][0]
__________________________________________________________________________________________________
conv2d_111 (Conv2D) (None, 16, 16, 32) 9248 activation_93[0][0]
__________________________________________________________________________________________________
batch_normalization_107 (BatchN (None, 16, 16, 32) 128 conv2d_111[0][0]
__________________________________________________________________________________________________
activation_94 (Activation) (None, 16, 16, 32) 0 batch_normalization_107[0][0]
__________________________________________________________________________________________________
conv2d_112 (Conv2D) (None, 16, 16, 32) 9248 activation_94[0][0]
__________________________________________________________________________________________________
batch_normalization_108 (BatchN (None, 16, 16, 32) 128 conv2d_112[0][0]
__________________________________________________________________________________________________
add_37 (Add) (None, 16, 16, 32) 0 activation_93[0][0]
batch_normalization_108[0][0]
__________________________________________________________________________________________________
activation_95 (Activation) (None, 16, 16, 32) 0 add_37[0][0]
__________________________________________________________________________________________________
conv2d_113 (Conv2D) (None, 8, 8, 64) 18496 activation_95[0][0]
__________________________________________________________________________________________________
batch_normalization_109 (BatchN (None, 8, 8, 64) 256 conv2d_113[0][0]
__________________________________________________________________________________________________
activation_96 (Activation) (None, 8, 8, 64) 0 batch_normalization_109[0][0]
__________________________________________________________________________________________________
conv2d_115 (Conv2D) (None, 8, 8, 64) 18496 activation_95[0][0]
__________________________________________________________________________________________________
conv2d_114 (Conv2D) (None, 8, 8, 64) 36928 activation_96[0][0]
__________________________________________________________________________________________________
batch_normalization_111 (BatchN (None, 8, 8, 64) 256 conv2d_115[0][0]
__________________________________________________________________________________________________
batch_normalization_110 (BatchN (None, 8, 8, 64) 256 conv2d_114[0][0]
__________________________________________________________________________________________________
add_38 (Add) (None, 8, 8, 64) 0 batch_normalization_111[0][0]
batch_normalization_110[0][0]
__________________________________________________________________________________________________
activation_97 (Activation) (None, 8, 8, 64) 0 add_38[0][0]
__________________________________________________________________________________________________
conv2d_116 (Conv2D) (None, 8, 8, 64) 36928 activation_97[0][0]
__________________________________________________________________________________________________
batch_normalization_112 (BatchN (None, 8, 8, 64) 256 conv2d_116[0][0]
__________________________________________________________________________________________________
activation_98 (Activation) (None, 8, 8, 64) 0 batch_normalization_112[0][0]
__________________________________________________________________________________________________
conv2d_117 (Conv2D) (None, 8, 8, 64) 36928 activation_98[0][0]
__________________________________________________________________________________________________
batch_normalization_113 (BatchN (None, 8, 8, 64) 256 conv2d_117[0][0]
__________________________________________________________________________________________________
add_39 (Add) (None, 8, 8, 64) 0 activation_97[0][0]
batch_normalization_113[0][0]
__________________________________________________________________________________________________
activation_99 (Activation) (None, 8, 8, 64) 0 add_39[0][0]
__________________________________________________________________________________________________
average_pooling2d_2 (AveragePoo (None, 1, 1, 64) 0 activation_99[0][0]
__________________________________________________________________________________________________
flatten_2 (Flatten) (None, 64) 0 average_pooling2d_2[0][0]
__________________________________________________________________________________________________
dense_2 (Dense) (None, 10) 650 flatten_2[0][0]
==================================================================================================
Total params: 197,418
Trainable params: 196,298
Non-trainable params: 1,120
__________________________________________________________________________________________________