import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
# Define the U-Net model
class UNet(nn.Module):
    def __init__(self, n_channels=3, n_classes=10):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        # Encoder
        self.conv1 = nn.Conv2d(n_channels, 64, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.relu2 = nn.ReLU()
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(256)
        self.relu3 = nn.ReLU()
        self.conv4 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(512)
        self.relu4 = nn.ReLU()
        self.conv5 = nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1)
        self.bn5 = nn.BatchNorm2d(1024)
        self.relu5 = nn.ReLU()
        # 2x2 max pooling shared by all encoder stages; without this
        # downsampling step, the decoder's upsampled feature maps would
        # not match the skip connections in the forward pass
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
This part of the code defines the encoder of the U-Net model. U-Net consists of a symmetric encoder and decoder: the encoder progressively downsamples the input image into smaller feature maps, while the decoder upsamples those feature maps back to the original image size. Concretely, the encoder contains five convolutional stages, each followed by a batch-normalization layer and a ReLU activation, which extract and enhance features from the input image; a 2x2 max pooling between stages performs the downsampling. Each convolution is defined with nn.Conv2d, with channel counts growing from n_channels to 64, 128, 256, 512, and 1024, so each stage extracts progressively more feature channels. kernel_size sets the size of the convolution kernel, stride the step with which it slides, and padding the number of border pixels added around the input feature map. bn1 through bn5 are batch-normalization layers that speed up training and help regularize the network; relu1 through relu5 are ReLU activations that introduce the nonlinearity the network needs for expressive power. These layers form the encoder because they progressively transform the input image into higher-level feature representations. Next we design the decoder, which restores these feature maps into the final semantic-segmentation output.

        # Decoder
        self.upconv6 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.conv6 = nn.Conv2d(1024, 512, kernel_size=3, stride=1, padding=1)
        self.bn6 = nn.BatchNorm2d(512)
        self.relu6 = nn.ReLU()
        self.upconv7 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.conv7 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1)
        self.bn7 = nn.BatchNorm2d(256)
        self.relu7 = nn.ReLU()
        self.upconv8 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.conv8 = nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1)
        self.bn8 = nn.BatchNorm2d(128)
        self.relu8 = nn.ReLU()
        self.upconv9 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.conv9 = nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1)
        self.bn9 = nn.BatchNorm2d(64)
        self.relu9 = nn.ReLU()
        self.conv10 = nn.Conv2d(64, n_classes, kernel_size=1, stride=1)
This part of the code defines the decoder of the U-Net model. In contrast to the encoder, the decoder progressively upsamples the feature maps back to the original image size and produces the final semantic-segmentation output. Concretely, it contains four transposed-convolution stages and one final convolution; each transposed convolution is followed by a convolution, a batch-normalization layer, and a ReLU activation, which upsample and refine the incoming feature maps. Each transposed convolution is defined with nn.ConvTranspose2d, with input channel counts of 1024, 512, 256, and 128; with kernel_size and stride both set to 2, each stage doubles the spatial resolution of its feature map. After each upsampling step, the result is concatenated with the matching encoder feature map (a skip connection), which is why conv6 through conv9 take twice as many input channels as their transposed convolutions produce. bn6 through bn9 are batch-normalization layers and relu6 through relu9 are ReLU activations, serving the same roles as in the encoder. Finally, conv10 is a 1x1 convolution that produces the segmentation output, with n_classes output channels; its value of 10 means we segment the image into 10 classes. In short, this code implements U-Net's encoder-decoder structure: the input image is progressively transformed into higher-level feature representations, which the decoder upsamples step by step back to the original image size to produce the final dense prediction (the shape check right after the class definition verifies this round trip).
    def forward(self, x):
        # Encoder forward pass; a 2x2 max pool halves the spatial
        # resolution between stages (256 -> 128 -> 64 -> 32 -> 16)
        conv1_out = self.bn1(self.conv1(x))
        conv1_out = self.relu1(conv1_out)
        conv2_out = self.bn2(self.conv2(self.pool(conv1_out)))
        conv2_out = self.relu2(conv2_out)
        conv3_out = self.bn3(self.conv3(self.pool(conv2_out)))
        conv3_out = self.relu3(conv3_out)
        conv4_out = self.bn4(self.conv4(self.pool(conv3_out)))
        conv4_out = self.relu4(conv4_out)
        conv5_out = self.bn5(self.conv5(self.pool(conv4_out)))
        conv5_out = self.relu5(conv5_out)
        # Decoder forward pass: upsample, concatenate the matching
        # encoder feature map (skip connection), then conv -> BN -> ReLU
        upconv6_out = self.upconv6(conv5_out)
        concat6_out = torch.cat([upconv6_out, conv4_out], dim=1)
        conv6_out = self.bn6(self.conv6(concat6_out))
        conv6_out = self.relu6(conv6_out)
        upconv7_out = self.upconv7(conv6_out)
        concat7_out = torch.cat([upconv7_out, conv3_out], dim=1)
        conv7_out = self.bn7(self.conv7(concat7_out))
        conv7_out = self.relu7(conv7_out)
        upconv8_out = self.upconv8(conv7_out)
        concat8_out = torch.cat([upconv8_out, conv2_out], dim=1)
        conv8_out = self.bn8(self.conv8(concat8_out))
        conv8_out = self.relu8(conv8_out)
        upconv9_out = self.upconv9(conv8_out)
        concat9_out = torch.cat([upconv9_out, conv1_out], dim=1)
        conv9_out = self.bn9(self.conv9(concat9_out))
        conv9_out = self.relu9(conv9_out)
        conv10_out = self.conv10(conv9_out)
        return conv10_out
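Before moving on, a quick sanity check (a minimal sketch, not part of the original script) confirms the shape bookkeeping: four pooling steps shrink a 256x256 input to 16x16, and the four transposed convolutions bring it back to full resolution with n_classes output channels.

# Shape sanity check: 256 -> 128 -> 64 -> 32 -> 16 in the encoder,
# then 16 -> 32 -> 64 -> 128 -> 256 in the decoder
net = UNet(n_channels=1, n_classes=10)
dummy = torch.randn(1, 1, 256, 256)   # (batch, channels, height, width)
with torch.no_grad():
    out = net(dummy)
print(out.shape)                      # torch.Size([1, 10, 256, 256])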
# Data preprocessing
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.Grayscale(num_output_channels=1),  # the model below is built with n_channels=1
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])
# Load the datasets: one folder of plain images, the other of images
# annotated (masked) by doctors
train_set = ImageFolder('/path/to/train/folder', transform=transform)
test_set = ImageFolder('/path/to/test/folder', transform=transform)
# Create the data loaders
batch_size = 16
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
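One caveat: ImageFolder pairs every image with a class index derived from its subfolder name, not with a per-pixel mask, so on its own it cannot supply the doctor-annotated masks the training loop below assumes. A minimal sketch of a dataset that does return (image, mask) pairs follows; the folder layout, file naming, and mask encoding (single-channel PNGs whose pixel values are class indices) are illustrative assumptions, not part of the original code.

import os
import numpy as np
from PIL import Image
from torch.utils.data import Dataset

class SegmentationDataset(Dataset):
    # Hypothetical layout: image_dir and mask_dir contain files with the
    # same names; each mask is a single-channel PNG of class indices
    def __init__(self, image_dir, mask_dir, image_transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.filenames = sorted(os.listdir(image_dir))
        self.image_transform = image_transform

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        name = self.filenames[idx]
        image = Image.open(os.path.join(self.image_dir, name)).convert('L')
        mask = Image.open(os.path.join(self.mask_dir, name))
        if self.image_transform is not None:
            image = self.image_transform(image)
        # Resize the mask with nearest-neighbour interpolation so class
        # indices are never blended, then convert to a LongTensor [H, W]
        mask = mask.resize((256, 256), Image.NEAREST)
        mask = torch.as_tensor(np.array(mask), dtype=torch.long)
        return image, mask

With such a dataset, the train_set and test_set above could be swapped for SegmentationDataset instances pointing at the image and mask folders.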
# Define the model and optimizer
model = UNet(n_channels=1, n_classes=10)
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = nn.DataParallel(model)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
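For dense prediction, nn.CrossEntropyLoss expects raw logits of shape [batch, n_classes, H, W] and an integer target of shape [batch, H, W] with values in [0, n_classes); a quick check with random tensors (purely illustrative, not part of the training script):

# CrossEntropyLoss shape contract for segmentation
logits = torch.randn(2, 10, 256, 256)           # [batch, n_classes, H, W]
target = torch.randint(0, 10, (2, 256, 256))    # [batch, H, W], class indices
print(criterion(logits, target))                # scalar loss tensor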
# Train the model
num_epochs = 10
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, masks) in enumerate(train_loader):
        images = images.to(device)
        masks = masks.to(device)
        outputs = model(images)
        loss = criterion(outputs, masks)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i+1) % 10 == 0:
            print("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}".format(epoch+1, num_epochs, i+1, total_step, loss.item()))

# Evaluate the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, masks in test_loader:
        images = images.to(device)
        masks = masks.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        # count every pixel in the batch: B * H * W
        total += masks.size(0) * masks.size(1) * masks.size(2)
        correct += (predicted == masks).sum().item()
print('Accuracy of the model on the test images: {:.2f}%'.format(100 * correct / total))
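The argmax over dimension 1 collapses the [batch, 10, H, W] logits into a [batch, H, W] label map, which is then compared pixel by pixel with the ground truth. The same computation on a toy batch, for illustration:

# Per-pixel accuracy on a toy batch, mirroring the loop above
toy_logits = torch.randn(2, 10, 4, 4)           # [batch, n_classes, H, W]
toy_labels = torch.randint(0, 10, (2, 4, 4))    # ground-truth label map
toy_pred = toy_logits.argmax(dim=1)             # [batch, H, W]
acc = (toy_pred == toy_labels).float().mean().item()
print('toy pixel accuracy: {:.2f}%'.format(100 * acc))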
To summarize: we first defined a UNet class that builds the U-Net model and implements its forward pass. We then used the ImageFolder class to read images from the given paths and applied the transforms pipeline to preprocess them. Next, we created data loaders for the training and test sets and moved the model to the GPU for training and testing. During training we optimized with the cross-entropy loss and the Adam optimizer, and during testing we reported the model's per-pixel accuracy on the test set.
This article is original content; please credit the source when reposting or citing it.