以下是一个使用 PyTorch 实现堆叠自编码器的示例代码。该代码包含一个由三层编码器和三层解码器组成的自编码器,以及一些用于训练和测试自编码器的辅助函数。
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import numpy as np
import matplotlib.pyplot as plt
# Define the Stacked Autoencoder class
class StackedAutoencoder(nn.Module):
    """Fully connected autoencoder with three encoding and three decoding layers.

    Args:
        input_dim: Width of each flattened input vector.
        hidden_dims: Indexable collection whose first three entries are the
            widths of the encoding layers; entry 2 is the latent code width.
    """

    def __init__(self, input_dim, hidden_dims):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dims = hidden_dims

        first, second, latent = hidden_dims[0], hidden_dims[1], hidden_dims[2]

        # Encoder: input_dim -> first -> second -> latent
        self.encoder1 = nn.Linear(input_dim, first)
        self.encoder2 = nn.Linear(first, second)
        self.encoder3 = nn.Linear(second, latent)

        # Decoder mirrors the encoder: latent -> second -> first -> input_dim
        self.decoder3 = nn.Linear(latent, second)
        self.decoder2 = nn.Linear(second, first)
        self.decoder1 = nn.Linear(first, input_dim)

        # Non-linearity applied after every layer except the final decoder layer
        self.activation = nn.ReLU()

    def encoder(self, x):
        """Map an input batch to its latent code (ReLU after every layer)."""
        code = x
        for layer in (self.encoder1, self.encoder2, self.encoder3):
            code = self.activation(layer(code))
        return code

    def decoder(self, z):
        """Map a latent code back to input space; the last layer stays linear."""
        hidden = z
        for layer in (self.decoder3, self.decoder2):
            hidden = self.activation(layer(hidden))
        return self.decoder1(hidden)

    def forward(self, x):
        """Return the reconstruction of ``x``."""
        return self.decoder(self.encoder(x))

    def get_encoder_output(self, x):
        """Return the latent code for ``x`` (same result as ``encoder``)."""
        return self.encoder(x)
# Define the training function
def train(model, train_loader, num_epochs, learning_rate):
    """Train an autoencoder to reconstruct its own inputs.

    Args:
        model: Module mapping a (batch, features) tensor to a tensor of the
            same shape.
        train_loader: Iterable yielding ``(inputs, labels)`` batches. The
            labels are ignored.
        num_epochs: Number of passes over ``train_loader``.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A list with the mean batch loss of every epoch.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # Batches must live on the same device as the model's weights.
    device = next(model.parameters()).device
    model.train()

    epoch_losses = []
    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for inputs, _ in train_loader:
            # Flatten each sample; the reconstruction target is the input
            # itself, not the class label delivered by the loader.
            inputs = inputs.to(device).reshape(inputs.size(0), -1)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, inputs)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        # Report the epoch mean; an empty loader yields NaN instead of crashing.
        epoch_loss = running_loss / num_batches if num_batches else float('nan')
        epoch_losses.append(epoch_loss)
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, epoch_loss))

    return epoch_losses
# Define the test function
def test(model, test_loader):
    """Evaluate an autoencoder's reconstruction error.

    Args:
        model: Module mapping a (batch, features) tensor to a tensor of the
            same shape.
        test_loader: Iterable yielding ``(inputs, labels)`` batches. The
            labels are ignored.

    Returns:
        The mean squared reconstruction error over every element of every
        batch, or NaN when ``test_loader`` yields nothing.
    """
    # Sum squared errors and divide once at the end, so a smaller final batch
    # does not skew the average.
    criterion = nn.MSELoss(reduction='sum')
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    squared_error = 0.0
    num_elements = 0
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for inputs, _ in test_loader:
                if device is not None:
                    inputs = inputs.to(device)
                # The reconstruction target is the flattened input itself.
                inputs = inputs.reshape(inputs.size(0), -1)
                outputs = model(inputs)
                squared_error += criterion(outputs, inputs).item()
                num_elements += inputs.numel()
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    test_loss = squared_error / num_elements if num_elements else float('nan')
    print('Average Test Loss: {:.4f}'.format(test_loss))
    return test_loss


# The name matches pytest's default test-function pattern; opt out so that
# importing this helper into a test module does not make pytest collect it.
test.__test__ = False
主程序
# Define the main function
def main():
    """Train and evaluate the stacked autoencoder on MNIST, then plot a decoded sample.

    The final plot is the decoder's output for a latent vector drawn from a
    standard normal distribution, not a reconstruction of a dataset image.
    """
    # Prefer the GPU when one is available
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    # Hyperparameters
    input_dim = 28 * 28
    hidden_dims = [256, 128, 64]
    num_epochs, batch_size, learning_rate = 10, 128, 0.001

    def mnist_split(is_train):
        # Each split is stored under ./data and converted to tensors on access
        return datasets.MNIST(root='./data', train=is_train, transform=transforms.ToTensor(), download=True)

    train_dataset = mnist_split(True)
    test_dataset = mnist_split(False)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Build the model on the chosen device
    model = StackedAutoencoder(input_dim, hidden_dims)
    model = model.to(device)

    train(model, train_loader, num_epochs, learning_rate)
    test(model, test_loader)

    # Decode one random latent vector and show it as a 28x28 grayscale image
    with torch.no_grad():
        latent = torch.randn(1, hidden_dims[-1]).to(device)
        decoded = model.decoder(latent)
        image = decoded.view(28, 28).cpu().numpy()
    plt.imshow(image, cmap='gray')
    plt.show()


if __name__ == '__main__':
    main()
在 main() 函数中,首先设置了设备,然后定义了超参数,接着下载 MNIST 数据集并创建数据加载器。然后创建了自编码器模型,并将其移动到设备上。接下来调用 train() 函数进行训练,然后调用 test() 函数进行测试。最后随机采样一个隐向量,用解码器将其解码为一张图像,并显示出来。
在 train() 函数中,定义了损失函数和优化器,然后对模型进行了训练。在 test() 函数中,定义了损失函数,并对模型进行了测试。
在 test() 函数中,定义了损失函数,并对模型进行了测试。测试过程与训练过程类似,但是不需要进行梯度更新。最后打印测试损失的平均值。
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
# Preprocessing: convert each image to a tensor, then normalize with
# mean 0.5 and std 0.5
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
]
transform = transforms.Compose(preprocessing_steps)
# Load the MNIST training split from ./data (download=True requests a download)
trainset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
# 定义自编码器模型
class StackedAutoencoder(nn.Module):
    """Fully connected autoencoder whose depth follows ``hidden_dims``.

    The encoder applies ``Linear -> ReLU`` once per entry of ``hidden_dims``.
    The decoder mirrors those widths in reverse and ends with a plain linear
    layer, so reconstructions are not restricted to non-negative values.

    Args:
        input_dim: Width of each flattened input vector.
        hidden_dims: Non-empty sequence of hidden layer widths; the last
            entry is the width of the latent code.

    Raises:
        ValueError: If ``hidden_dims`` is empty.
    """

    def __init__(self, input_dim, hidden_dims):
        super(StackedAutoencoder, self).__init__()
        hidden_dims = list(hidden_dims)
        if not hidden_dims:
            raise ValueError("hidden_dims must contain at least one layer width")
        widths = [input_dim] + hidden_dims

        encoder_layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            encoder_layers += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        self.encoder = nn.Sequential(*encoder_layers)

        mirrored = widths[::-1]
        decoder_layers = []
        for fan_in, fan_out in zip(mirrored[:-1], mirrored[1:]):
            decoder_layers += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        # Drop the trailing ReLU: it would clamp every output to >= 0 and make
        # negative target values unreachable.
        decoder_layers.pop()
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` and return its reconstruction."""
        return self.decoder(self.encoder(x))
# Train the autoencoder
input_dim = 28 * 28  # MNIST images are 28x28 pixels
hidden_dims = [256, 128]  # hidden layer widths
model = StackedAutoencoder(input_dim=input_dim, hidden_dims=hidden_dims)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10
for epoch in range(num_epochs):
    # Accumulates the per-batch losses of the current epoch
    running_loss = 0.0
    for images, _ in trainloader:
        # Flatten every image to one row; the labels are not needed
        batch_size = images.size(0)
        images = images.view(batch_size, -1)

        optimizer.zero_grad()
        outputs = model(images)
        # The reconstruction is compared against the input itself
        loss = criterion(outputs, images)
        loss.backward()
        optimizer.step()

        running_loss = running_loss + loss.item()
    # Mean loss over the batches of this epoch
    num_batches = len(trainloader)
    epoch_loss = running_loss / num_batches
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")
print("Training finished!")
# Reconstruct one batch of test images with the trained autoencoder
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=10, shuffle=False)
dataiter = iter(testloader)
# Use the built-in next(): DataLoader iterators implement __next__, and the
# legacy .next() method is not available on current PyTorch releases.
images, labels = next(dataiter)
images = images.view(images.size(0), -1)
# Inference only: without autograd tracking `outputs` carries no graph, so it
# can be converted to a NumPy array for plotting afterwards.
model.eval()
with torch.no_grad():
    outputs = model(images)
# 可视化原始图像和重建图像
import matplotlib.pyplot as plt
def imshow(img):
    """Display an image tensor with matplotlib.

    Args:
        img: Tensor whose first axis is moved to the end before plotting
            (i.e. it is expected in channel-first layout) and whose values
            were normalized with mean 0.5 and std 0.5.
    """
    img = img / 2 + 0.5  # undo the (0.5, 0.5) normalization
    # detach() lets tensors that still track gradients be converted, and
    # cpu() covers tensors that live on an accelerator.
    npimg = img.detach().cpu().numpy()
    # Move the first axis last; the ndarray method avoids needing the numpy
    # module name in scope.
    plt.imshow(npimg.transpose((1, 2, 0)))
    plt.axis('off')
    plt.show()
# Show the original images first, then their reconstructions
for batch in (images, outputs):
    # Restore the 1x28x28 image shape and tile the batch into a single grid
    grid = torchvision.utils.make_grid(batch.view(-1, 1, 28, 28))
    imshow(grid)
这个案例实现了一个简单的两层自编码器,用于对MNIST数据集中的手写数字图像进行重建。首先定义了一个StackedAutoencoder类,它包含一个编码器和一个解码器,其中编码器是一个包含ReLU激活函数的全连接层序列,解码器也是一个包含ReLU激活函数的全连接层序列。然后,通过使用MSE损失函数和Adam优化器对模型进行训练。在训练过程中,将图像展平为784维的向量,并将其输入到模型中,然后计算重建图像与原始图像之间的损失,并进行反向传播和参数更新。最后,使用训练好的模型对一批测试图像进行重建,并将原始图像和重建图像可视化。
自编码器可以包含更多的隐藏层,更复杂的结构和更多的训练步骤,以更好地学习数据的表示。此外,还可以尝试在编码器和解码器之间添加dropout层、使用不同的激活函数等来改进模型的性能。