这个项目将使用 PyTorch 实现图像风格迁移(Neural Style Transfer),让一张图片看起来具有另一张图片的“艺术风格”。
Python 3.8+
PyTorch(pip install torch torchvision)
PIL(pip install pillow)
CUDA(可选,但建议有 GPU)
bash
style_transfer/
├── style.jpg # 风格图像(如:星夜)
├── content.jpg # 内容图像(如:你的自拍)
└── style_transfer.py # 主程序
python
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
import copy
import matplotlib.pyplot as plt
python
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
python
def load_image(img_path, max_size=400):
    """Load an image file as a normalized batch tensor on ``device``.

    The image is converted to RGB, downscaled (aspect ratio preserved) so that
    its longer side is at most ``max_size`` pixels, turned into a tensor,
    normalized with the mean/std constants below, and given a leading batch
    dimension of size 1.

    Args:
        img_path: Location of the image, passed to ``PIL.Image.open``.
        max_size: Upper bound, in pixels, for the longer image side.

    Returns:
        Tensor of shape ``(1, 3, H, W)`` moved to the module-level ``device``.
    """
    # The context manager closes the underlying file once the RGB copy exists.
    with Image.open(img_path) as raw:
        image = raw.convert('RGB')

    # Resize(int) matches the *shorter* side to the given number, which lets
    # the longer side exceed max_size and upscales small images. An explicit
    # (height, width) pair caps the longer side and never enlarges the image.
    width, height = image.size
    scale = min(1.0, max_size / max(width, height))
    target_hw = (max(1, round(height * scale)), max(1, round(width * scale)))

    in_transform = transforms.Compose([
        transforms.Resize(target_hw),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    image = in_transform(image).unsqueeze(0)
    return image.to(device)
python
def im_convert(tensor):
    """Convert a normalized image tensor back into a viewable PIL image.

    Args:
        tensor: Image tensor with a leading batch dimension of size 1 whose
            channels were normalized with the mean/std used in ``load_image``.

    Returns:
        The de-normalized image as a PIL image.
    """
    image = tensor.detach().to("cpu").clone()
    image = image.squeeze(0)

    # Undo transforms.Normalize (x * std + mean per channel); without this the
    # saved picture has shifted, washed-out colours.
    mean = image.new_tensor([0.485, 0.456, 0.406]).view(-1, 1, 1)
    std = image.new_tensor([0.229, 0.224, 0.225]).view(-1, 1, 1)
    image = image * std + mean

    # Optimisation may push pixel values outside [0, 1]; clamp them before the
    # conversion to an 8-bit image.
    image = image.clamp(0, 1)
    return transforms.ToPILImage()(image)
python
# Load both inputs as normalized batch tensors on the selected device.
content, style = (
    load_image(path).to(device) for path in ("content.jpg", "style.jpg")
)
python
# Keep only the feature-extractor part of pretrained VGG19, in eval mode.
# Its weights are never trained here, so they are frozen: otherwise the
# reference features computed once up front carry an autograd graph, and the
# second backward() call in the optimisation loop fails when it walks that
# already-freed graph. Freezing also avoids computing unused weight gradients.
vgg = models.vgg19(pretrained=True).features.to(device).eval().requires_grad_(False)
我们只取模型中的某些层用于提取风格与内容特征。
python
def get_features(image, model, layers=None):
    """Pass ``image`` through ``model`` child by child and collect outputs.

    Args:
        image: Input fed to the first child module of ``model``.
        model: Module whose direct children are applied one after another in
            registration order.
        layers: Mapping from child-module name to the key under which that
            child's output is stored. Defaults to the six layers listed below.

    Returns:
        Dict mapping each requested key to the output of the matching child.
        Children whose name is not in ``layers`` are still executed.
    """
    if layers is None:
        layers = {
            '0': 'conv1_1',
            '5': 'conv2_1',
            '10': 'conv3_1',
            '19': 'conv4_1',
            '21': 'conv4_2',  # content representation
            '28': 'conv5_1',
        }

    features = {}
    x = image
    # NOTE(review): the stored tensor is the same object that is handed to the
    # next child; if that child operates in place, the stored value changes
    # with it - confirm against the model actually used.
    for name, layer in model._modules.items():
        x = layer(x)
        if name in layers:
            features[layers[name]] = x
    return features
python
def gram_matrix(tensor):
    """Return the Gram matrix of a single feature map.

    Args:
        tensor: 4-D tensor ``(batch, depth, height, width)``; the flattening
            below only succeeds when ``batch`` is 1.

    Returns:
        ``(depth, depth)`` tensor holding the inner products between the
        flattened channels.
    """
    _, d, h, w = tensor.size()
    # reshape (unlike view) also accepts non-contiguous inputs, e.g. the
    # result of a transpose, copying only when it has to.
    flat = tensor.reshape(d, h * w)
    return torch.mm(flat, flat.t())
python
# The content/style references are constants of the optimisation. Computing
# them without autograd tracking keeps backward() from walking (and freeing)
# their graph, which would make the second optimisation step raise.
with torch.no_grad():
    content_features = get_features(content, vgg)
    style_features = get_features(style, vgg)
    # Gram matrix for every collected style layer
    style_grams = {
        layer: gram_matrix(feature) for layer, feature in style_features.items()
    }
python
# Start from a copy of the content image; its pixels are what gets optimized.
# Moving to the device *before* requires_grad_ guarantees `target` is a leaf
# tensor, which the optimizer needs (a later .to() copy would not be a leaf).
target = content.clone().to(device).requires_grad_(True)
python
# Per-layer multipliers applied to each layer's style loss.
style_weights = dict(
    conv1_1=1.0,
    conv2_1=0.75,
    conv3_1=0.2,
    conv4_1=0.2,
    conv5_1=0.2,
)
content_weight = 10_000.0  # alpha: multiplier of the content loss
style_weight = 100.0  # beta: multiplier of the summed style loss
python
# Adam updates the pixels of the target image directly.
optimizer = optim.Adam(params=[target], lr=3e-3)
python
steps = 300
for i in range(1, steps+1):
    target_features = get_features(target, vgg)

    # The reference activations are constants: detach them so backward()
    # only flows into `target` and can be called again on the next step.
    content_loss = torch.mean(
        (target_features['conv4_2'] - content_features['conv4_2'].detach()) ** 2
    )

    style_loss = 0
    for layer in style_weights:
        target_feature = target_features[layer]
        target_gram = gram_matrix(target_feature)
        style_gram = style_grams[layer].detach()
        layer_style_loss = style_weights[layer] * torch.mean((target_gram - style_gram) ** 2)
        # Scale by the squared channel count of this layer.
        style_loss += layer_style_loss / (target_feature.shape[1] ** 2)

    total_loss = content_weight * content_loss + style_weight * style_loss

    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

    # Progress report every 50 steps.
    if i % 50 == 0:
        print(f"Step {i}, Total loss: {total_loss.item():.4f}")
python
# Convert the optimized tensor to a PIL image, write it to result.jpg and
# display it.
final_img = im_convert(target)
final_img.save("result.jpg")
final_img.show()
示例效果
将自拍(content.jpg)与《星夜》(style.jpg)结合,输出一张油画风格的人像。
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
import copy
# 设置设备
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# 图像加载和预处理
def load_image(img_path, max_size=400):
    """Load an image file as a normalized batch tensor on ``device``.

    The image is converted to RGB, downscaled (aspect ratio preserved) so that
    its longer side is at most ``max_size`` pixels, turned into a tensor,
    normalized with the mean/std constants below, and given a leading batch
    dimension of size 1.

    Args:
        img_path: Location of the image, passed to ``PIL.Image.open``.
        max_size: Upper bound, in pixels, for the longer image side.

    Returns:
        Tensor of shape ``(1, 3, H, W)`` moved to the module-level ``device``.
    """
    # The context manager closes the underlying file once the RGB copy exists.
    with Image.open(img_path) as raw:
        image = raw.convert('RGB')

    # Resize(int) matches the *shorter* side to the given number, which lets
    # the longer side exceed max_size and upscales small images. An explicit
    # (height, width) pair caps the longer side and never enlarges the image.
    width, height = image.size
    scale = min(1.0, max_size / max(width, height))
    target_hw = (max(1, round(height * scale)), max(1, round(width * scale)))

    in_transform = transforms.Compose([
        transforms.Resize(target_hw),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ])
    image = in_transform(image).unsqueeze(0)
    return image.to(device)
# 还原为可视图像
def im_convert(tensor):
    """Convert a normalized image tensor back into a viewable PIL image.

    Args:
        tensor: Image tensor with a leading batch dimension of size 1 whose
            channels were normalized with the mean/std used in ``load_image``.

    Returns:
        The de-normalized image as a PIL image.
    """
    image = tensor.detach().to("cpu").clone()
    image = image.squeeze(0)

    # Undo transforms.Normalize (x * std + mean per channel); without this the
    # saved picture has shifted, washed-out colours.
    mean = image.new_tensor([0.485, 0.456, 0.406]).view(-1, 1, 1)
    std = image.new_tensor([0.229, 0.224, 0.225]).view(-1, 1, 1)
    image = image * std + mean

    # Optimisation may push pixel values outside [0, 1]; clamp them before the
    # conversion to an 8-bit image.
    image = image.clamp(0, 1)
    return transforms.ToPILImage()(image)
# 提取特征
def get_features(image, model, layers=None):
    """Pass ``image`` through ``model`` child by child and collect outputs.

    Args:
        image: Input fed to the first child module of ``model``.
        model: Module whose direct children are applied one after another in
            registration order.
        layers: Mapping from child-module name to the key under which that
            child's output is stored. Defaults to the six layers listed below.

    Returns:
        Dict mapping each requested key to the output of the matching child.
        Children whose name is not in ``layers`` are still executed.
    """
    if layers is None:
        layers = {
            '0': 'conv1_1',
            '5': 'conv2_1',
            '10': 'conv3_1',
            '19': 'conv4_1',
            '21': 'conv4_2',  # content layer
            '28': 'conv5_1',
        }

    features = {}
    x = image
    # NOTE(review): the stored tensor is the same object that is handed to the
    # next child; if that child operates in place, the stored value changes
    # with it - confirm against the model actually used.
    for name, layer in model._modules.items():
        x = layer(x)
        if name in layers:
            features[layers[name]] = x
    return features
# Gram矩阵
def gram_matrix(tensor):
    """Return the Gram matrix of a single feature map.

    Args:
        tensor: 4-D tensor ``(batch, depth, height, width)``; the flattening
            below only succeeds when ``batch`` is 1.

    Returns:
        ``(depth, depth)`` tensor holding the inner products between the
        flattened channels.
    """
    _, d, h, w = tensor.size()
    # reshape (unlike view) also accepts non-contiguous inputs, e.g. the
    # result of a transpose, copying only when it has to.
    flat = tensor.reshape(d, h * w)
    return torch.mm(flat, flat.t())
# 主程序入口
def main():
    """Run style transfer on content.jpg / style.jpg and save result.jpg.

    Loads both images from the working directory, extracts reference features
    with pretrained VGG19, then optimizes a copy of the content image with
    Adam for a fixed number of steps and writes the outcome to ``result.jpg``.
    """
    # Load the images
    content = load_image("content.jpg")
    style = load_image("style.jpg")

    # Load the pretrained model. It is only a fixed feature extractor, so its
    # weights are frozen: no weight gradients are computed, and the reference
    # features below do not carry an autograd graph.
    vgg = models.vgg19(pretrained=True).features.to(device).eval()
    vgg.requires_grad_(False)

    # The references are constants of the optimisation. Without no_grad /
    # freezing, backward() would walk their graph on every step and fail on
    # the second one because that graph is freed after the first pass.
    with torch.no_grad():
        content_features = get_features(content, vgg)
        style_features = get_features(style, vgg)
        style_grams = {
            layer: gram_matrix(feature)
            for layer, feature in style_features.items()
        }

    # Device move first, requires_grad_ last, so `target` is always a leaf
    # tensor that the optimizer can update.
    target = content.clone().to(device).requires_grad_(True)

    # Weight settings
    style_weights = {
        'conv1_1': 1.0,
        'conv2_1': 0.75,
        'conv3_1': 0.2,
        'conv4_1': 0.2,
        'conv5_1': 0.2,
    }
    content_weight = 1e4
    style_weight = 1e2

    optimizer = optim.Adam([target], lr=0.003)
    steps = 300

    print("开始风格迁移...")
    for i in range(1, steps + 1):
        target_features = get_features(target, vgg)
        content_loss = torch.mean(
            (target_features['conv4_2'] - content_features['conv4_2']) ** 2
        )

        style_loss = 0
        for layer in style_weights:
            target_feature = target_features[layer]
            target_gram = gram_matrix(target_feature)
            style_gram = style_grams[layer]
            layer_style_loss = style_weights[layer] * torch.mean((target_gram - style_gram) ** 2)
            # Scale by the squared channel count of this layer.
            style_loss += layer_style_loss / (target_feature.shape[1] ** 2)

        total_loss = content_weight * content_loss + style_weight * style_loss

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        # Progress report every 50 steps.
        if i % 50 == 0:
            print(f"Step {i}/{steps}, Total loss: {total_loss.item():.4f}")

    # Save the result
    result = im_convert(target)
    result.save("result.jpg")
    print("风格迁移完成!结果保存在 result.jpg")
# 运行主函数
# Start the transfer only when the file is executed as a script.
if __name__ == "__main__":
    main()
把你的内容图命名为 content.jpg
把你的风格图命名为 style.jpg
放在与 style_transfer.py 同一个目录下
bash
python style_transfer.py
输出:
运行成功后,生成的图像将保存在:
result.jpg