[pytorch] Model Parallel with pretrained networks (e.g. timm)

Model parallelism with pretrained networks

  • 1. Basic usage: build the network yourself and assign each block to a GPU
  • 2. Model parallelism with torchvision
  • 3. Model Parallel with pretrained networks (e.g. timm)

Several ways to use model parallelism

1. Basic usage: build the network yourself and assign each block to a GPU

If you run the job on a Slurm-managed server, you need to request two GPUs with gpu:2:

#!/bin/bash

#SBATCH -J LIYIHAO
#SBATCH --output=cataract_RL_%j.out
#SBATCH --error=cataract_RL_%j.txt
#SBATCH --gres=gpu:2
#SBATCH --nodelist=ns3185995
#SBATCH -c 9

srun singularity exec --nv /home/yihao/GOALS/ra_new.sif python3 Basic_Usage.py

Basic_Usage.py:
import torch
import torch.nn as nn
import torch.optim as optim


print('cuda',torch.cuda.is_available())
print('gpu number',torch.cuda.device_count())
for i in range(torch.cuda.device_count()):
    print(torch.cuda.get_device_name(i))
    
    
class ToyModel(nn.Module):
    def __init__(self):
        super(ToyModel, self).__init__()
        self.net1 = torch.nn.Linear(10, 10).to('cuda:0')  # first half of the model on GPU 0
        self.relu = torch.nn.ReLU()
        self.net2 = torch.nn.Linear(10, 5).to('cuda:1')   # second half on GPU 1

    def forward(self, x):
        x = self.relu(self.net1(x.to('cuda:0')))  # move the input to GPU 0 for the first half
        return self.net2(x.to('cuda:1'))          # move the activations to GPU 1 for the second half
        
        
model = ToyModel()
for name, param in  model.named_parameters():
    print(name, param.device)
loss_fn = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

optimizer.zero_grad()
outputs = model(torch.randn(20, 10))      # output ends up on cuda:1
labels = torch.randn(20, 5).to('cuda:1')  # labels must live on the same GPU as the output
loss_fn(outputs, labels).backward()       # gradients flow back across both GPUs
optimizer.step()

print('finish')

Output:

cuda True
gpu number 2
Tesla V100S-PCIE-32GB
Tesla V100S-PCIE-32GB
net1.weight cuda:0
net1.bias cuda:0
net2.weight cuda:1
net2.bias cuda:1
finish
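
One thing worth verifying after the backward pass above: autograd creates each gradient on the same device as its parameter, which is why a single optimizer can update layers that live on different GPUs. A quick sanity check (a minimal sketch, appended to Basic_Usage.py):

# Gradients live on the same device as their parameters,
# so a single optimizer.step() updates both GPUs.
for name, param in model.named_parameters():
    print(name, param.device, param.grad.device)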

2. Model parallelism with torchvision

import torch.nn as nn
from torchvision.models.resnet import ResNet, Bottleneck

num_classes = 1000

class ModelParallelResNet50(ResNet):
    def __init__(self, *args, **kwargs):
        super(ModelParallelResNet50, self).__init__(
            Bottleneck, [3, 4, 6, 3], num_classes=num_classes, *args, **kwargs)

        self.seq1 = nn.Sequential(
            self.conv1,
            self.bn1,
            self.relu,
            self.maxpool,

            self.layer1,
            self.layer2
        ).to('cuda:0')  # placed on the first GPU

        self.seq2 = nn.Sequential(
            self.layer3,
            self.layer4,
            self.avgpool,
        ).to('cuda:1')  # placed on the second GPU

        self.fc.to('cuda:1')

    def forward(self, x):
        x = self.seq2(self.seq1(x).to('cuda:1'))
        return self.fc(x.view(x.size(0), -1))

model = ModelParallelResNet50()
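
Training the split ResNet-50 follows the same pattern as the toy example in section 1: the input batch goes to cuda:0, the labels go to cuda:1 where the logits are produced, and a single optimizer updates the parameters on both GPUs. A minimal sketch (batch size and image size are illustrative choices, not from the original code):

import torch
import torch.optim as optim

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

inputs = torch.randn(8, 3, 224, 224)                       # dummy batch of 8 RGB images
labels = torch.randint(0, num_classes, (8,)).to('cuda:1')  # targets on the GPU that holds the output

optimizer.zero_grad()
outputs = model(inputs.to('cuda:0'))  # forward() expects the input to be on cuda:0 already
loss = loss_fn(outputs, labels)       # loss is computed on cuda:1
loss.backward()                       # gradients flow back across both GPUs
optimizer.step()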

3. Model Parallel with pretrained networks (e.g. timm)

Wrap the existing pretrained network, group its layers into new blocks, and assign each block to a GPU.

import torch
import torch.nn as nn
import torch.optim as optim
import timm

Res_timm2 = timm.create_model('resnet34', pretrained=True)

class ModelParallelResNet34(nn.Module):
    def __init__(self, Res_timm2):
        super(ModelParallelResNet34, self).__init__()

        self.seq1 = nn.Sequential(
            Res_timm2.conv1,
            Res_timm2.bn1,
            Res_timm2.act1,
            Res_timm2.maxpool,

            Res_timm2.layer1,
            Res_timm2.layer2
        ).to('cuda:0')  # placed on the first GPU

        self.seq2 = nn.Sequential(
            Res_timm2.layer3,
            Res_timm2.layer4,
            Res_timm2.global_pool,
        ).to('cuda:1')  # placed on the second GPU

        self.fc = Res_timm2.fc.to('cuda:1')

    def forward(self, x):
        x = self.seq2(self.seq1(x).to('cuda:1'))
        return self.fc(x)

model2 = ModelParallelResNet34(Res_timm2)

print("----method 2 Resnet34 = ")
for name, param in  model2.named_parameters():
    print(name, param.device)
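
Running data through the wrapped timm model works the same way: the caller puts the batch on cuda:0, and the logits come back on cuda:1. A minimal inference sketch (batch size and image size are illustrative choices):

model2.eval()
inputs = torch.randn(4, 3, 224, 224).to('cuda:0')  # dummy batch on the first GPU
with torch.no_grad():
    logits = model2(inputs)                        # crosses from cuda:0 to cuda:1 inside forward()
print(logits.shape, logits.device)                 # expected: torch.Size([4, 1000]) on cuda:1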
