多进程GPU调用问题

前几天遇到了一个问题:通过 multiprocessing.Pool 创建多进程,在子进程外部(主进程中)load 模型,然后在子进程内部进行预测。结果子进程直接死掉,而且没有任何报错(apply_async 提交的任务如果抛出异常,只有在对返回的结果对象调用 .get() 时才会在主进程中抛出,否则会被静默吞掉)。

import cv2
import os
import matplotlib.pyplot as plt
import numpy as np
from onehot import onehot
import torch
import torch.nn as nn
from FCN import FCN8s, FCN16s, FCNs, VGGNet
from torchvision import transforms
import pdb
from multiprocessing import Pool

root_path = '/home/public/non_im_list.txt'
# One image path per line.  Only the line terminator is stripped, so a final
# line without a trailing newline keeps its last character, and the file
# handle is closed as soon as the list is built.
with open(root_path) as list_file:
    img_list = [line.rstrip('\n') for line in list_file]

# Expose only physical GPU 2 to this process and its children.  The variable
# CUDA actually reads is CUDA_VISIBLE_DEVICES; it has to be set before CUDA is
# first initialised.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
def count_num(de_im_array):
    """Return how many elements of ``de_im_array`` are equal to 0.

    The whole array is counted in one vectorised pass, so the mask does not
    have to be exactly 160x160.

    Args:
        de_im_array: array-like mask.

    Returns:
        int: number of zero-valued elements.
    """
    return int(np.count_nonzero(np.asarray(de_im_array) == 0))
# Shared preprocessing: ToTensor turns the image array into a float tensor,
# Normalize then standardises each of the three channels with the given
# mean/std.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
def img_2_tensor(imgfile):
    """Load an image file as a normalised batch of shape (1, 3, 160, 160).

    Args:
        imgfile: path of the image to read.

    Returns:
        torch.Tensor: the image resized to 160x160, passed through
        ``transform`` and given a leading batch dimension.

    Raises:
        OSError: if OpenCV cannot read ``imgfile``.
    """
    img = cv2.imread(imgfile)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError('cannot read image: {}'.format(imgfile))
    img = cv2.resize(img, (160, 160))
    # NOTE(review): OpenCV loads channels as BGR while the mean/std above look
    # like the usual RGB statistics -- confirm this matches how the models
    # were trained before changing anything.
    return transform(img).unsqueeze(0)
def cpr_2_array(array_1, array_2):
    """Set to 255 every element of ``array_2`` that is 0 in both inputs.

    Args:
        array_1: reference mask; only read.
        array_2: mask that is updated in place.

    Returns:
        ``array_2`` itself (the same object), after the update.
    """
    # One boolean mask replaces the two hard-coded 160x160 loops and the
    # temporary array, and works for any pair of equally shaped masks.
    both_zero = (np.asarray(array_1) == 0) & (array_2 == 0)
    array_2[both_zero] = 255
    return array_2

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Both networks are deserialised and moved to `device` at module level, i.e.
# in the parent process before any pool worker is created.
fcn_model_person = torch.load('checkpoints/fcn_model_person.pt').to(device)
fcn_model_motor = torch.load('checkpoints/fcn_model_motor.pt').to(device)
 
'''
for i, j, k in os.walk(img_dir):
    for im in k:
        img_file = i + '/' +im 
        imput_im = cv2.imread(img_file)
        input_im = cv2.resize(imput_im, (160,160))
        img_template = img_2_tensor(img_file)
        output_person = fcn_model_person(img_template.cuda())
        output0_person = output_person[0,0,:]
        output1_person = output_person[0,1,:]
        output_np0_person = output0_person.cpu().detach().numpy().copy()
        output_np1_person = output1_person.cpu().detach().numpy().copy()
        output_np1_person = -output_np1_person
        output_np1_person[output_np1_person>0] = 255
        output_np1_person[output_np1_person<=0] = 0
        plt.subplot(1,5,1)
        plt.title('Image')
        plt.imshow(input_im)
 
        plt.subplot(1,5,2)
        plt.title('Person')
        plt.imshow(np.squeeze(output_np1_person),'gray')
 
        output_motor = fcn_model_person(img_template.cuda())
        output0_motor = output_motor[0,0,:]
        output1_motor = output_motor[0,1,:]
        output_np0_motor = output0_motor.cpu().detach().numpy().copy()
        output_np1_motor = output1_motor.cpu().detach().numpy().copy()
        output_np1_motor = -output_np1_motor
        output_np1_motor[output_np1_motor<0.5] = 0
        output_np1_motor[output_np1_motor>=0.5] = 255
 
        plt.subplot(1,5,3)
        plt.title('Motor')
        plt.imshow(np.squeeze(output_np1_motor),'gray')
 
 
        plt.subplot(1,5,4)
        plt.title('&&')
        cpr_arr,apr_a = cpr_2_array(output_np1_person, output_np1_motor)
        plt.imshow(np.squeeze(cpr_arr),'gray')
 
 
        plt.subplot(1,5,5)
        plt.title('--')
        plt.imshow(np.squeeze(cpr_arr),'gray'
'''
def Processing_task(TaskID, chunk_size=466146):
    """Run both segmentation models over this task's slice of ``img_list``.

    Task ``TaskID`` handles the ``chunk_size`` consecutive paths starting at
    index ``TaskID * chunk_size``.  The slice is clamped to the end of the
    list, so a short list no longer raises IndexError.

    This version relies on the module-level ``fcn_model_person``,
    ``fcn_model_motor`` and ``device`` created in the parent process.

    Args:
        TaskID: zero-based index of the chunk to process.
        chunk_size: number of images per task.

    Returns:
        list[int]: for each processed image, the ``count_num`` result for the
        combined mask.
    """
    print(TaskID)
    start = TaskID * chunk_size
    counts = []
    with torch.no_grad():  # inference only, no autograd graph needed
        for img_path in img_list[start:start + chunk_size]:
            batch = img_2_tensor(img_path).to(device)

            # Channel 1 of the person model, negated and binarised at 0.
            person = -fcn_model_person(batch)[0, 1, :].cpu().numpy()
            person[person > 0] = 255
            person[person <= 0] = 0

            # Channel 1 of the motor model, negated and binarised at 0.5.
            # The original called fcn_model_person here as well, leaving
            # fcn_model_motor loaded but unused.
            motor = -fcn_model_motor(batch)[0, 1, :].cpu().numpy()
            motor[motor < 0.5] = 0
            motor[motor >= 0.5] = 255

            counts.append(count_num(cpr_2_array(person, motor)))
    return counts





if __name__=='__main__':
    print('Start Task ...')
    p = Pool(2)
    # Keep the AsyncResult handles: an exception raised inside a worker is
    # only re-raised in the parent when .get() is called.  Dropping the handle
    # makes a failed task disappear without any message.
    pending = [p.apply_async(Processing_task, args=(po,)) for po in range(2)]
    p.close()
    p.join()
    for result in pending:
        result.get()
    

然后尝试把模型作为参数传进子进程中:

import cv2
import os
import matplotlib.pyplot as plt
import numpy as np
from onehot import onehot
import torch
import torch.nn as nn
from FCN import FCN8s, FCN16s, FCNs, VGGNet
from torchvision import transforms
import pdb
from multiprocessing import Pool

root_path = '/home/public/non_im_list.txt'
# One image path per line.  Only the line terminator is stripped, so a final
# line without a trailing newline keeps its last character, and the file
# handle is closed as soon as the list is built.
with open(root_path) as list_file:
    img_list = [line.rstrip('\n') for line in list_file]

# Expose only physical GPU 2 to this process and its children.  The variable
# CUDA actually reads is CUDA_VISIBLE_DEVICES; it has to be set before CUDA is
# first initialised.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
def count_num(de_im_array):
    """Return how many elements of ``de_im_array`` are equal to 0.

    The whole array is counted in one vectorised pass, so the mask does not
    have to be exactly 160x160.

    Args:
        de_im_array: array-like mask.

    Returns:
        int: number of zero-valued elements.
    """
    return int(np.count_nonzero(np.asarray(de_im_array) == 0))
# Shared preprocessing: ToTensor turns the image array into a float tensor,
# Normalize then standardises each of the three channels with the given
# mean/std.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
def img_2_tensor(imgfile):
    """Load an image file as a normalised batch of shape (1, 3, 160, 160).

    Args:
        imgfile: path of the image to read.

    Returns:
        torch.Tensor: the image resized to 160x160, passed through
        ``transform`` and given a leading batch dimension.

    Raises:
        OSError: if OpenCV cannot read ``imgfile``.
    """
    img = cv2.imread(imgfile)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError('cannot read image: {}'.format(imgfile))
    img = cv2.resize(img, (160, 160))
    # NOTE(review): OpenCV loads channels as BGR while the mean/std above look
    # like the usual RGB statistics -- confirm this matches how the models
    # were trained before changing anything.
    return transform(img).unsqueeze(0)
def cpr_2_array(array_1, array_2):
    """Set to 255 every element of ``array_2`` that is 0 in both inputs.

    Args:
        array_1: reference mask; only read.
        array_2: mask that is updated in place.

    Returns:
        ``array_2`` itself (the same object), after the update.
    """
    # One boolean mask replaces the two hard-coded 160x160 loops and the
    # temporary array, and works for any pair of equally shaped masks.
    both_zero = (np.asarray(array_1) == 0) & (array_2 == 0)
    array_2[both_zero] = 255
    return array_2

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Both networks are deserialised and moved to `device` at module level, i.e.
# in the parent process before any pool worker is created.
fcn_model_person = torch.load('checkpoints/fcn_model_person.pt').to(device)
fcn_model_motor = torch.load('checkpoints/fcn_model_motor.pt').to(device)
 
'''
for i, j, k in os.walk(img_dir):
    for im in k:
        img_file = i + '/' +im 
        imput_im = cv2.imread(img_file)
        input_im = cv2.resize(imput_im, (160,160))
        img_template = img_2_tensor(img_file)
        output_person = fcn_model_person(img_template.cuda())
        output0_person = output_person[0,0,:]
        output1_person = output_person[0,1,:]
        output_np0_person = output0_person.cpu().detach().numpy().copy()
        output_np1_person = output1_person.cpu().detach().numpy().copy()
        output_np1_person = -output_np1_person
        output_np1_person[output_np1_person>0] = 255
        output_np1_person[output_np1_person<=0] = 0
        plt.subplot(1,5,1)
        plt.title('Image')
        plt.imshow(input_im)
 
        plt.subplot(1,5,2)
        plt.title('Person')
        plt.imshow(np.squeeze(output_np1_person),'gray')
 
        output_motor = fcn_model_person(img_template.cuda())
        output0_motor = output_motor[0,0,:]
        output1_motor = output_motor[0,1,:]
        output_np0_motor = output0_motor.cpu().detach().numpy().copy()
        output_np1_motor = output1_motor.cpu().detach().numpy().copy()
        output_np1_motor = -output_np1_motor
        output_np1_motor[output_np1_motor<0.5] = 0
        output_np1_motor[output_np1_motor>=0.5] = 255
 
        plt.subplot(1,5,3)
        plt.title('Motor')
        plt.imshow(np.squeeze(output_np1_motor),'gray')
 
 
        plt.subplot(1,5,4)
        plt.title('&&')
        cpr_arr,apr_a = cpr_2_array(output_np1_person, output_np1_motor)
        plt.imshow(np.squeeze(cpr_arr),'gray')
 
 
        plt.subplot(1,5,5)
        plt.title('--')
        plt.imshow(np.squeeze(cpr_arr),'gray'
'''
def Processing_task(fcn_model_person, TaskID, chunk_size=466146):
    """Run both segmentation models over this task's slice of ``img_list``.

    Task ``TaskID`` handles the ``chunk_size`` consecutive paths starting at
    index ``TaskID * chunk_size``.  The slice is clamped to the end of the
    list, so a short list no longer raises IndexError.

    Args:
        fcn_model_person: person segmentation model handed over by the caller.
        TaskID: zero-based index of the chunk to process.
        chunk_size: number of images per task.

    Returns:
        list[int]: for each processed image, the ``count_num`` result for the
        combined mask.
    """
    print(TaskID)
    start = TaskID * chunk_size
    counts = []
    with torch.no_grad():  # inference only, no autograd graph needed
        for img_path in img_list[start:start + chunk_size]:
            batch = img_2_tensor(img_path).to(device)

            # Channel 1 of the person model, negated and binarised at 0.
            person = -fcn_model_person(batch)[0, 1, :].cpu().numpy()
            person[person > 0] = 255
            person[person <= 0] = 0

            # Channel 1 of the motor model (module-level fcn_model_motor),
            # negated and binarised at 0.5.  The original called
            # fcn_model_person here as well, leaving the motor model unused.
            motor = -fcn_model_motor(batch)[0, 1, :].cpu().numpy()
            motor[motor < 0.5] = 0
            motor[motor >= 0.5] = 255

            counts.append(count_num(cpr_2_array(person, motor)))
    return counts





if __name__=='__main__':
    print('Start Task ...')
    p = Pool(2)
    # Keep the AsyncResult handles: an exception raised inside a worker is
    # only re-raised in the parent when .get() is called.  Dropping the handle
    # makes a failed task disappear without any message.
    pending = [
        p.apply_async(Processing_task, args=(fcn_model_person, po))
        for po in range(2)
    ]
    p.close()
    p.join()
    for result in pending:
        result.get()
    

出现一个错误:
"Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method"

最后的解决方案是把模型放到子进程内部 load,程序就可以正常运行下去。

 

import cv2
import os
import matplotlib.pyplot as plt
import numpy as np
from onehot import onehot
import torch
import torch.nn as nn
from FCN import FCN8s, FCN16s, FCNs, VGGNet
from torchvision import transforms
import pdb
from multiprocessing import Pool

root_path = '/home/public/non_im_list.txt'
# One image path per line.  Only the line terminator is stripped, so a final
# line without a trailing newline keeps its last character, and the file
# handle is closed as soon as the list is built.
with open(root_path) as list_file:
    img_list = [line.rstrip('\n') for line in list_file]

# Expose only physical GPU 2 to this process and its children.  The variable
# CUDA actually reads is CUDA_VISIBLE_DEVICES; it has to be set before CUDA is
# first initialised.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
def count_num(de_im_array):
    """Return how many elements of ``de_im_array`` are equal to 0.

    The whole array is counted in one vectorised pass, so the mask does not
    have to be exactly 160x160.

    Args:
        de_im_array: array-like mask.

    Returns:
        int: number of zero-valued elements.
    """
    return int(np.count_nonzero(np.asarray(de_im_array) == 0))
# Shared preprocessing: ToTensor turns the image array into a float tensor,
# Normalize then standardises each of the three channels with the given
# mean/std.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
def img_2_tensor(imgfile):
    """Load an image file as a normalised batch of shape (1, 3, 160, 160).

    Args:
        imgfile: path of the image to read.

    Returns:
        torch.Tensor: the image resized to 160x160, passed through
        ``transform`` and given a leading batch dimension.

    Raises:
        OSError: if OpenCV cannot read ``imgfile``.
    """
    img = cv2.imread(imgfile)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError('cannot read image: {}'.format(imgfile))
    img = cv2.resize(img, (160, 160))
    # NOTE(review): OpenCV loads channels as BGR while the mean/std above look
    # like the usual RGB statistics -- confirm this matches how the models
    # were trained before changing anything.
    return transform(img).unsqueeze(0)
def cpr_2_array(array_1, array_2):
    """Set to 255 every element of ``array_2`` that is 0 in both inputs.

    Args:
        array_1: reference mask; only read.
        array_2: mask that is updated in place.

    Returns:
        ``array_2`` itself (the same object), after the update.
    """
    # One boolean mask replaces the two hard-coded 160x160 loops and the
    # temporary array, and works for any pair of equally shaped masks.
    both_zero = (np.asarray(array_1) == 0) & (array_2 == 0)
    array_2[both_zero] = 255
    return array_2


 
'''
for i, j, k in os.walk(img_dir):
    for im in k:
        img_file = i + '/' +im 
        imput_im = cv2.imread(img_file)
        input_im = cv2.resize(imput_im, (160,160))
        img_template = img_2_tensor(img_file)
        output_person = fcn_model_person(img_template.cuda())
        output0_person = output_person[0,0,:]
        output1_person = output_person[0,1,:]
        output_np0_person = output0_person.cpu().detach().numpy().copy()
        output_np1_person = output1_person.cpu().detach().numpy().copy()
        output_np1_person = -output_np1_person
        output_np1_person[output_np1_person>0] = 255
        output_np1_person[output_np1_person<=0] = 0
        plt.subplot(1,5,1)
        plt.title('Image')
        plt.imshow(input_im)
 
        plt.subplot(1,5,2)
        plt.title('Person')
        plt.imshow(np.squeeze(output_np1_person),'gray')
 
        output_motor = fcn_model_person(img_template.cuda())
        output0_motor = output_motor[0,0,:]
        output1_motor = output_motor[0,1,:]
        output_np0_motor = output0_motor.cpu().detach().numpy().copy()
        output_np1_motor = output1_motor.cpu().detach().numpy().copy()
        output_np1_motor = -output_np1_motor
        output_np1_motor[output_np1_motor<0.5] = 0
        output_np1_motor[output_np1_motor>=0.5] = 255
 
        plt.subplot(1,5,3)
        plt.title('Motor')
        plt.imshow(np.squeeze(output_np1_motor),'gray')
 
 
        plt.subplot(1,5,4)
        plt.title('&&')
        cpr_arr,apr_a = cpr_2_array(output_np1_person, output_np1_motor)
        plt.imshow(np.squeeze(cpr_arr),'gray')
 
 
        plt.subplot(1,5,5)
        plt.title('--')
        plt.imshow(np.squeeze(cpr_arr),'gray'
'''
def Processing_task(TaskID, chunk_size=466146):
    """Load both models inside the worker and process this task's images.

    The models are deserialised here, in the worker process, so the parent
    never touches CUDA before the pool is created.  Task ``TaskID`` handles
    the ``chunk_size`` consecutive paths of ``img_list`` starting at index
    ``TaskID * chunk_size``; the slice is clamped to the end of the list.

    The signature takes only the task index (plus an optional chunk size),
    matching the ``args=(po,)`` call in the ``__main__`` block; the former
    ``fcn_model_person`` parameter was overwritten immediately and never used.

    Args:
        TaskID: zero-based index of the chunk to process.
        chunk_size: number of images per task.

    Returns:
        list[int]: for each processed image, the ``count_num`` result for the
        combined mask.
    """
    print(TaskID)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # NOTE(review): recent PyTorch releases are reported to default
    # torch.load to weights_only=True, which rejects fully pickled modules
    # like these -- confirm against the installed version.
    fcn_model_person = torch.load('checkpoints/fcn_model_person.pt').to(device)
    fcn_model_motor = torch.load('checkpoints/fcn_model_motor.pt').to(device)

    start = TaskID * chunk_size
    counts = []
    with torch.no_grad():  # inference only, no autograd graph needed
        for img_path in img_list[start:start + chunk_size]:
            batch = img_2_tensor(img_path).to(device)

            # Channel 1 of the person model, negated and binarised at 0.
            person = -fcn_model_person(batch)[0, 1, :].cpu().numpy()
            person[person > 0] = 255
            person[person <= 0] = 0

            # Channel 1 of the motor model, negated and binarised at 0.5.
            # The original called fcn_model_person here as well, leaving
            # fcn_model_motor loaded but unused.
            motor = -fcn_model_motor(batch)[0, 1, :].cpu().numpy()
            motor[motor < 0.5] = 0
            motor[motor >= 0.5] = 255

            counts.append(count_num(cpr_2_array(person, motor)))
    return counts





if __name__=='__main__':
    print('Start Task ...')
    p = Pool(2)
    # Keep the AsyncResult handles: an exception raised inside a worker is
    # only re-raised in the parent when .get() is called.  Dropping the handle
    # makes a failed task disappear without any message.
    pending = [p.apply_async(Processing_task, args=(po,)) for po in range(2)]
    p.close()
    p.join()
    for result in pending:
        result.get()
    

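我对这个问题的猜想是:可能是作为全局变量 load 的模型使 GPU 被主进程占用,进入子进程时再去申请 GPU,发现 GPU 被仍在生命周期里的主进程占用,拒绝访问,所以失败。只是猜想,有空再研究。(补充:结合上面的报错信息,更准确的原因是主进程在 fork 之前已经初始化了 CUDA,而 CUDA 运行时不支持 fork,fork 出来的子进程无法重新初始化 CUDA。因此要么像最后一版那样只在子进程内部才第一次使用 CUDA,要么把多进程的启动方式改为 spawn 或 forkserver。)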

你可能感兴趣的:(多进程)