目录
1、创建并运行并行进程
2、使用队列(Queue)来共享数据
3、进程池
4、进程锁
5、比较使用多进程和使用单进程执行一段代码的时间消耗
6、共享变量
多进程是计算机科学中的一个术语,它是指同时运行多个进程,这些进程可以同时执行不同的任务。在计算机操作系统中,进程是分配资源的基本单位,每个进程都有自己独立的内存空间和系统资源,互不干扰。
多进程技术可以用于实现并行计算和分布式计算,其中每个进程可以独立地执行不同的任务,从而可以同时处理多个任务,提高计算机的处理效率。
PyTorch支持使用torch.multiprocessing模块来实现多进程训练。这个模块提供了类似于Python标准库中的multiprocessing模块的功能,但是在PyTorch中进行了扩展,以便更好地支持分布式训练。
使用torch.multiprocessing模块,你可以创建多个进程,每个进程都可以有自己的PyTorch张量和模型参数。这样,你可以将数据分发到不同的进程中,让它们并行地执行训练过程。
import torch.multiprocessing as mp


def action(name, times):
    """CPU-bound busy work: sum the integers 0..times-1, then report.

    The accumulated sum is discarded; the loop only exists to give each
    worker process something to chew on.
    """
    total = 0
    for step in range(times):
        total += step
    print("this process is " + name)


if __name__ == '__main__':
    # Two workers with very different workloads, launched concurrently.
    process1 = mp.Process(target=action, args=('process1', 10000000))
    process2 = mp.Process(target=action, args=('process2', 1000))
    process1.start()
    process2.start()
    # Uncomment to make the main process wait for process2 to finish
    # before continuing with the code below.
    # process2.join()
    print("main process")
main process
this process is process2
this process is process1
import torch.multiprocessing as mp


def action(q, name, times):
    """Sum the integers 0..times-1 and push the result onto queue *q*.

    Args:
        q: a multiprocessing Queue shared with the parent process.
        name: label printed when the work finishes.
        times: upper bound (exclusive) of the summation range.
    """
    init = 0
    for i in range(times):
        init += i
    print("this process is " + name)
    q.put(init)


if __name__ == '__main__':
    q = mp.Queue()
    process1 = mp.Process(target=action, args=(q, 'process1', 10000000))
    process2 = mp.Process(target=action, args=(q, 'process2', 1000))
    process1.start()
    process2.start()
    # Wait for both workers to finish before draining the queue.
    process1.join()
    process2.join()
    # NOTE: entries come off the queue in *completion* order, not start
    # order — with these workloads the first get() is normally the
    # faster process2's result, as the sample output shows.
    result1 = q.get()
    result2 = q.get()
    print(result1)
    print(result2)
    print("main process")
this process is process2
this process is process1
499500
49999995000000
main process
import torch.multiprocessing as mp


def action(times):
    """Return the sum of the integers 0..times-1 (0 when times <= 0)."""
    init = 0
    for i in range(times):
        init += i
    return init


if __name__ == '__main__':
    times = [1000, 1000000]
    # Use the pool as a context manager so the two worker processes are
    # always terminated and reaped, even if map() raises (the original
    # never called close()/join() on the pool).
    with mp.Pool(processes=2) as pool:
        res = pool.map(action, times)
    print(res)
[499500, 499999500000]
import torch.multiprocessing as mp
import time


def action(v, num, lock):
    """Add *num* to the shared counter *v* five times while holding *lock*.

    Holding the lock for the whole loop makes each process's five
    increments appear as one uninterrupted batch in the printed output.

    Args:
        v: a multiprocessing Value('i', ...) shared integer.
        num: amount added per iteration.
        lock: a multiprocessing Lock guarding access to v.
    """
    # 'with' guarantees the lock is released even if the body raises,
    # unlike the original bare acquire()/release() pair.
    with lock:
        for _ in range(5):
            time.sleep(0.1)
            v.value += num
            print(v.value)


if __name__ == '__main__':
    lock = mp.Lock()
    # Shared integer (type code 'i'), initial value 0.
    v = mp.Value('i', 0)
    p1 = mp.Process(target=action, args=(v, 1, lock))
    p2 = mp.Process(target=action, args=(v, 2, lock))
    p1.start()
    p2.start()
    p1.join()
    p2.join()
2
4
6
8
10
11
12
13
14
15
import torch.multiprocessing as mp
import time


def action(name, times):
    """CPU-bound busy work: sum 0..times-1, then announce completion."""
    acc = 0
    for k in range(times):
        acc += k
    print("this process is " + name)


def mpfun():
    """Run two copies of the workload in parallel child processes."""
    workers = [
        mp.Process(target=action, args=('process1', 100000000)),
        mp.Process(target=action, args=('process2', 100000000)),
    ]
    for w in workers:
        w.start()
    for w in workers:
        w.join()


def spfun():
    """Run the same two workloads back-to-back in the current process."""
    action('main process', 100000000)
    action('main process', 100000000)


if __name__ == '__main__':
    # Time the multi-process version ...
    t0 = time.time()
    mpfun()
    print(time.time() - t0)
    # ... then the single-process version, for comparison.
    t1 = time.time()
    spfun()
    print(time.time() - t1)
this process is process1
this process is process2
8.2586669921875
this process is main process
this process is main process
7.6229119300842285
import torch.multiprocessing as mp
import torch


def action(element, t):
    """Add (element + 1) * 1000 in place to slot *element* of tensor *t*."""
    t[element] += (element + 1) * 1000


if __name__ == "__main__":
    t = torch.zeros(2)
    # Move the tensor's storage into shared memory so the child
    # processes mutate the same buffer instead of a private copy.
    t.share_memory_()
    print('before mp: t=')
    print(t)
    workers = [mp.Process(target=action, args=(idx, t)) for idx in (0, 1)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    print('after mp: t=')
    print(t)
before mp: t=
tensor([0., 0.])
after mp: t=
tensor([1000., 2000.])
multigpu_lenet
multigpu_test