Process SpawnPoolWorker-21:
Traceback (most recent call last):
File "D:\Program Files\Anaconda3\lib\multiprocessing\process.py", line 249, in _bootstrap
self.run()
File "D:\Program Files\Anaconda3\lib\multiprocessing\process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "D:\Program Files\Anaconda3\lib\multiprocessing\pool.py", line 108, in worker
task = get()
File "D:\Program Files\Anaconda3\lib\multiprocessing\queues.py", line 345, in get
return ForkingPickler.loads(res)
AttributeError: Can't get attribute 'task' on
import pandas as pd
import os
from multiprocessing import Queue, Pool

def task(path):
    # print(path)
    df = pd.read_csv(path, header=None)
    extreme = df[1][(df[27]) == 0].unique().tolist()  # exclude stocks at limit-up/limit-down
    df = df[[x not in extreme for x in df[1]]]
    df.to_csv(path, header=None, index=None)
    print(path)

# NOTE: the pool is created at module level, without an if __name__ == '__main__' guard
path = r'E:\Data\citics\201805'
p = Pool(20)
for file in os.listdir(path):
    print(file)
    filePath = os.path.join(path, file)
    p.apply_async(task, args=(filePath,))
p.close()
p.join()
print('end')
Later I found a similar problem online (see https://blog.csdn.net/geekleee/article/details/77838288), but that solution felt a bit cumbersome, so I experimented on my own. It turned out that the error goes away as long as the multiprocessing code is placed under the script's main guard, if __name__ == '__main__':, and the whole script is then run normally. The reason is that on Windows, multiprocessing starts workers with the spawn method: each child process re-imports the main module and looks up the pickled worker function by name there. So task must be defined at the top level of the module, and the pool-creation code must sit inside the guard so the children do not re-execute it when they re-import the script.
import pandas as pd
import os
from multiprocessing import Queue, Pool

def task(path):
    # print(path)
    df = pd.read_csv(path, header=None)
    extreme = df[1][(df[27]) == 0].unique().tolist()  # exclude stocks at limit-up/limit-down
    df = df[[x not in extreme for x in df[1]]]
    df.to_csv(path, header=None, index=None)
    print(path)

if __name__ == '__main__':
    # the pool setup now sits inside the guard, so the spawned children
    # do not re-run it when they re-import this module
    path = r'E:\Data\citics\201805'
    p = Pool(20)
    for file in os.listdir(path):
        print(file)
        filePath = os.path.join(path, file)
        p.apply_async(task, args=(filePath,))
    p.close()
    p.join()
    print('end')
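
One more thing worth noting: apply_async discards its AsyncResult here, so any exception raised inside task (a malformed CSV, for example) disappears silently. Below is a minimal sketch of the same pattern, not the original script: the worker process_file, the pool size, and the directory are placeholders. It keeps the __main__ guard, uses the pool as a context manager, and iterates imap_unordered so that errors raised in the workers propagate back to the parent process.

import os
from multiprocessing import Pool

def process_file(path):
    # placeholder worker; it must stay at module top level so the spawned
    # child processes can import it by name
    print('processing', path)
    return path

if __name__ == '__main__':
    src = r'E:\Data\citics\201805'  # placeholder directory
    files = [os.path.join(src, name) for name in os.listdir(src)]
    # the with-block terminates the pool on exit; all results are already
    # consumed by then, because the loop below drains imap_unordered
    with Pool(processes=4) as pool:
        # iterating imap_unordered yields results as workers finish and
        # re-raises any exception thrown inside process_file
        for done in pool.imap_unordered(process_file, files):
            print('finished', done)
    print('end')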