from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from scrapy.spiderloader import SpiderLoader
import time
from multiprocessing import Pool
# Spider names grouped into batches. The __main__ loop below handles one
# inner list at a time, passing each name to process1 through a Pool.
list_all = [
    [
        'End_hainan', 'End_hebei', 'End_heilongjiang', 'End_henan',
        'End_hubei', 'End_jiangsu', 'End_jiangxi', 'End_shanghai',
    ],
    [
        'End_shanxi', 'End_sichuan', 'End_tianjing', 'End_xinjiang',
        'End_fujian', 'End_shandong', 'End_yunnan', 'End_zhejiang',
    ],
    [
        'End_anhui', 'End_beijing', 'End_neimenggu', 'End_chongqing',
        'End_gansu', 'End_guangdong', 'End_guangxi', 'End_guizhou',
    ],
    [
        'End_liaoning', 'End_jilin', 'End_ningxia', 'End_hunan',
        'End_shannxi',
    ],
]
def process1(name):
    """Run the spider called *name* to completion in the current process.

    Builds a CrawlerProcess from the project settings, schedules the spider
    and starts the crawl. ``start()`` blocks while the Twisted reactor runs,
    and the reactor cannot be started a second time in the same process
    (Twisted raises ReactorNotRestartable), so call this at most once per
    process.
    """
    settings = get_project_settings()
    crawler_process = CrawlerProcess(settings=settings)
    crawler_process.crawl(name)
    crawler_process.start()
if __name__ == '__main__':
    # Work through the spider batches one after another, running up to four
    # spiders of a batch in parallel and printing the batch's elapsed time.
    for batch in list_all:
        start_3 = time.time()
        # maxtasksperchild=1 makes the pool replace a worker process after
        # it has run a single task. Without it, a batch with more spiders
        # than workers hands a second spider to a worker that has already
        # run (and stopped) the Twisted reactor, and process1 fails with
        # ReactorNotRestartable.
        pool = Pool(processes=4, maxtasksperchild=1)
        try:
            # chunksize=1 guarantees one task == one spider, so the
            # one-task-per-worker limit really means one spider per process.
            pool.map(process1, batch, chunksize=1)
        finally:
            # Always release the worker processes, even if a spider failed.
            pool.close()
            pool.join()
        end_3 = time.time()
        print('四个进程', end_3 - start_3)
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 125, in worker
result = (True, func(*args, **kwds))
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 48, in mapstar
return list(map(*args))
File "E:\BaiduSyncdisk\shellcompany\shell\shell\spiders\main1.py", line 201, in process1
process.start()
File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\crawler.py", line 348, in start
reactor.run(installSignalHandlers=False) # blocking call
File "C:\ProgramData\Anaconda3\lib\site-packages\twisted\internet\base.py", line 1317, in run
self.startRunning(installSignalHandlers=installSignalHandlers)
File "C:\ProgramData\Anaconda3\lib\site-packages\twisted\internet\base.py", line 1299, in startRunning
ReactorBase.startRunning(cast(ReactorBase, self))
File "C:\ProgramData\Anaconda3\lib\site-packages\twisted\internet\base.py", line 843, in startRunning
raise error.ReactorNotRestartable()
twisted.internet.error.ReactorNotRestartable
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "E:/BaiduSyncdisk/shellcompany/shell/shell/spiders/main1.py", line 218, in <module>
pool.map(process1, P)#LIST1
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 364, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 771, in get
raise self._value
twisted.internet.error.ReactorNotRestartable
Process finished with exit code 1
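原因:这里的进程池只有四个进程 Pool(processes=4),而每一批(例如 ['End_hainan','End_hebei','End_heilongjiang','End_henan','End_hubei','End_jiangsu','End_jiangxi','End_shanghai'])有八个爬虫(八个 py 文件)。
四个进程要跑八个爬虫,进程池会让已经跑完一个爬虫的工作进程接着去跑下一个;而 Twisted 的 reactor 在同一个进程里停止后不能再次启动,所以第二次调用 process.start() 时就抛出了 ReactorNotRestartable。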