错误结果:
Traceback (most recent call last):
File "G:/python图片下载/dianshiju_db.py", line 25, in
print(imgdon("http://tupian.tupianzy.com/pic/upload/vod/2018-11-12/201811121542011934.jpg"))
File "G:/python图片下载/dianshiju_db.py", line 22, in imgdon
urllib.request.urlretrieve(img_url, filename=filename)
File "E:\python\lib\urllib\request.py", line 248, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "E:\python\lib\urllib\request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "E:\python\lib\urllib\request.py", line 526, in open
response = self._open(req, data)
File "E:\python\lib\urllib\request.py", line 544, in _open
'_open', req)
File "E:\python\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "E:\python\lib\urllib\request.py", line 1346, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "E:\python\lib\urllib\request.py", line 1321, in do_open
r = h.getresponse()
File "E:\python\lib\http\client.py", line 1331, in getresponse
response.begin()
File "E:\python\lib\http\client.py", line 297, in begin
version, status, reason = self._read_status()
File "E:\python\lib\http\client.py", line 258, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "E:\python\lib\socket.py", line 586, in readinto
return self._sock.recv_into(b)
ConnectionResetError: [WinError 10054] 远程主机强迫关闭了一个现有的连接。
【Python爬虫错误】ConnectionResetError: [WinError 10054] 远程主机强迫关闭了一个现有的连接
我个人的解决是使用函数回调解决的
意思就是当程序因为远程主机强制关闭而报错时,使用try- except在报错时从新调用该方法使其重新抓取,直至抓取成功,一般一两次就会成功,效率还算可以
源代码:
import os
import random
import urllib.request
def imgdon(url):
img_url = url
file_path = 'D:/book/tvimg/'
file_name = "tvimg" + str(int(random.uniform(20, 10) * 10 ** 14))
# 是否有这个路径
if not os.path.exists(file_path):
# 创建路径
os.makedirs(file_path)
# 获得图片后缀
file_suffix = os.path.splitext(img_url)[1]
# print(file_suffix)
# 拼接图片名(包含路径)
# filename = '{}{}{}{}'.format(file_path, os.sep, file_name, file_suffix)
filename = file_path + file_name + file_suffix
print(filename)
# 下载图片,并保存到文件夹中
urllib.request.urlretrieve(img_url, filename=filename)
return "http://www.klkj1999.top:8081/tvimg/"+file_name + file_suffix
print(imgdon("http://tupian.tupianzy.com/pic/upload/vod/2018-11-12/201811121542011934.jpg"))
解决后:
import os
import random
import urllib.request
def imgdon(url):
try:
img_url = url
file_path = 'D:/book/tvimg/'
file_name = "tvimg" + str(int(random.uniform(20, 10) * 10 ** 14))
# 是否有这个路径
if not os.path.exists(file_path):
# 创建路径
os.makedirs(file_path)
# 获得图片后缀
file_suffix = os.path.splitext(img_url)[1]
# print(file_suffix)
# 拼接图片名(包含路径)
# filename = '{}{}{}{}'.format(file_path, os.sep, file_name, file_suffix)
filename = file_path + file_name + file_suffix
print(filename)
# 下载图片,并保存到文件夹中
urllib.request.urlretrieve(img_url, filename=filename)
except:
imgdon(url)
return "http://www.klkj1999.top:8081/tvimg/"+file_name + file_suffix
print(imgdon("http://tupian.tupianzy.com/pic/upload/vod/2018-11-12/201811121542011934.jpg"))
解决后的结果:
G:\python图片下载\venv\Scripts\python.exe G:/python图片下载/dianshiju_db.py
D:/book/tvimg/tvimg1396168651250299.jpg
D:/book/tvimg/tvimg1502497444740666.jpg
D:/book/tvimg/tvimg1103678614205344.jpg
http://www.klkj1999.top:8081/tvimg/tvimg1396168651250299.jpg
Process finished with exit code 0