I was writing a crawler today and ran into something odd: it produces the result I expect, but there is a bug in it that I just can't track down. Here is my code:
import time
from selenium import webdriver
from bs4 import BeautifulSoup
import requests
import os
import traceback


def write_path(data):
    count = 0
    for i in data:
        try:
            r = requests.get(data[i])
            print(r.status_code)
            path = os.path.join(r"E:\xin wenjian ja\gaoqingbizi", str(i) + ".jpg")
            with open(path, "ab") as f:
                f.write(r.content)
            count = count + 1
            print("\r当前进度:{:.2f}%".format(count * 100 / len(data)), end="")
        except:
            count = count + 1
            print("\r当前进度:{:.2f}%".format(count * 100 / len(data)), end="")
            traceback.print_exc()
            continue


def get_products(html, data):
    soup = BeautifulSoup(html, "lxml")
    items = soup.find_all(name="img")
    for item in items:
        try:
            key = item.attrs["alt"]
            value = item.attrs["src"]
            if key == "4K/5K/8K超清壁纸":
                continue
            data[key] = value
        except:
            continue


def index_page(url, data):
    try:
        browser = webdriver.Firefox()
        browser.get(url)
        html = browser.page_source
        get_products(html, data)
        time.sleep(2)
    finally:
        browser.close()


def main():
    url = "http://www.netbian.com/weimei/index_2.htm"
    data = {}
    index_page(url, data)
    write_path(data)


main()
If anyone could help me find the error, I would be very grateful. My goal is to scrape the images from the page and save them to files; the script does work, but there is still a bug somewhere.
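Without knowing exactly which misbehavior counts as "the bug", a few spots in the code above look like likely culprits: the image file is opened with mode "ab", so re-running the script appends new bytes onto existing files and corrupts them; the filename comes straight from the img alt text, which can contain characters Windows does not allow in paths; browser is created inside the try block but closed in finally, so a failed Firefox launch raises NameError there; and time.sleep(2) runs after page_source has already been read, so it does not give the page any extra time to load. Below is a rough sketch of the script rewritten under those assumptions. SAVE_DIR and safe_filename are illustrative names I introduced; the folder, URL, and the banner alt text are taken from the code above. If the real bug is something else (for example the src attributes pointing at thumbnails rather than full-size images), this sketch will not address it.

import os
import re
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver

SAVE_DIR = r"E:\xin wenjian ja\gaoqingbizi"   # same folder as in the question


def safe_filename(name):
    # Strip characters that Windows does not allow in file names.
    return re.sub(r'[\\/:*?"<>|]', "_", name)


def get_products(html, data):
    # Same idea as the original: map alt text -> image src, skipping the banner image.
    soup = BeautifulSoup(html, "lxml")
    for item in soup.find_all("img"):
        key = item.get("alt")
        value = item.get("src")
        if not key or not value or key == "4K/5K/8K超清壁纸":
            continue
        data[key] = value


def write_path(data):
    count = 0
    for key, url in data.items():
        try:
            r = requests.get(url, timeout=10)
            r.raise_for_status()
            path = os.path.join(SAVE_DIR, safe_filename(key) + ".jpg")
            with open(path, "wb") as f:   # "wb" overwrites; "ab" appends to old files on re-runs
                f.write(r.content)
        except Exception as e:
            print("\nfailed:", url, repr(e))
        finally:
            count += 1
            print("\rProgress: {:.2f}%".format(count * 100 / len(data)), end="")


def index_page(url, data):
    browser = webdriver.Firefox()      # created before try, so finally can always reach it
    try:
        browser.get(url)
        time.sleep(2)                  # wait *before* reading page_source, not after
        get_products(browser.page_source, data)
    finally:
        browser.quit()                 # quit() ends the whole session, close() only closes the window


def main():
    data = {}
    index_page("http://www.netbian.com/weimei/index_2.htm", data)
    write_path(data)


if __name__ == "__main__":
    main()

With "wb" plus the sanitized filename, re-running the script simply overwrites each image instead of growing corrupt files, and the per-URL try/except still lets the progress counter advance past any download that fails.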