# Scrape images from jandan.net with Python + Selenium.
# URL: http://jandan.net/ooxx
#coding:utf-8
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
import os.path
class getImg():
    """Download the images posted on the listing pages of jandan.net/ooxx.

    A Chrome session is started when the object is created. ``run`` drives
    the whole crawl and quits the browser when it finishes, so one instance
    is meant for one ``run`` call.
    """

    def __init__(self):
        """Store the start URL and open the Chrome browser session."""
        self.url = "http://jandan.net/ooxx"
        self.driver = webdriver.Chrome()

    @staticmethod
    def _local_name(img_url):
        """Return the file name under which *img_url* is stored on disk.

        The name is the last 10 characters of the URL. Path separators are
        replaced with ``_`` so a short URL can never address a
        sub-directory of the download folder.
        """
        return img_url[-10:].replace('/', '_').replace('\\', '_')

    def list_img(self):
        """Collect the image URLs from the comment list of the current page.

        Returns:
            A list with the ``src`` of the first image of every comment
            entry whose ``class`` attribute is not exactly ``'row'``.
            Entries without an image (or without a ``src``) are skipped.
        """
        entries = self.driver.find_elements(
            By.XPATH, '//div[@id="comments"]/ol/li')
        img_urls = []
        for entry in entries:
            if entry.get_attribute("class") == 'row':
                continue
            # find_elements (plural) yields an empty list instead of raising
            # when a comment carries no image, so one text-only comment does
            # not abort the whole page.
            images = entry.find_elements(
                By.XPATH, './/div[@class="text"]/p/img')
            if not images:
                continue
            src = images[0].get_attribute("src")
            if src:
                img_urls.append(src)
        return img_urls

    def save_img(self, list):
        """Download every URL in *list* into the ``./煎蛋网`` directory.

        Args:
            list: iterable of image URLs. The name shadows the builtin but
                is kept so existing keyword callers keep working.

        A download that fails (connection error, timeout, HTTP error
        status) is reported and skipped; the remaining URLs are still
        processed.
        """
        target_dir = './煎蛋网'
        os.makedirs(target_dir, exist_ok=True)
        for img in list:
            print(img)
            try:
                # The timeout keeps one stalled server from blocking the crawl.
                response = requests.get(img, timeout=30)
                response.raise_for_status()
            except requests.RequestException as err:
                print("skipped {}: {}".format(img, err))
                continue
            target = os.path.join(target_dir, self._local_name(img))
            with open(target, 'wb') as f:
                f.write(response.content)

    def run(self, pages=5):
        """Crawl up to *pages* listing pages, saving the images of each one.

        Args:
            pages: maximum number of pages to visit (default 5).

        The crawl stops early when no '下一页' (next page) link is present.
        The browser is always quit at the end, even when an error occurs.
        """
        try:
            self.driver.get(self.url)
            for page in range(pages):
                print("============第" + str(page + 1) + "页===========")
                self.save_img(self.list_img())
                if page == pages - 1:
                    # Nothing left to scrape, so do not load another page.
                    break
                next_links = self.driver.find_elements(By.LINK_TEXT, '下一页')
                if not next_links:
                    break
                next_links[0].click()
        finally:
            self.driver.quit()
if __name__ == "__main__":
    # Script entry point: build the crawler and start the crawl.
    crawler = getImg()
    crawler.run()