Python Selenium 自动化爬取动态加载图片

Python Selenium 自动化爬取动态加载图片

import os
import time

import requests
from selenium import webdriver

# HTTP headers passed to `requests.get` when downloading images: the request
# identifies itself with a desktop Chrome User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/83.0.4103.61 Safari/537.36'
    ),
}


class YunSpider():
    """Download the images shown on a paginated gallery using a Chrome browser.

    The spider opens ``url`` in a Selenium-driven Chrome instance, scrolls each
    page step by step, collects the ``src`` of every matching ``<img>`` element,
    downloads it with ``requests`` and then follows the "next page" link.

    Instances can be used as context managers; leaving the ``with`` block
    closes the browser.
    """

    # XPath matching the <img> elements whose `src` is downloaded.
    IMAGE_XPATH = '//div[@class="random_picture"]//img'
    # XPath of the "next page" link in the pagination bar.
    NEXT_PAGE_XPATH = '//*[@id="pic-detail"]/div/div[2]/div[3]/ul/li[13]/a'
    # Scrolling schedule: number of steps, and seconds to wait after each step.
    SCROLL_STEPS = 6
    SCROLL_PAUSE = 4
    # Seconds to wait after clicking "next page" before touching the page.
    PAGE_PAUSE = 0.5
    # Seconds after which a stalled image download is abandoned.
    REQUEST_TIMEOUT = 10

    def __init__(self, url, save_dir='C:\\Users\\79231\\Desktop\\1\\1111', pages=5):
        """Remember the settings and start the browser.

        Args:
            url: Address of the first gallery page to open.
            save_dir: Directory the images are written to. Defaults to the
                location that used to be hard-coded in ``getContent``.
            pages: Maximum number of gallery pages to process.
        """
        self.url = url
        self.save_dir = save_dir
        self.pages = pages
        # Launches a Chrome window right away.
        self.driver = webdriver.Chrome()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Quit the browser started in ``__init__``."""
        self.driver.quit()

    def getContent(self):
        """Walk through the gallery pages and save every image found.

        Files are named ``1.jpg``, ``2.jpg``, ... inside ``save_dir`` using one
        counter that keeps running across pages, so a later page never
        overwrites the files of an earlier one. Images whose download fails
        are reported and skipped. Crawling stops after ``pages`` pages or as
        soon as no "next page" link is found.
        """
        self.driver.get(self.url)
        # Create the target directory up front so open() cannot fail on it.
        os.makedirs(self.save_dir, exist_ok=True)

        saved = 0
        for page in range(self.pages):
            # Scroll every page (not just the first one) before reading it.
            self._scroll_page()
            # All URLs are collected first and each is downloaded exactly once.
            for src in self._image_urls():
                if self._download(src, saved + 1):
                    saved += 1

            if page == self.pages - 1:
                # Nothing is read after the last page, so do not navigate on.
                break
            next_links = self._find_all(self.NEXT_PAGE_XPATH)
            if not next_links:
                # No pagination link on this page: treat it as the last page.
                break
            next_links[0].click()
            time.sleep(self.PAGE_PAUSE)

    def _find_all(self, xpath):
        """Return every element matching ``xpath`` (an empty list if none)."""
        # Imported here so the module's import block stays untouched; the
        # find_element(s)_by_xpath helpers no longer exist in Selenium >= 4.3.
        from selenium.webdriver.common.by import By

        return self.driver.find_elements(By.XPATH, xpath)

    def _scroll_page(self):
        """Scroll down in 300px steps, pausing after each one."""
        for _ in range(self.SCROLL_STEPS):
            self.driver.execute_script('window.scrollBy(0, 300)')
            time.sleep(self.SCROLL_PAUSE)

    def _image_urls(self):
        """Return the non-empty ``src`` values of the images on the current page."""
        sources = (img.get_attribute('src') for img in self._find_all(self.IMAGE_XPATH))
        # get_attribute returns None when the attribute is absent.
        return [src for src in sources if src]

    def _download(self, src, index):
        """Fetch ``src`` and write it to ``<save_dir>/<index>.jpg``.

        Returns:
            True when the file was written, False when the request failed.
        """
        print('正在下载第:%s张图片' % index)
        try:
            response = requests.get(src, headers=headers, timeout=self.REQUEST_TIMEOUT)
            # Do not store an HTTP error page under a .jpg name.
            response.raise_for_status()
        except requests.RequestException as exc:
            print('第%s张图片下载失败,已跳过: %s' % (index, exc))
            return False

        target = os.path.join(self.save_dir, '{}{}'.format(index, '.jpg'))
        with open(target, 'wb') as f:
            f.write(response.content)
        print('第%s张图片保持成功!' % index)
        return True


# Script entry point: crawl the doutula photo list and save its images.
if __name__ == '__main__':
    url = 'https://www.doutula.com/photo/list/'
    yunspider = YunSpider(url)
    try:
        yunspider.getContent()
    finally:
        # Always shut the browser down, even when the crawl raises, so no
        # Chrome/chromedriver process is left running.
        yunspider.driver.quit()

你可能感兴趣的:(python,IT,科技,python)