python 全自动化爬取必应图片

python 全自动化爬取必应图片

import os
import re
import time
import urllib.parse

import requests
from selenium import webdriver

# HTTP headers passed to requests.get() when downloading each image.
# The User-Agent is a desktop Chrome string; presumably it is there so image
# hosts do not reject the default python-requests client (not verified here).
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/83.0.4103.61 Safari/537.36'
    ),
}


# Shared Chrome WebDriver session, created as soon as the module is loaded.
# getcontent() drives this browser to open and scroll the Bing results page.
driver = webdriver.Chrome()
# Scroll schedule as (pixels per scroll, pause in seconds, repetitions).
# Mirrors the original hand-written sequence: 3 x (300px, 6s),
# 2 x (300px, 4s), 9 x (500px, 4s).
_SCROLL_PLAN = (
    (300, 6, 3),
    (300, 4, 2),
    (500, 4, 9),
)

# Extracts the value of the "murl" field from an anchor's `m` attribute.
_MURL_PATTERN = re.compile(r'murl":"(.*?)",')

# Default output directory (the location the original script hard-coded).
_DEFAULT_SAVE_DIR = 'C:\\Users\\79231\\Desktop\\1\\1111'


def _scroll_results():
    """Scroll the results page down step by step, pausing after each step.

    Presumably the pauses give the page time to load further results before
    the anchors are collected -- TODO confirm against the live page.
    """
    for distance, pause, repeats in _SCROLL_PLAN:
        for _ in range(repeats):
            driver.execute_script('window.scrollBy(0, %d)' % distance)
            time.sleep(pause)


def _collect_image_urls():
    """Return the image URLs found in the currently loaded results page.

    Each result anchor carries an `m` attribute; the first "murl" value in it
    is taken as the image URL. Anchors without the attribute or without a
    "murl" value are skipped.
    """
    # find_elements(by, value) exists in both Selenium 3 and Selenium 4,
    # unlike find_elements_by_xpath, which was removed in Selenium 4.3.
    anchors = driver.find_elements(
        'xpath', '//div[@id="vm_c"]//a[@class="iusc"]'
    )
    urls = []
    for anchor in anchors:
        meta = anchor.get_attribute('m')
        if not meta:
            # get_attribute returns None when the attribute is absent.
            continue
        match = _MURL_PATTERN.search(meta)
        if match and match.group(1):
            urls.append(match.group(1))
    return urls


def _download_image(url, target_path, timeout):
    """Download *url* and write the response body to *target_path*."""
    response = requests.get(url, headers=headers, timeout=timeout)
    # Do not save HTTP error pages as if they were images.
    response.raise_for_status()
    with open(target_path, 'wb') as f:
        f.write(response.content)


def getcontent(save_dir=_DEFAULT_SAVE_DIR, timeout=30):
    """Ask for a search term, then download the matching Bing images.

    Opens the Bing image search for the entered keyword in the shared
    ``driver`` browser, scrolls the page, collects the image URLs and saves
    every image that downloads successfully as ``<n>.jpg`` in *save_dir*,
    numbered from 1.

    A failed download (network error, HTTP error status, invalid URL, or a
    file-system error) is reported and skipped; the remaining images are
    still attempted. Errors from the browser itself are not caught here.

    Args:
        save_dir: Directory the images are written to; created if missing.
        timeout: Timeout in seconds for each image request.

    Returns:
        The number of images saved.
    """
    kws = input('请想要下载的图片:')
    kw = urllib.parse.quote(kws)
    driver.get(f'https://www.bing.com/images/search?q={kw}&qs=n&form=QBILPG&sp=-1&pq={kw}&sc=0-4&sk=&cvid=C404FAE042E249B38BD77A6FBB8570E4')

    _scroll_results()
    image_urls = _collect_image_urls()
    if not image_urls:
        print('没有找到可下载的图片')
        return 0

    os.makedirs(save_dir, exist_ok=True)

    num = 1
    for url in image_urls:
        print('正在下载第%s张图片' % num)
        target_path = os.path.join(save_dir, '{}{}'.format(num, '.jpg'))
        try:
            _download_image(url, target_path, timeout)
        except (requests.RequestException, OSError) as exc:
            # One bad URL or host must not abort the whole run.
            print('第%s张图片下载失败: %s' % (num, exc))
            continue
        print('第%s张图片下载完成' % num)
        num += 1
    return num - 1









if __name__ == '__main__':
    try:
        getcontent()
    finally:
        # Always end the WebDriver session so the browser it opened is closed
        # when the script finishes, whether or not getcontent() raised.
        driver.quit()

你可能感兴趣的:(python,IT,科技,python)