Python爬虫:wallhaven图片爬取

Python爬虫:wallhaven图片爬取_第1张图片

import re
import requests

# HTTP headers shared by the download functions in this script.
# The Cookie value is a placeholder: replace it with the cookie copied from
# the browser's developer tools (F12 -> Network) for wallhaven.cc.
headers = dict(Cookie="cookie自己在F12网络中获取 不提供")

def toplist():
    """Download the wallpapers listed on wallhaven's "hot" pages.

    Requests ``https://wallhaven.cc/hot?page=N`` for N in 0..28, follows
    every wallpaper detail link found on each listing page, and saves each
    image found there into ``C:/Users/Administrator/Pictures/`` as
    ``<counter>.jpg`` (or ``<counter>.png`` when the image URL ends in
    ``.png``).  Progress is reported with ``print``.  A failed image download
    or file write is reported and skipped; failures while fetching a listing
    or detail page are not caught and propagate to the caller.
    """
    # NOTE(review): both patterns were lost from the original source (the
    # HTML inside the string literals was stripped).  They are reconstructed
    # from what wallhaven's markup is presumed to look like -- confirm
    # against a live listing page and a live detail page before relying on
    # them.
    one_page_re = r'<a class="preview" href="(.*?)"'
    re_two_page = r'<img id="wallpaper" src="(.*?)"'
    path = "C:/Users/Administrator/Pictures/"
    a = 0  # running counter, used as the file name of each saved image
    # NOTE(review): the original starts at page 0; kept as-is -- confirm
    # whether the site's page numbering starts at 0 or 1.
    for page in range(0, 29):
        url = "https://wallhaven.cc/hot?page=" + str(page)
        # headers must be passed by keyword: the second positional argument
        # of requests.get is `params`, which would put the cookie into the
        # query string instead of the request headers.
        response = requests.get(url, headers=headers).text
        for detail_url in re.findall(one_page_re, response, re.S):
            response_two = requests.get(detail_url, headers=headers).text
            for img_url in re.findall(re_two_page, response_two, re.S):
                a += 1
                # Keep the real extension for PNG images; the original
                # named every file ".jpg" regardless of its format.
                ext = ".png" if img_url.lower().endswith(".png") else ".jpg"
                name = path + str(a) + ext
                try:
                    image = requests.get(img_url, headers=headers).content
                    # Binary mode: `.content` is bytes.
                    with open(name, "wb") as f:
                        f.write(image)
                except (requests.RequestException, OSError):
                    print(">>save==" + str(a) + "==picture failed!<<")
                else:
                    print(">>save==" + str(a) + "==picture success!<<")

def random():
    """Download wallpapers from wallhaven's "random" listing.

    Fetches one listing page to read the total page count, then requests
    ``https://wallhaven.cc/random?seed=SuMDKp&page=N`` for every N below
    that count, follows each wallpaper detail link, and saves each image
    found there into ``C:/Users/Administrator/Pictures/`` as
    ``<counter>.jpg`` (or ``<counter>.png`` when the image URL ends in
    ``.png``).  Progress is reported with ``print``.  If the page count
    cannot be read, a message is printed and the function returns without
    downloading anything.  A failed image download or file write is
    reported and skipped.
    """
    # NOTE(review): these two patterns were lost from the original source
    # (the HTML inside the string literals was stripped).  They are
    # reconstructed from what wallhaven's markup is presumed to look like --
    # confirm against a live listing page and a live detail page.
    re_one_page = r'<a class="preview" href="(.*?)"'
    re_url_get_url = r'<img id="wallpaper" src="(.*?)"'
    # Raw string so that `\d` is a regex escape, not a string escape.
    re_all_page = r'title=".*?">(\d+.?\d*)'
    path = "C:/Users/Administrator/Pictures/"
    a = 0  # running counter, used as the file name of each saved image

    url = 'https://wallhaven.cc/random?seed=SuMDKp&page=2'
    # headers must be passed by keyword everywhere below: the second
    # positional argument of requests.get is `params`, not `headers`.
    # The cookie is sent here as well so that this request matches the
    # per-page requests made in the loop.
    response = requests.get(url, headers=headers).text
    all_page_num = re.findall(re_all_page, response, re.S)
    try:
        # NOTE(review): index 8 is taken from the original; it presumably
        # selects the match that holds the last page number -- verify
        # against the live markup.
        page_count = int(all_page_num[8])
    except (IndexError, ValueError):
        print(">>could not read the page count from the listing page<<")
        return

    for page in range(0, page_count):
        url = 'https://wallhaven.cc/random?seed=SuMDKp&page=' + str(page)
        response_pic_text = requests.get(url, headers=headers).text
        pic_url = re.findall(re_one_page, response_pic_text, re.S)
        print('Now page is :' + str(page))
        for detail_url in pic_url:
            response_url_get_text = requests.get(detail_url, headers=headers).text
            result_pic_url = re.findall(re_url_get_url, response_url_get_text, re.S)
            for img_url in result_pic_url:
                a += 1
                # Keep the real extension for PNG images; the original
                # named every file ".jpg" regardless of its format.
                ext = ".png" if img_url.lower().endswith(".png") else ".jpg"
                name = path + str(a) + ext
                try:
                    image = requests.get(img_url, headers=headers).content
                    # Binary mode: `.content` is bytes.
                    with open(name, "wb") as f:
                        f.write(image)
                except (requests.RequestException, OSError):
                    print(">>save==" + str(a) + "==picture failed!<<")
                else:
                    print(">>save==" + str(a) + "==picture success!<<")

# Script entry point: show the menu, read the user's choice and run the
# matching downloader.
if __name__ == '__main__':
    print("<----------------------------------------->")
    print("welcome To wallhave download picture script")
    print("Download hot picture Enter >> 1 <<")
    print("Download random picture Enter >> 2 <<")
    print("Download path default is : C:/Users/Administrator/Pictures/")
    print("<----------------------------------------->")
    a = input("Enter You choose:")
    try:
        choice = int(a)
    except ValueError:
        # Non-numeric input falls through to the error message below
        # instead of crashing with a traceback.
        choice = None
    if choice == 1:
        toplist()
    elif choice == 2:
        random()
    else:
        print("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")
        print("You Enter num error please try!")
        print("Thank you for using this software. Please restart the software and use it again!")
        print("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")

没有做png和jpg判断!!!

你可能感兴趣的:(爬虫,python,爬虫,开发语言)