Scraping the Pixiv ranking page with Python

Scraping Pixiv requires a proxy or VPN, since the site may not be directly reachable from your network.
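
If pixiv.net is not directly reachable, requests can also be routed through a local proxy. A minimal sketch, assuming a hypothetical proxy listening on 127.0.0.1:7890 (replace the address and port with your own setup):

import requests

proxies = {  # hypothetical local proxy address; change the port to match your own proxy
    "http": "http://127.0.0.1:7890",
    "https": "http://127.0.0.1:7890",
}
resp = requests.get("https://www.pixiv.net/", proxies=proxies, timeout=10)
print(resp.status_code)  # 200 means the proxy can reach Pixiv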

Import the required packages

import requests, os, time, winsound  # winsound (Windows-only) is used for the completion beep

Set up the beep parameters; the system will beep once the download finishes

duration = 3000  # millisecond
freq = 440  # Hz

Set up the request headers

headers = {
    "cookie": "YOUR_COOKIE_HERE",  # replace with the cookie string copied from your browser
    'Referer': 'https://www.pixiv.net/',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
}

To get your own cookie: log in first, then right-click the page, choose Inspect, and switch to the Network tab.


Click any request in the list on the left, find its Headers panel, and copy the cookie value.

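Hard-coding the cookie into the script works, but keeping it out of the source is a little safer. A minimal sketch, assuming you store the copied string in an environment variable (PIXIV_COOKIE is just an example name):

import os

cookie = os.environ.get("PIXIV_COOKIE", "")  # example variable name; set it to the string copied above
headers = {
    "cookie": cookie,
    'Referer': 'https://www.pixiv.net/',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
}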

o = 1  # counter: which work is currently being downloaded
for m in range(1, 3):  # pages 1 and 2 of the ranking, i.e. only the top 100 works
    shs = requests.get("https://www.pixiv.net/ranking.php?p=" + str(m) + "&format=json", headers=headers)  # request the ranking page as JSON
    shs.encoding = 'utf-8'
    print(shs.text)
    zbj = shs.json()
    for k in zbj["contents"]:
        illust_id = k["illust_id"]  # extract the work's id
        swk = requests.get("https://www.pixiv.net/ajax/illust/" + str(illust_id) + "/pages?lang=zh", headers=headers)
        swk_j = swk.json()
        j = 1  # counter: which image of the current work is being downloaded
        for i in swk_j["body"]:
            original = i["urls"]["original"].replace("\\", "")  # strip stray escape characters
            print(original)
            ts = requests.get(original, headers=headers)
            print(str(o) + "   " + str(j))
            print(ts)
            if not os.path.exists("pic"):  # create the output folder for the images
                os.mkdir("pic")
            with open("pic" + "/" + str(illust_id) + "_" + str(j) + "." + original.split(".")[-1], "wb") as f:
                f.write(ts.content)  # save the image
            j += 1
            time.sleep(2)  # sleep 2 s to avoid triggering anti-scraping
        o += 1
winsound.Beep(freq, duration)  # system beep when everything is done
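
If a request occasionally fails, the image download could be wrapped in a small helper that checks the status code before writing the file. This save_image function is a hypothetical addition, not part of the original script:

def save_image(url, path, headers, retries=3):
    # Download one image; skip it if Pixiv keeps refusing (a missing Referer usually shows up as HTTP 403).
    for attempt in range(retries):
        resp = requests.get(url, headers=headers)
        if resp.status_code == 200:
            with open(path, "wb") as f:
                f.write(resp.content)
            return True
        time.sleep(2)  # short pause before retrying
    print("failed:", url)
    return False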

All done!

Full code

import requests, os, time, winsound

duration = 3000  # millisecond
freq = 440  # Hz

headers = {
    "cookie": "YOUR_COOKIE_HERE",  # replace with the cookie string copied from your browser
    'Referer': 'https://www.pixiv.net/',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
}

o = 1
for m in range(1,3):
    shs = requests.get("https://www.pixiv.net/ranking.php?p="+str(m)+"&format=json",headers=headers)
    shs.encoding='utf-8'
    print(shs.text)
    zbj = shs.json()
    for k in zbj["contents"]:
        illust_id = k["illust_id"]
        swk = requests.get("https://www.pixiv.net/ajax/illust/"+str(illust_id)+"/pages?lang=zh",headers=headers)
        swk_j = swk.json()
        j = 1
        for i in swk_j["body"]:
            original = i["urls"]["original"].replace("\\","")
            print(original)
            ts = requests.get(original, headers=headers)
            print(str(o)+"   "+str(j))
            print(ts)
            if not os.path.exists("pic"):
                os.mkdir("pic")
            with open("pic"+"/"+str(illust_id)+"_"+str(j)+"."+original.split(".")[-1], "wb") as f:
                f.write(ts.content)
            j+=1
            time.sleep(2)
        o+=1
winsound.Beep(freq, duration)  # system beep
