每日一爬,爬黑丝爬白丝(想爬啥爬啥)—嘿嘿嘿,想爬多少爬多少

想爬啥爬啥,不过也不能总这么“男儿本色”,爬其他内容也可以

先展示一下成果:

import sys
import requests
import re
import os

# Baidu image-search scraper.
# Asks for a folder name (which is also sent as the search keyword), then
# downloads the thumbnail of every hit found on the requested number of pages
# into that folder as "图片<N>.jpg".

REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled connection hangs forever

count = 0  # running image number, also used to build the file names
wenjian = input("你的照片将要储存到......文件夹:")
img_path = f"./{wenjian}/"  # destination folder, relative to the working directory
if not os.path.exists(img_path):
    print("您没有这个文件为您新建一个文件:")
    # makedirs (unlike mkdir) also copes with nested names such as "a/b".
    os.makedirs(img_path, exist_ok=True)
else:
    # The folder already exists: files with the same numbers will be overwritten,
    # so only continue on an explicit "yes".
    xuanze = input("您有这个文件将要覆盖您的文件内容是否继续:")
    if xuanze != "yes":
        sys.exit()
print("让我们继续------")
print("------------------------------------------------------")
page_num = input("你要几页(一页为30张图片):")
try:
    page_total = int(page_num)
except ValueError:
    # Exit with a readable message instead of an unhandled traceback.
    sys.exit("页数必须是整数")

# The URL and headers never change between pages, so they are built once.
url = "https://image.baidu.com/search/index?tn=baiduimage&ct=201326592&lm=-1&cl=2&ie=gb18030&word=%D0%A1%BD%E3%BD%E3%D5%D5%C6%AC&fr=ala&ala=1&alatpl=normal&pos=0&dyTabStr=MCwyLDMsMSw0LDYsNSw3LDgsOQ%3D%3D"
# Request headers imitating a desktop browser.
# NOTE(review): the Cookie below is a hard-coded session value; it will
# presumably expire and should not be committed to a shared repository.
headers = {
    "Accept": "text/plain, */*; q=0.01",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "Connection": "keep-alive",
    "Cookie": "BDqhfp=%E5%A4%A7%E7%86%8A%E7%8C%AB%E5%9B%BE%E7%89%87%26%26NaN-1undefined%26%261632%26%263; BIDUPSID=D076CA87E4CD25BA082EA0E9B5B9C82F; PSTM=1663428044; MAWEBCUID=web_fMcFGAgtkEbzDpinjKvUtGFDInsruypyhIDrXDSpxBBJoXftlZ; BAIDUID=D076CA87E4CD25BA568D2D9EF1AD5F5C:SL=0:NR=10:FG=1; indexPageSugList=%5B%22%E7%8C%AB%22%2C%22%26cl%3D2%26lm%3D-1%26ie%3Dutf-8%26oe%3Dutf-8%26adpicid%3D%26st%3D%26z%3D%26ic%3D%26hd%3D%26latest%3D%26copyright%3D%26word%3D%E5%A4%A7%E8%B1%A1%26s%3D%26se%3D%26tab%3D%26width%3D%26height%3D%26face%3D%26istype%3D%26qc%3D%26nc%3D%26fr%3D%26expermode%3D%26force%3D%26pn%3D30%26rn%3D30%22%2C%22%E6%80%A7%E6%84%9F%E7%BE%8E%E5%A5%B3%22%5D; ZFY=JujkjWiLPjOsSz:Ag1v0hFWlSBt4qjPC4L6bB4MDS6Jo:C; BAIDUID_BFESS=D076CA87E4CD25BA568D2D9EF1AD5F5C:SL=0:NR=10:FG=1; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; userFrom=null; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; ab_sr=1.0.1_YTc4N2NiNWIyZWM5NTkzYzQ3MmZlNTI3Y2YyM2RiMTE3YmYwMTBiNzQ0YzhlZmJkZDY4YjJhZWU4NjVmMmQxZmJkYTcxODZkYTgwNjhhZDY5ZWZmYjg4Y2FmMGE5YTBmNjc3M2JhZDEwZTU1MTAyMTA1MjUxN2Y2NDNlMTJiNzhjNTIyYTQwNTg5ODNiMzc1MjRlZDdmNTVkMzdkOGJiOQ==",
    "Host": "image.baidu.com",
    "Referer": "https://image.baidu.com/search/index?tn=baiduimage&ct=201326592&lm=-1&cl=2&ie=gb18030&word=%B4%F3%D0%DC%C3%A8%CD%BC%C6%AC&fr=ala&ala=1&alatpl=normal&pos=0&dyTabStr=MTEsMCwxLDMsNiw1LDQsMiw3LDgsOQ%3D%3D",
    "Sec-Ch-Ua": '"Microsoft Edge";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": '"Windows"',
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.43",
    "X-Requested-With": "XMLHttpRequest",
}

for i in range(page_total):
    # Query parameters for one page; the folder name doubles as the keyword.
    params = {
        "tn": "resultjson_com",
        "logid": "11637882045647848541",
        "ipn": "rj",
        "ct": "201326592",
        "fp": "result",
        "fr": "ala",
        "word": wenjian,
        "queryWord": wenjian,
        "cl": "2",
        "lm": "-1",
        "ie": "utf-8",
        "oe": "utf-8",
        # NOTE(review): pn looks like a result offset; it starts at 30, so the
        # first 30 hits are presumably skipped - confirm this is intended.
        "pn": str((i + 1) * 30),
        "rn": "30",
        "gsm": "3c",
    }

    html = requests.get(url, headers=headers, params=params, timeout=REQUEST_TIMEOUT)
    html.raise_for_status()  # stop early on an HTTP error instead of parsing an error page
    html.encoding = "utf-8"

    html = html.text
    # Every hit carries its thumbnail address in a "thumbURL" field.
    result = re.findall(r'"thumbURL":"(.*?)"', html, re.S)
    for wen in result:
        count += 1
        try:
            myimg = requests.get(wen, timeout=REQUEST_TIMEOUT)
            myimg.raise_for_status()
        except requests.RequestException:
            # One broken thumbnail should not abort the whole run, and an
            # error body must not be saved as if it were an image.
            print("第" + str(count) + " 张图片下载失败,已跳过")
            continue
        file_name = f'{img_path}图片{str(count)}.jpg'
        # Binary mode because image data is raw bytes; the with-block closes
        # the file even if the write fails.
        with open(file_name, "wb") as f:
            f.write(myimg.content)
        print("正在保存" + str(count) + " 张图片")

每日一爬,爬黑丝爬白丝(想爬啥爬啥)—嘿嘿嘿,想爬多少爬多少_第1张图片

细节说一下:

辛辛苦苦弄了一天,唉还是我太笨了:

1. 这部分比较简单,就是程序的开头:输入保存图片的文件夹名(同时也是搜索关键词),检查文件夹是否已存在,再输入要爬取的页数。

# Excerpt 1: ask where to store the pictures, prepare the folder, ask for the page count.
count = 0
wenjian = input("你的照片将要储存到......文件夹:")
img_path = f"./{wenjian}/"  # destination folder for the downloads
if os.path.exists(img_path):
    # Folder already there: continue only on an explicit "yes".
    xuanze = str(input("您有这个文件将要覆盖您的文件内容是否继续:"))
    if xuanze != "yes":
        sys.exit()
else:
    print("您没有这个文件为您新建一个文件:")
    os.mkdir(img_path)
print("让我们继续------")
print("------------------------------------------------------")
page_num = input("你要几页(一页为30张图片):")

2.

headers/params就是模仿浏览器访问

前面有记录,可以回头去看。

# Excerpt 2: one iteration per requested page; builds the URL, headers and
# query parameters for that page.
for i in range(int(page_num)):
    # Search URL; the dict in `params` below is appended to it by requests.
    url = "https://image.baidu.com/search/index?tn=baiduimage&ct=201326592&lm=-1&cl=2&ie=gb18030&word=%D0%A1%BD%E3%BD%E3%D5%D5%C6%AC&fr=ala&ala=1&alatpl=normal&pos=0&dyTabStr=MCwyLDMsMSw0LDYsNSw3LDgsOQ%3D%3D"
    # Request headers imitating a desktop browser.
    # NOTE(review): the Cookie is a hard-coded session value and will presumably expire.
    headers = {
        "Accept": "text/plain, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
        "Connection": "keep-alive",
        "Cookie": "BDqhfp=%E5%A4%A7%E7%86%8A%E7%8C%AB%E5%9B%BE%E7%89%87%26%26NaN-1undefined%26%261632%26%263; BIDUPSID=D076CA87E4CD25BA082EA0E9B5B9C82F; PSTM=1663428044; MAWEBCUID=web_fMcFGAgtkEbzDpinjKvUtGFDInsruypyhIDrXDSpxBBJoXftlZ; BAIDUID=D076CA87E4CD25BA568D2D9EF1AD5F5C:SL=0:NR=10:FG=1; indexPageSugList=%5B%22%E7%8C%AB%22%2C%22%26cl%3D2%26lm%3D-1%26ie%3Dutf-8%26oe%3Dutf-8%26adpicid%3D%26st%3D%26z%3D%26ic%3D%26hd%3D%26latest%3D%26copyright%3D%26word%3D%E5%A4%A7%E8%B1%A1%26s%3D%26se%3D%26tab%3D%26width%3D%26height%3D%26face%3D%26istype%3D%26qc%3D%26nc%3D%26fr%3D%26expermode%3D%26force%3D%26pn%3D30%26rn%3D30%22%2C%22%E6%80%A7%E6%84%9F%E7%BE%8E%E5%A5%B3%22%5D; ZFY=JujkjWiLPjOsSz:Ag1v0hFWlSBt4qjPC4L6bB4MDS6Jo:C; BAIDUID_BFESS=D076CA87E4CD25BA568D2D9EF1AD5F5C:SL=0:NR=10:FG=1; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; userFrom=null; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; ab_sr=1.0.1_YTc4N2NiNWIyZWM5NTkzYzQ3MmZlNTI3Y2YyM2RiMTE3YmYwMTBiNzQ0YzhlZmJkZDY4YjJhZWU4NjVmMmQxZmJkYTcxODZkYTgwNjhhZDY5ZWZmYjg4Y2FmMGE5YTBmNjc3M2JhZDEwZTU1MTAyMTA1MjUxN2Y2NDNlMTJiNzhjNTIyYTQwNTg5ODNiMzc1MjRlZDdmNTVkMzdkOGJiOQ==",
        "Host": "image.baidu.com",
        "Referer": "https://image.baidu.com/search/index?tn=baiduimage&ct=201326592&lm=-1&cl=2&ie=gb18030&word=%B4%F3%D0%DC%C3%A8%CD%BC%C6%AC&fr=ala&ala=1&alatpl=normal&pos=0&dyTabStr=MTEsMCwxLDMsNiw1LDQsMiw3LDgsOQ%3D%3D",
        "Sec-Ch-Ua": '"Microsoft Edge";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Platform": '"Windows"',
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.43",
        "X-Requested-With": "XMLHttpRequest",
    }
    # Query parameters for this page; the folder name typed by the user
    # (`wenjian`) is sent as the search keyword.
    params = {
        "tn": "resultjson_com",
        "logid": "11637882045647848541",
        "ipn": "rj",
        "ct": "201326592",
        "fp": "result",
        "fr": "ala",
        "word": wenjian,
        "queryWord": wenjian,
        "cl": "2",
        "lm": "-1",
        "ie": "utf-8",
        "oe": "utf-8",
        # NOTE(review): pn looks like a result offset (30, 60, ...), so the
        # first 30 hits are presumably skipped - confirm this is intended.
        "pn": str(int(i + 1) * 30),
        "rn": "30",  # presumably results per page; the prompt says 30 images per page
        "gsm": "3c",
    }

3.

给服务器发送请求:

然后将响应按 UTF-8 解码

然后用正则表达式在响应文本里找出图片网址(thumbURL 字段)。

# Excerpt 3: fetch one page of search results and extract the thumbnail URLs.
# In the full script these statements run inside the per-page `for` loop; they
# are shown here at a single indentation level so the excerpt is valid Python.
html = requests.get(url, headers=headers, params=params, timeout=10)
html.raise_for_status()  # fail early on an HTTP error instead of parsing an error page
html.encoding = "utf-8"  # decode the response body as UTF-8

html = html.text
# Every hit carries its thumbnail address in a "thumbURL" field.
result = re.findall(r'"thumbURL":"(.*?)"', html, re.S)

4.

这一步是保存文件:逐个下载正则表达式得到的图片网址,以二进制方式写入文件,然后打印保存提示。

    # Excerpt 4: download every thumbnail URL found on this page and save it
    # as a numbered JPEG in the chosen folder.
    for wen in result:
        count += 1
        # The timeout keeps one stalled download from hanging the whole run.
        myimg = requests.get(wen, timeout=10)
        file_name = f'{img_path}图片{str(count)}.jpg'
        # Binary mode ("wb") because image data is raw bytes; the with-block
        # closes the file even if the write fails.
        with open(file_name, "wb") as f:
            f.write(myimg.content)
        print("正在保存" + str(count) + " 张图片")

你可能感兴趣的:(c#,开发语言,python)