写个爬虫吧

import requests

# Baidu image-search JSON API endpoint (keyword 高清 "HD", page 0, 30 results per page).
url = 'https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=0%2C0&fp=detail&logid=10936516020286848972&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=0&lpn=0&st=-1&word=%E9%AB%98%E6%B8%85&z=0&ic=&hd=&latest=©right=&s=undefined&se=&tab=0&width=&height=&face=undefined&istype=2&qc=&nc=&fr=&simics=&srctype=&bdtype=0&rpstart=0&rpnum=0&cs=4191142136%2C743401676&catename=&nojc=undefined&album_id=&album_tab=&cardserver=&tabname=&pn=0&rn=30&gsm=1e&1681623462653='

# Request headers. NOTE: the original code had "X-Requested-With: XMLHttpRequest"
# fused onto the end of the User-Agent value; it must be its own header field,
# otherwise the server never sees it and the UA string is malformed.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
    #'Cookie':'BDqhfp=美女&&-10-1undefined&&0&&1; BDIMGISLOGIN=0; winWH=^6_883x1042; BIDUPSID=2249E61E412A062BF74182296B03096D; PSTM=1654617428; BAIDUID=2249E61E412A062BDAB3549134A52CE8:FG=1; BAIDUID_BFESS=2249E61E412A062BDAB3549134A52CE8:FG=1; ZFY=bukO:BeE5wKcOXR1om2vydU8qz0Kg54J0OYfKQxQ6NeI:C; H_PS_PSSID=38515_36554_38470_38468_38375_38485_37920_37709_26350; delPer=0; PSINO=7; BA_HECTOR=818521ala02l0k2ka404a58q1i3n1tj1m; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; userFrom=www.baidu.com; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; cleanHistoryStatus=0; indexPageSugList=["高清","美女","飞船图标","飞船矢量图","飞船试炼图","pygame 外星人入侵 bmp","pygame 外星人入侵","外星人入侵","bmp"]; BDRCVFR[tox4WRQ4-Km]=mk3SLVN4HKm; ab_sr=1.0.1_ZDM4NWY0NDhjNjEyZTNiNzQyZWM4ZTNiMGZkZjAwMWY2YjdiMWZiZTYzMzBkMDk2NjVhZjhkMTJiN2UxZmQzOWQ1YzEwYTkxZDczYWFhZDRlOWViMGNjZjkyMmRjZjQ2ZmNhNzU3OGM4MDBlNDkzMTQyMWZlZGU5ZDRkYTY1YTQwNzU3M2QwM2M2ODBjOWYwZTZjNzQ2MjhlZWYyNzM3Mg=='
}

def _extract_urls(data, max_len=100):
    """Return every ObjUrl shorter than *max_len* chars from the API payload.

    Each element of *data* is expected to look like
    ``{'replaceUrl': [{'ObjUrl': '...'}, ...], ...}``; malformed or empty
    entries (Baidu pads the list with an empty dict) are skipped instead of
    aborting the whole extraction, which is what the original counter-based
    loop effectively tried to do.  The original also skipped ``data[0]`` by
    accident (the counter was incremented before indexing); this version
    processes every entry.
    """
    urls = []
    for item in data:
        try:
            obj_url = item['replaceUrl'][0]['ObjUrl']
        except (KeyError, IndexError, TypeError):
            # Placeholder / malformed entry: skip it, keep the rest.
            continue
        if len(obj_url) < max_len:
            urls.append(obj_url)
    return urls

# Fetch the search-result JSON and collect the candidate image URLs.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()  # fail fast on HTTP errors instead of parsing garbage
urls = _extract_urls(response.json().get('data', []))
print(urls)

# Headers for the per-image download requests.  The Referer is required:
# Baidu's image CDN rejects hotlink requests that lack it (see the note at
# the end of the post).  As with `headers` above, the original code had
# "X-Requested-With: XMLHttpRequest" fused onto the User-Agent value; it is
# split into its own header field here.
header1 = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
    'Referer': 'https://image.baidu.com/search/detail?ct=503316480&z=0&ipn=d&word=%E9%AB%98%E6%B8%85&step_word=&hs=0&pn=2&spn=0&di=7207123747399008257&pi=0&rn=1&tn=baiduimagedetail&is=0%2C0&istype=2&ie=utf-8&oe=utf-8&in=&cl=2&lm=-1&st=-1&cs=4191142136%2C743401676&os=4072938957%2C2754952205&simid=4144708455%2C575913095&adpicid=0&lpn=0&ln=1659&fr=&fmq=1681623396531_R&fm=result&ic=&s=undefined&hd=&latest=©right=&se=&sme=&tab=0&width=&height=&face=undefined&ist=&jit=&cg=&bdtype=0&oriquery=&objurl=http%3A%2F%2Fpic1.win4000.com%2Fwallpaper%2F2018-10-15%2F5bc4612b7f826.jpg&fromurl=ippr_z2C%24qAzdH3FAzdH3Fooo_z%26e3Botg9aaa_z%26e3Bv54AzdH3Fowssrwrj6_kt2_8c8dl9_z%26e3Bip4s&gsm=1e&rpstart=0&rpnum=0&islist=&querylist=&nojc=undefined&dyTabStr=MCwzLDEsNiw0LDUsMiw3LDgsOQ%3D%3D'
}
#循环下载图片
# Download each image and save it under the last path component of its URL.
# Note: `pic_url` avoids shadowing the module-level `url` (the original loop
# reused the name `url`), and the misspelled "eoor" message is replaced with
# one that says which URL failed and why.
for pic_url in urls:
    try:
        picture = requests.get(pic_url, headers=header1, timeout=10)
        picture.raise_for_status()  # treat 4xx/5xx as a failed download
        file_name = pic_url.split('/')[-1]
        print(file_name)
        with open(file_name, "wb") as f:
            f.write(picture.content)
    except Exception as e:
        # Best-effort: report the failure and continue with the next image.
        print(f"error downloading {pic_url}: {e}")
# picture = requests.get(replaceUrl,headers=header1)
# content = picture.content
# file_name = replaceUrl.split('/')[-1]
# print(file_name)
# with open(file_name,"wb") as f:
#     f.write(content)

过程中遇到 403 之类的防盗链错误时，需要在 headers 中加入 Referer 参数

你可能感兴趣的:(python,javascript,java)