Python Web Crawler Examples

Contents

1. Visiting Baidu

2. Translating a word with Baidu Translate

3. Douban movie rankings

4. Douban Top 250 movies

5. Downloading beauty wallpapers


1. Visiting Baidu

from urllib.request import urlopen

url = "http://www.baidu.com"
resp = urlopen(url)

# decode the response bytes and save the page as a local HTML file
with open("mybaidu.html", mode="w", encoding="utf-8") as f:
    f.write(resp.read().decode("utf-8"))
print("over!")

2. Translating a word with Baidu Translate

import requests

url = "https://fanyi.baidu.com/sug"
s = input("Enter the English word to translate: ")
dat = {"kw": s}
# send a POST request with the word as form data
resp = requests.post(url, data=dat)
print(resp.json())
resp.close()
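Rather than dumping the raw JSON, the suggestions can be unpacked. A hedged sketch, assuming the response follows the usual {"errno": ..., "data": [{"k": ..., "v": ...}, ...]} shape; check the actual payload before relying on these keys:

import requests

url = "https://fanyi.baidu.com/sug"
word = input("Enter the English word to translate: ")
resp = requests.post(url, data={"kw": word})
payload = resp.json()
resp.close()

# "data" is assumed to be a list of {"k": query, "v": translations} entries
for entry in payload.get("data", []):
    print(entry.get("k"), "->", entry.get("v"))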

3. Douban movie rankings


import requests

url = "https://movie.douban.com/j/chart/top_list"
param = {
    "type": "24",
    "interval_id": "100:90",
    "action": "",
    "start": "0",
    "limit": "20",
}
header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
# send a GET request; the params are encoded into the query string
resp = requests.get(url, params=param, headers=header)
print(resp.json())
resp.close()
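The endpoint returns one page of results at a time, controlled by start and limit. A sketch of walking through several pages; the title and score keys are assumptions about the returned JSON, so verify them against a real response:

import requests
import time

url = "https://movie.douban.com/j/chart/top_list"
header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}

for start in range(0, 60, 20):                      # three pages of 20 entries each
    param = {"type": "24", "interval_id": "100:90", "action": "",
             "start": str(start), "limit": "20"}
    resp = requests.get(url, params=param, headers=header)
    movies = resp.json()                            # an empty list means no more results
    resp.close()
    if not movies:
        break
    for m in movies:
        # "title" and "score" are assumed field names in the JSON entries
        print(m.get("title"), m.get("score"))
    time.sleep(1)                                   # pause between requests to be polite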


4. Douban Top 250 movies

import requests
import re

url = "https://movie.douban.com/top250"
header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
resp = requests.get(url, headers=header)
page_content = resp.text
resp.close()

# regex with named groups for the title, year, rating, and vote count of each list item
obj = re.compile(r'<li>.*?<div class="item">.*?<span class="title">(?P<name>.*?)</span>'
                 r'.*?<p class="">.*?<br>(?P<year>.*?)&nbsp;'
                 r'.*?<span class="rating_num" property="v:average">(?P<score>.*?)</span>'
                 r'.*?<span>(?P<num>.*?)人评价', re.S)
result = obj.finditer(page_content)
for it in result:
    print(it.group("name"))
    print(it.group("year").strip())
    print(it.group("score"))
    print(it.group("num"))
print("over!")
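Only the first 25 movies live at the base URL; the remaining pages are reached through the start query parameter in steps of 25. A sketch that reuses the regex above to crawl all ten pages and write the fields to a CSV file:

import csv
import re
import time
import requests

header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
obj = re.compile(r'<li>.*?<div class="item">.*?<span class="title">(?P<name>.*?)</span>'
                 r'.*?<p class="">.*?<br>(?P<year>.*?)&nbsp;'
                 r'.*?<span class="rating_num" property="v:average">(?P<score>.*?)</span>'
                 r'.*?<span>(?P<num>.*?)人评价', re.S)

with open("top250.csv", mode="w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["name", "year", "score", "num"])
    for start in range(0, 250, 25):                 # 10 pages of 25 movies each
        resp = requests.get("https://movie.douban.com/top250",
                            params={"start": start}, headers=header)
        for it in obj.finditer(resp.text):
            writer.writerow([it.group("name"), it.group("year").strip(),
                             it.group("score"), it.group("num")])
        resp.close()
        time.sleep(1)                               # pause between pages
print("over!")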

5. Downloading beauty wallpapers

import requests
from bs4 import BeautifulSoup
import time

url = "https://www.umei.cc/bizhitupian/meinvbizhi/"
# header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
resp = requests.get(url)
resp.encoding = 'utf-8'
main_page = BeautifulSoup(resp.text, "html.parser")
# every <a> in the listing block links to a detail page that holds the full-size image
alist = main_page.find("div", class_="item_list infinite_scroll").find_all("a")
for a in alist:
    href = "http://umei.cc" + a.get("href")
    child_page_resp = requests.get(href)
    child_page_resp.encoding = 'utf-8'
    child_page = BeautifulSoup(child_page_resp.text, "html.parser")
    b = child_page.find("div", class_="big-pic")
    img = b.find("img")
    src = img.get("src")
    # download the image bytes and name the file after the last path segment of its URL
    img_resp = requests.get(src)
    img_name = src.split("/")[-1]
    with open(img_name, mode="wb") as f:
        f.write(img_resp.content)
    print("over!", img_name)
    time.sleep(1)
print("all over!")

