用 Python 的 BeautifulSoup 实现网页爬取

用 Python 的 BeautifulSoup 实现网页爬取

# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import requests

if __name__ == '__main__':
    # Fetch the bilibili search page for the keyword "CNN" and print the
    # title, link and time text of the first result found in the HTML.
    page_url = "https://search.bilibili.com/all?keyword=CNN"
    headers = {
        # Send a desktop-browser user-agent instead of the requests default.
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36 Edg/87.0.664.66"
    }

    # The context manager releases the session's pooled connections on exit.
    with requests.Session() as session:
        # A timeout keeps the script from hanging forever on a stalled server.
        res = session.get(page_url, headers=headers, timeout=10)
        # Fail loudly on 4xx/5xx instead of parsing an error page.
        res.raise_for_status()

    # Name the parser explicitly: without it bs4 warns and picks whichever
    # parser is installed, so results could differ between machines.
    soup = BeautifulSoup(res.text, "html.parser")
    title_links = soup.find_all('a', class_='title')
    time_spans = soup.find_all('span', class_='so-icon time')

    # The selectors depend on the page's current markup; report a clear
    # message rather than an IndexError when nothing matches.
    if not title_links or not time_spans:
        raise SystemExit("no search results found in the page (markup may have changed)")

    first_link = title_links[0]
    title = first_link.get('title')
    href = first_link.get('href')

    # The time text is read from the second child node of the first span.
    span_children = time_spans[0].contents
    if len(span_children) < 2:
        raise SystemExit("unexpected structure inside the time <span>")
    # Characters 9..18 of that node are kept -- presumably a 10-character
    # date surrounded by whitespace; verify against the live page.
    time_text = span_children[1][9:19]

    # f-strings avoid a TypeError if an attribute is missing (None).
    print(f"title:  {title}")
    print(f"href:  {href}")
    print(f"time:  {time_text}")
    print("done!")

python BeautifulSoup 实现网页的爬取_第1张图片

你可能感兴趣的:(python,BeautifulSoup)