Python 爬取百度百科

import re
from urllib import request
from urllib.parse import quote
from bs4 import BeautifulSoup as sp

# HTTP request headers passed to urllib when fetching a page: a desktop
# browser User-Agent string and a browser-style Accept header.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
}

# Matches a "[" followed by zero or more digits and a closing "]",
# e.g. "[3]" or "[]".
bracket = re.compile(r'\[\d*]')


def look_up(entry, timeout=10):
    """Print the paragraph text of a Baidu Baike entry.

    Requests ``https://baike.baidu.com/item/<entry>``, parses the HTML and
    prints the text of every ``<div class="para">`` element on its own line,
    with line breaks and anything matched by the module-level ``bracket``
    pattern removed.

    Args:
        entry: Title of the entry to look up; it is percent-encoded before
            being appended to the URL.
        timeout: Seconds to wait on the network before giving up.

    Raises:
        urllib.error.URLError: Raised by ``urlopen`` when the request fails.
    """
    url = "https://baike.baidu.com/item/" + quote(entry)
    req = request.Request(url, headers=header)
    # The context manager closes the HTTP response as soon as the body has
    # been read, so the underlying socket is not leaked.
    with request.urlopen(req, timeout=timeout) as response:
        html = response.read()
    soup = sp(html, "html.parser")

    # find_all is the current BeautifulSoup name; findAll is a legacy alias.
    for para in soup.find_all('div', attrs={'class': 'para'}):
        text = para.get_text().replace('\n', '').replace('\r', '')
        print(bracket.sub('', text))
# Example run: look up one entry and print its paragraphs.
look_up(entry="高等数学")

你可能感兴趣的:(Python,爬虫,python,爬虫,百度百科)