# Walk-through of the most common Beautiful Soup lookup APIs on a small
# HTML sample. Every example rebinds ``result``; only the last live
# assignment is printed at the bottom of the script.
#
# Third-party requirements (the 'lxml' parser is a separate package):
#   pip install beautifulsoup4 lxml
from bs4 import BeautifulSoup

# NOTE(review): the markup below was rebuilt around the original text from
# the selectors used later in this script (class "title"/"story"/"sister",
# ids "one"/"link1"/"link3", and a <title> inside <head>). The href values,
# id="link2" and the placement of id="one" are assumptions -- confirm them
# against the intended sample document.
html_doc = """<html><head><title>The Dormouse's story</title></head>
<body>
<p id="one">p标签的内容</p>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were <a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>, <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and <a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>; and they lived at the bottom of a well.</p>
</body></html>
"""

# 1. Parse the markup into a bs4.BeautifulSoup object.
soup = BeautifulSoup(html_doc, 'lxml')

# 2. General-purpose lookup methods.

# find -- returns the first object that matches the query.
result = soup.find(name="p")
result = soup.find(attrs={"class": "title"})
# ``string=`` is the current spelling of the older ``text=`` argument
# (available since Beautiful Soup 4.4.0); it matches on text content.
result = soup.find(string="Tillie")
result = soup.find(
    name='p',
    attrs={"class": "story"},
)

# find_all -- returns a list of matching tag objects.
result = soup.find_all('a')
# ``limit=1`` stops after the first hit; the ``[0]`` subscript raises
# IndexError when nothing matches.
result = soup.find_all("a", limit=1)[0]
result = soup.find_all(attrs={"class": "sister"})

# select_one -- CSS selector, first match only.
result = soup.select_one('.sister')

# select -- CSS selector, returns a list of matches.
result = soup.select('.sister')
result = soup.select('#one')
result = soup.select('head title')
result = soup.select('title,.title')
result = soup.select('a[id="link3"]')

# Text wrapped by a tag: index into the list returned by select() first.
result = soup.select('.title')[0].get_text()

# Attribute of a tag (example kept disabled, as in the original script):
# result = soup.select('#link1')[0].get('href')

print(result)