import requests
from bs4 import BeautifulSoup
def get(url):
header = {
"User-Agent": " Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.79 Safari/537.36"
}
response = requests.get(url, header)
return response.text
def soup(url):
return BeautifulSoup(get(url))
from reptile import request
if __name__ == '__main__':
url = 'https://nav.wretchant.com/plugin/js-keyboard-event/index.html'
soup = request.soup(url)
a = soup.select('a')
for b in a:
# 打印html
print(b)
# 获取class 值
print(b['class'])
# 获取href 值
print(b['href'])
# 获取标签内容
print(b.string)
输出
<a class="url" href="linux-cmd.html">Linux 命令</a>
['url']
linux-cmd.html
Linux 命令
<a class="url" href="csdn-template.html">csdn-template</a>
['url']
csdn-template.html
csdn-template