# 使用 requests 库和 re 库爬取微博热搜前十榜单

import requests
import re
import chardet
# Browser-like User-Agent sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.39'
}

# Page that is fetched and parsed for the ranking.
TOPHUB_URL = 'https://tophub.today/n/KqndgxeLl9'
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10
# Number of ranking entries to print.
TOP_N = 10

# NOTE(review): the patterns previously here were r'(.*?)' and r'(\d.*?)'.
# Without anchors the first matches the empty string at every position and
# the second matches one digit at a time, so the printed list was ten blank
# titles paired with stray digits. The HTML tags around the capture groups
# were presumably stripped when the snippet was published. The anchors below
# assume each entry is rendered as
#     <td class="al"><a ...>TITLE</a></td><td>HEAT</td>
# TODO: confirm against the live page markup and adjust if it differs.
top_ten_regex = r'<td class="al">\s*<a\b[^>]*>(.*?)</a>'
top_ten_heats = r'<td>\s*(\d.*?)\s*</td>'


def decode_html(raw):
    """Decode raw response bytes using the encoding chardet detects.

    chardet reports ``None`` as the encoding when it cannot decide (for
    example on an empty body); UTF-8 is used in that case. Undecodable
    bytes are replaced instead of raising, so one bad byte does not abort
    the whole run.
    """
    detected = chardet.detect(raw)['encoding']
    return raw.decode(detected or 'utf-8', errors='replace')


def extract_top_entries(page, limit=TOP_N):
    """Return up to ``limit`` ``(title, heat)`` pairs found in ``page``.

    Titles and heat values are matched independently and paired by
    position, as in the original script. Fewer than ``limit`` pairs are
    returned when the page yields fewer matches; an empty list when
    nothing matches.
    """
    titles = re.findall(top_ten_regex, page, re.DOTALL)
    heats = re.findall(top_ten_heats, page, re.DOTALL)
    # zip() stops at the shorter list, so a missing match can never cause
    # an IndexError the way fixed ``range(10)`` indexing did.
    return [
        (title.strip(), heat.strip())
        for title, heat in zip(titles[:limit], heats[:limit])
    ]


def format_entries(entries):
    """Render ``(title, heat)`` pairs as ``"<rank>.<title>:<heat>"`` lines."""
    return [
        "{}.{}:{}".format(rank, title, heat)
        for rank, (title, heat) in enumerate(entries, start=1)
    ]


def main():
    """Fetch the ranking page and print its first ``TOP_N`` entries.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
    """
    response = requests.get(TOPHUB_URL, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    html_content = decode_html(response.content)
    entries = extract_top_entries(html_content)

    print("Top Ten List:")
    if not entries:
        print("(no entries matched; the page markup may have changed)")
    for line in format_entries(entries):
        print(line)


if __name__ == "__main__":
    main()

# 你可能感兴趣的:(python,开发语言,爬虫)