import requests
import re
# Scrape a Baidu search-results page for links to Baidu's cached page copies,
# download each cached page, and print every e-mail address found in them.

# Browser-like User-Agent sent with every request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36'}
# Baidu search URL; the `wd` query parameter is the URL-encoded search phrase.
url = 'https://www.baidu.com/s?wd=%E7%95%99%E4%B8%8B%E9%82%AE%E7%AE%B1'

# Seconds to wait on any single HTTP request before giving up, so a stalled
# server cannot hang the script forever.
REQUEST_TIMEOUT = 10

# A double-quoted absolute http(s) URL. `[^"]+` keeps the match inside one
# quoted value, so text such as `"http-equiv" ... "//host"` is not captured.
QUOTED_URL_RE = re.compile(r'"(https?://[^"]+)"')

# E-mail address pattern, compiled once instead of once per downloaded page.
EMAIL_RE = re.compile(r"[\w!#$%&'*+/=?^_`{|}~-]+(?:\.[\w!#$%&'*+/=?^_`{|}~-]+)*@(?:[\w](?:[\w-]*[\w])?\.)+[\w](?:[\w-]*[\w])?")


def find_cache_links(page_html, skip=1):
    """Return the quoted URLs in *page_html* that contain 'cache.baiducontent'.

    Links are returned in document order. The first *skip* matching links
    are dropped; slicing is used so an empty result never raises.

    NOTE(review): the original script unconditionally deleted the first
    collected link. That is kept as the default (skip=1) -- confirm that
    dropping it is still wanted now that the filter itself is reliable.
    """
    # Build a new list rather than removing from the list being iterated,
    # which silently skips elements.
    links = [
        link for link in QUOTED_URL_RE.findall(page_html)
        if 'cache.baiducontent' in link
    ]
    return links[skip:]


def find_emails(text):
    """Return every e-mail address found in *text*, in order of appearance."""
    return EMAIL_RE.findall(text)


def main():
    """Run the search, visit each cached page and print the collected e-mails."""
    import sys

    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    list_1 = []
    for link in find_cache_links(response.text):
        try:
            page = requests.get(link, headers=headers, timeout=REQUEST_TIMEOUT)
            page.raise_for_status()
        except requests.RequestException as exc:
            # One unreachable or failing cache page must not abort the run.
            print('skipping', link, '-', exc, file=sys.stderr)
            continue
        list_1.extend(find_emails(page.text))
    print(list_1)


if __name__ == '__main__':
    main()