Python 爬虫处理 URL 跳转:获取跳转后的目标地址
方法一:禁止自动跳转,从响应头的 Location 字段读取跳转地址
from urllib.parse import urlparse
import requests
# Method 1: ask requests NOT to follow the redirect, then read the redirect
# target directly from the ``Location`` header of the 3xx response.
url = 'https://tophub.today/l?e=461cpnztGY3l9fR3EWmV%2BDSJE2QUY%2F6uZJ8ga%2Fe8366O0f%2BsYELJPASL8xhNSeLKE33ZnOT3IZa%2FYfwZAG%2FhnIaneCtWMkN3SbQH64DOPE5Rz3Jy%2FaJstSnflc2MzSAnHfFhIJqCnW%2FtcxY%2FBsr0S1QKBr%2FdPMRc4m8'
headers = {
    "Host": "tophub.today",
    "Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
}
# allow_redirects=False returns the redirect response itself instead of the
# page it points to; the timeout keeps the script from hanging on a dead host.
r = requests.get(url, headers=headers, allow_redirects=False, timeout=10)

# A non-redirect response (e.g. 200 or 404) carries no Location header, so
# look it up defensively instead of indexing and raising KeyError.
location = r.headers.get('Location')
if location is None:
    print(r.headers)
    print("No Location header in response; status code:", r.status_code)
    mother_url = None
else:
    print(location)
    print(r.headers)
    url_result = urlparse(location)
    print(url_result)
    # Rebuild the target from scheme, host, path and query only (params and
    # fragment are intentionally left out, as in the original concatenation).
    mother_url = url_result.scheme + "://" + url_result.netloc + url_result.path
    # Only append the query separator when a query string actually exists,
    # otherwise the result would end in a stray '?'.
    if url_result.query:
        mother_url += '?' + url_result.query
    print(mother_url)
方法二:让 requests 自动跟随跳转,通过 resp.url 获取最终地址
import requests
if __name__ == '__main__':
    # Method 2: let requests follow the redirect chain (its default for GET)
    # and read the final address from ``resp.url``.
    http_headers = {
        'Accept': '*/*',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36'
    }
    item = 'https://tophub.today/l?e=461cpnztGY3l9fR3EWmV%2BDSJE2QUY%2F6uZJ8ga%2Fe8366O0f%2BsYELJPASL8xhNSeLKE33ZnOT3IZa%2FYfwZAG%2FhnIaneCtWMkN3SbQH64DOPE5Rz3Jy%2FaJstSnflc2MzSAnHfFhIJqCnW%2FtcxY%2FBsr0S1QKBr%2FdPMRc4m8'
    try:
        resp = requests.get(url=item, headers=http_headers, timeout=10)
    except requests.RequestException as e:
        # Report the failure instead of silently passing: with a bare
        # ``pass`` the name ``resp`` stays unbound and the print below
        # would die with an unrelated NameError.
        print("request failed:", e)
    else:
        # Only reached when the request succeeded, so ``resp`` is bound.
        print("resp", resp.url)