Python 爬虫:处理 URL 跳转,获取跳转地址

Python 爬虫:处理 URL 跳转,获取跳转地址

方法一:禁止自动跳转(allow_redirects=False),从响应头 Location 中读取跳转地址
# -*- coding: UTF-8 -*-
# Method 1: send the request with automatic redirects disabled and read the
# redirect target from the ``Location`` header of the response.
from urllib.parse import urljoin, urlparse

import requests

# Original link whose redirect target we want to discover.
url = 'https://tophub.today/l?e=461cpnztGY3l9fR3EWmV%2BDSJE2QUY%2F6uZJ8ga%2Fe8366O0f%2BsYELJPASL8xhNSeLKE33ZnOT3IZa%2FYfwZAG%2FhnIaneCtWMkN3SbQH64DOPE5Rz3Jy%2FaJstSnflc2MzSAnHfFhIJqCnW%2FtcxY%2FBsr0S1QKBr%2FdPMRc4m8'
headers = {
    "Host": "tophub.today",
    "Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
}
# allow_redirects=False makes requests return the first response itself
# instead of following it; the timeout keeps the script from hanging forever
# when the server does not answer.
r = requests.get(url, headers=headers, allow_redirects=False, timeout=10)

# Not every response is a redirect, so look the header up without raising
# KeyError when it is absent.
location = r.headers.get('Location')
if location is None:
    print(r.headers)
    print('No Location header (status %d): the URL did not redirect' % r.status_code)
else:
    # Redirect address exactly as sent by the server.
    print(location)
    print(r.headers)
    # Parse the redirect address. A Location value may be relative, so it is
    # resolved against the request URL first.
    url_result = urlparse(urljoin(url, location))
    print(url_result)
    # Example output:
    # ParseResult(scheme='http', netloc='haokan.baidu.com', path='/v', params='', query='vid=3213932617390061588', fragment='')
    # The part below can be adjusted for different target sites.
    # Keep scheme, host, path and query; drop params and fragment. geturl()
    # only emits '?' when the query is non-empty, so a target without a query
    # string no longer ends with a dangling '?'.
    mother_url = url_result._replace(params='', fragment='').geturl()
    # Final redirect target address.
    print(mother_url)

方法二:允许 requests 自动跟随跳转,直接读取最终响应的 url
# Method 2: let requests follow the redirects automatically and read the final
# address from the ``url`` attribute of the response.
import requests

if __name__ == '__main__':
    http_headers = {
        'Accept': '*/*',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36'
    }
    # Original link whose redirect target we want to discover.
    item = 'https://tophub.today/l?e=461cpnztGY3l9fR3EWmV%2BDSJE2QUY%2F6uZJ8ga%2Fe8366O0f%2BsYELJPASL8xhNSeLKE33ZnOT3IZa%2FYfwZAG%2FhnIaneCtWMkN3SbQH64DOPE5Rz3Jy%2FaJstSnflc2MzSAnHfFhIJqCnW%2FtcxY%2FBsr0S1QKBr%2FdPMRc4m8'

    try:
        resp = requests.get(url=item, headers=http_headers, timeout=10)
    except requests.RequestException as e:
        # Report the failure instead of silently swallowing it: previously
        # the error was ignored and the print below crashed with a NameError
        # because ``resp`` had never been assigned.
        print("request failed:", e)
    else:
        # Redirects were followed, so resp.url is the address of the final
        # response.
        print("resp", resp.url)

你可能感兴趣的:(python,python,爬虫,开发语言)