# Python urllib 库之 parse (a tour of the urllib.parse module)

import urllib.parse

# Sample URL containing every component: scheme, netloc, path, params,
# query and fragment.
url = 'http://www.baidu.com/index.html;abc?m=1&n=xyz#opq'

# urlparse(url, scheme='', allow_fragments=True) splits a URL into six parts.
#   scheme:          fallback protocol, used only when the URL itself does
#                    not specify one.
#   allow_fragments: when False the '#...' anchor is not recognised; the
#                    result then has fragment='' and the anchor text stays
#                    attached to the preceding component.
res = urllib.parse.urlparse(url)
print(res)
# Output: ParseResult(scheme='http', netloc='www.baidu.com', path='/index.html', params='abc', query='m=1&n=xyz', fragment='opq')

# The result is a named tuple with a fixed number and order of fields, so
# each field can be read either by index or by attribute name.
print(res[0], res.scheme)
# Output: http http

# urlsplit returns only five fields: there is no separate `params` field.
# The ';abc' parameters are left as part of `path` (NOT `netloc`).
res = urllib.parse.urlsplit(url)
print(res)
# Output: SplitResult(scheme='http', netloc='www.baidu.com', path='/index.html;abc', query='m=1&n=xyz', fragment='opq')

# Reassemble the components into a URL. The argument is any iterable such as
# a list or tuple: urlunparse expects 6 items, urlunsplit expects 5.
# The path below has no leading '/', yet the rebuilt URL still equals `url`
# because a '/' is inserted between a non-empty netloc and the path.
unparse = ('http', 'www.baidu.com', 'index.html', 'abc', 'm=1&n=xyz', 'opq')
new_url = urllib.parse.urlunparse(unparse)
print(new_url == url)  # True

unsplit = ['http', 'www.baidu.com', 'index.html;abc', 'm=1&n=xyz', 'opq']
new_url = urllib.parse.urlunsplit(unsplit)
print(new_url == url)  # True

# urljoin resolves a (possibly relative) reference against an absolute base
# URL. Each '..' segment in the reference climbs one directory level up from
# the directory that contains the base document.
_deep_base = 'http://www.abc.com/a/b/c/abc.html'
_join_cases = (
    ('http://www.abc.com/abc.html', 'xyz.html'),
    (_deep_base, 'xyz.html'),
    (_deep_base, '../xyz.html'),
    (_deep_base, '../../xyz.html'),
    (_deep_base, '../../../xyz.html'),
)
for _base, _reference in _join_cases:
    print(urllib.parse.urljoin(_base, _reference))
# Output:
# http://www.abc.com/xyz.html
# http://www.abc.com/a/b/c/xyz.html
# http://www.abc.com/a/b/xyz.html
# http://www.abc.com/a/xyz.html
# http://www.abc.com/xyz.html

# Serialise query parameters into a query string, then parse the string back.
# Note that parsing yields string values: parse_qs maps every key to a list
# of values, parse_qsl returns a flat list of (key, value) tuples.
data = urllib.parse.urlencode({'a': 1, 'b': 2})
_as_dict = urllib.parse.parse_qs(data)
_as_pairs = urllib.parse.parse_qsl(data)
print(data, _as_dict, _as_pairs, sep='\n')
# Output:
# a=1&b=2
# {'a': ['1'], 'b': ['2']}
# [('a', '1'), ('b', '2')]

# Percent-encode a non-ASCII string for use in a URL, then decode it again.
chr_chs = urllib.parse.quote('你好')  # encode
chs = urllib.parse.unquote(chr_chs)  # decode back to the original text
for _text in (chr_chs, chs):
    print(_text)
# Output:
# %E4%BD%A0%E5%A5%BD
# 你好

# 你可能感兴趣的:(python) — "You may also be interested in: python" (page footer)