urllib库

1 urlopen()

把Python官网的页面爬取下来

# urlopen(): fetch the python.org home page and print its HTML source.
import urllib.request

# The response object holds an open connection; the `with` block closes it
# as soon as the body has been read.
with urllib.request.urlopen('https://www.python.org') as response:
    # read() returns bytes, so decode them to text before printing.
    print(response.read().decode('utf-8'))

2 查看返回的类型

# Inspect the type of the object returned by urlopen().
import urllib.request

# Close the connection automatically once the type has been printed.
with urllib.request.urlopen('https://www.python.org') as response:
    print(type(response))

#<class 'http.client.HTTPResponse'>

这是一个HTTPResponse类型的对象,包含的方法有:
read()
readinto()
getheader(name)
getheaders()
fileno() 等……
包含的属性有:
msg
version
status
reason
debuglevel
closed
3 再来看一个例子

# Another example: print the status code and the response headers.
import urllib.request

# The `with` block closes the connection after the headers have been read.
with urllib.request.urlopen('https://www.python.org') as response:
    print(response.status)               # HTTP status code
    print(response.getheaders())         # all headers as (name, value) pairs
    print(response.getheader('Server'))  # value of a single header

#200
#这里的太长了,省略掉
#nginx

4 urlopen() 函数的API

#urlopen() 函数的API
#注意:cafile、capath、cadefault 自 Python 3.6 起已弃用,并在 Python 3.13 中被移除,请改用 context 参数
urllib.request.urlopen(url, data=None, [timeout, ]*,
    cafile=None, capath=None, cadefault=False, context=None)

5 data参数

# The data parameter: passing data turns the request into a POST.
import urllib.parse
import urllib.request

# URL-encode the form fields, then encode the resulting str to bytes,
# because urlopen() does not accept a str request body.
data = urllib.parse.urlencode({'word': 'hello'}).encode('utf-8')
# The `with` block closes the connection once the body has been read.
with urllib.request.urlopen('http://httpbin.org/post', data=data) as response:
    print(response.read())

#运行结果不展示了

6 timeout参数

# The timeout parameter is given in seconds: if the server has not responded
# within 1 second, the blocking operation is aborted with an exception
# (typically urllib.error.URLError).
import urllib.request

# The `with` block closes the connection once the body has been read.
with urllib.request.urlopen('http://httpbin.org/get', timeout=1) as response:
    print(response.read())

#结果不展示了
# Handle a timeout explicitly: 0.1 s is short enough that the request is
# expected to time out.
import socket
import urllib.error
import urllib.request

try:
    with urllib.request.urlopen('http://httpbin.org/get', timeout=0.1) as response:
        print(response.read())
except urllib.error.URLError as e:
    # Failures while connecting or sending are wrapped in URLError; the
    # underlying cause is available as e.reason.
    if isinstance(e.reason, socket.timeout):
        print('TIME OUT')
    else:
        # Not a timeout: re-raise instead of hiding an unrelated failure.
        raise
except socket.timeout:
    # A timeout while waiting for or reading the response can surface
    # directly, without being wrapped in URLError.
    print('TIME OUT')

7 Request

class urllib.request.Request(url, data=None, headers={}, 
    origin_req_host=None, unverifiable=False, method=None)

看一个例子

传入多个参数构建请求

# Build a request from several arguments: URL, form data, headers and method.
from urllib import parse, request

url = 'http://httpbin.org/post'
headers = {
    # Identify as a browser; "MSIE" is the Internet Explorer product token.
    'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
    'Host': 'httpbin.org',
}
# Named `form` rather than `dict` so the builtin type is not shadowed.
form = {
    'name': 'Germey',
}
# URL-encode the form fields, then encode the str to bytes for the POST body.
data = parse.urlencode(form).encode('utf-8')
req = request.Request(url=url, data=data, headers=headers, method='POST')
# The `with` block closes the connection once the body has been read.
with request.urlopen(req) as response:
    print(response.read().decode('utf-8'))

你可能感兴趣的:(urllib库)