Python3 urllib和第三方模块requests

Python3 把所有的http包打包成为了两个包:http和urllib
1、http

from http import server, client, cookiejar, cookies

http会处理所有客户端-服务器http请求的具体细节,其中:
(1)client会处理客户端的部分
(2)server会协助你编写Python Web服务器程序
(3)cookies和cookiejar会处理cookie,cookie可以在请求中存储数据
使用cookie示例:

import http.cookiejar  
import urllib.request  
import urllib.parse
def getOpener(head):
    """Build a cookie-aware URL opener that sends the given default headers.

    Args:
        head: Mapping of header name to header value.

    Returns:
        The opener produced by ``urllib.request.build_opener`` with an
        ``HTTPCookieProcessor`` attached (backed by a fresh ``CookieJar``)
        and ``addheaders`` set to the ``(name, value)`` pairs of ``head``.
    """
    # A new cookie jar is created on every call and handed to the processor.
    cookie_handler = urllib.request.HTTPCookieProcessor(
        http.cookiejar.CookieJar()
    )
    url_opener = urllib.request.build_opener(cookie_handler)
    # addheaders is assigned a list of (name, value) tuples, in mapping order.
    url_opener.addheaders = [(name, value) for name, value in head.items()]
    return url_opener

2、urllib
urllib是基于http的高层库:

from urllib import request, error, parse, response, robotparser

(1)request处理客户端的请求
(2)response处理服务器端的响应
(3)parse会解析url

简单的示例:

import urllib.request

# Example 1: fetch a page by passing the URL string straight to urlopen().
# The response is used as a context manager so it is closed once the body
# has been read.
with urllib.request.urlopen('http://python.org/') as response:
    html = response.read()

# Example 2: the same fetch, but through an explicit Request object.
req = urllib.request.Request('http://python.org/')
with urllib.request.urlopen(req) as response:
    the_page = response.read()
import urllib.parse
import urllib.request

# POST a login form and send a Referer header along with it.
# NOTE(review): the target URL is empty in the source (it appears to have
# been stripped from the article) -- supply the real endpoint before running.
url = ''
values = {
    'act': 'login',
    'login[email]': '',
    'login[password]': '',
}
# urlencode() returns str, but Python 3 requires bytes for POST data,
# so the form body is encoded explicitly.
data = urllib.parse.urlencode(values).encode('utf-8')
req = urllib.request.Request(url, data)
req.add_header('Referer', 'http://www.python.org/')
# Close the response as soon as the body has been read.
with urllib.request.urlopen(req) as response:
    the_page = response.read()
print(the_page.decode("utf8"))
发送数据和header  
import urllib.parse
import urllib.request

# POST form data together with a custom User-Agent header.
# NOTE(review): the target URL is empty in the source (it appears to have
# been stripped from the article) -- supply the real endpoint before running.
url = ''
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
values = {
    'act': 'login',
    'login[email]': '',
    'login[password]': '',
}
headers = {'User-Agent': user_agent}
# urlencode() returns str, but Python 3 requires bytes for POST data,
# so the form body is encoded explicitly.
data = urllib.parse.urlencode(values).encode('utf-8')
# Headers are passed as the third positional argument of Request.
req = urllib.request.Request(url, data, headers)
# Close the response as soon as the body has been read.
with urllib.request.urlopen(req) as response:
    the_page = response.read()
print(the_page.decode("utf8"))
http 错误  
import urllib.error
import urllib.request

# Show how an HTTP error status surfaces as an HTTPError exception.
# NOTE(review): the URL is blank in the source (it appears to have been
# stripped from the article) -- supply a URL that returns an error status.
req = urllib.request.Request(' ')
try:
    urllib.request.urlopen(req)
except urllib.error.HTTPError as e:
    # The exception exposes the status code and can be read like a
    # response to obtain the body of the error page.
    print(e.code)
    print(e.read().decode("utf8"))
异常处理1  
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError

# Handle both failure modes with separate except clauses.
# NOTE(review): the host in this URL looks like it was partly stripped from
# the article -- replace it with a real URL before running.
req = Request("http://www..net /")
try:
    response = urlopen(req)
except HTTPError as e:
    # HTTPError is a subclass of URLError, so it has to be caught first;
    # otherwise the URLError clause below would swallow it.
    print("The server couldn't fulfill the request.")
    print('Error code: ', e.code)
except URLError as e:
    print('We failed to reach a server.')
    print('Reason: ', e.reason)
else:
    # Only reached when urlopen() raised nothing.
    print("good!")
    print(response.read().decode("utf8"))
异常处理2  
from urllib.request import Request, urlopen
from urllib.error import  URLError

# Handle both failure modes with a single except clause, telling them apart
# by the attributes present on the exception.
req = Request("http://www.Python.org/")
try:
    response = urlopen(req)
except URLError as e:
    # Check 'code' first: HTTPError (a URLError subclass) has both 'code' and
    # 'reason', so testing 'reason' first would make the HTTP-error branch
    # unreachable.
    if hasattr(e, 'code'):
        print("The server couldn't fulfill the request.")
        print('Error code: ', e.code)
    elif hasattr(e, 'reason'):
        print('We failed to reach a server.')
        print('Reason: ', e.reason)
else:
    # Only reached when urlopen() raised nothing.
    print("good!")
    print(response.read().decode("utf8"))
HTTP 认证  
import urllib.request

# create a password manager
password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()

# Add the username and password.
# If we knew the realm, we could use it instead of None.
# NOTE(review): both URLs below are empty in the source (they appear to have
# been stripped from the article) -- supply real URLs before running.
top_level_url = ""
password_mgr.add_password(None, top_level_url, 'rekfan', 'xxxxxx')

handler = urllib.request.HTTPBasicAuthHandler(password_mgr)

# create "opener" (OpenerDirector instance)
opener = urllib.request.build_opener(handler)

# use the opener to fetch a URL
a_url = ""
x = opener.open(a_url)
print(x.read())

# Install the opener.
# Now all calls to urllib.request.urlopen use our opener.
urllib.request.install_opener(opener)
a = urllib.request.urlopen(a_url).read().decode('utf8')
print(a)
使用代理  
import urllib.request

# Build an opener around a ProxyHandler and install it, so that later
# urllib.request.urlopen() calls go through that opener.
# NOTE(review): ProxyHandler keys are matched against the URL scheme of the
# request; 'sock5' is neither 'http' nor 'https', so ordinary web requests are
# presumably not proxied by this mapping -- confirm the intended scheme.
proxy_map = {'sock5': 'localhost:1080'}
urllib.request.install_opener(
    urllib.request.build_opener(urllib.request.ProxyHandler(proxy_map))
)
# NOTE(review): the URL is empty in the source -- supply a real URL.
page_bytes = urllib.request.urlopen("").read()
a = page_bytes.decode("utf8")
print(a)
超时  
import socket
import urllib.request

# Default timeout, in seconds, set in the socket module.
timeout = 2
socket.setdefaulttimeout(timeout)

# No timeout argument is passed to urlopen() here, so this call uses the
# default timeout set in the socket module above.
# NOTE(review): the URL is empty in the source -- supply a real URL.
a = urllib.request.urlopen(urllib.request.Request('')).read()
print(a)

你可能感兴趣的:(python)