网络库urllib模块使用
from urllib import request

url = 'http://www.baidu.com'
# Fetch the page with a 1-second timeout. The with-block closes the
# underlying connection even if reading or decoding raises.
with request.urlopen(url, timeout=1) as response:
    # read() returns raw bytes; decode them as UTF-8 to get printable text.
    print(response.read().decode('utf-8'))
如上示例代码去掉decode('utf-8')时会出现乱码,主要原因是read()返回的是未解码的原始字节(bytes),直接打印时汉字会显示成\xe7\x99\xbe这样的字节转义序列,所以需要按网页实际使用的编码(这里是utf-8)解码成字符串。
gbk编码一个汉字占2个字节,包括一些常用汉字。
utf-8编码一个汉字通常占3个字节(少数生僻字占4个字节),生僻汉字都包括。
请求网页方式:
何时使用 GET?
您能够使用 GET(默认方法):
如果表单提交是被动的(比如搜索引擎查询),并且没有敏感信息。
当您使用 GET 时,表单数据在页面地址栏中是可见的:
action_page.php?firstname=Mickey&lastname=Mouse
注释:GET 最适合少量数据的提交。浏览器会设定容量限制。
何时使用 POST?
您应该使用 POST:
如果表单正在更新数据,或者包含敏感信息(例如密码)。
POST 的安全性更好,因为在页面地址栏中被提交的数据是不可见的。
GET和POST例子:
from urllib import parse
from urllib import request

# Supplying data= makes urlopen send a POST; the body must be bytes, so the
# url-encoded form string is encoded before sending.
data = parse.urlencode({'word': 'hello'}).encode('utf8')
# The with-block closes the connection once the body has been printed.
with request.urlopen('http://httpbin.org/post', data=data) as response:
    print(response.read().decode('utf-8'))
#输出结果:
{
"args": {},
"data": "",
"files": {},
"form": {
"word": "hello"
},
"headers": {
"Accept-Encoding": "identity",
"Connection": "close",
"Content-Length": "10",
"Content-Type": "application/x-www-form-urlencoded",
"Host": "httpbin.org",
"User-Agent": "Python-urllib/3.6"
},
"json": null,
"origin": "113.110.215.45",
"url": "http://httpbin.org/post"
}
# No data= argument is given, so this request is sent as a GET.
# The with-block closes the connection once the body has been printed.
with request.urlopen('http://httpbin.org/get', timeout=1) as response2:
    print(response2.read().decode('utf-8'))
#输出结果:
{
"args": {},
"headers": {
"Accept-Encoding": "identity",
"Connection": "close",
"Host": "httpbin.org",
"User-Agent": "Python-urllib/3.6"
},
"origin": "113.110.215.45",
"url": "http://httpbin.org/get"
}
异常捕获处理:
import socket
# A bare "import urllib" does not load the request/error submodules, so they
# are imported explicitly; the name "urllib" is still bound as before.
import urllib.error
import urllib.request

try:
    # The 0.1-second timeout is deliberately tiny so the timeout path triggers.
    # response3 is left open here because this snippet never consumes it.
    response3 = urllib.request.urlopen('http://httpbin.org/get', timeout=0.1)
except urllib.error.URLError as e:
    # Failures while connecting/sending are wrapped in URLError; the original
    # cause is in e.reason. Non-timeout causes are ignored, as before.
    if isinstance(e.reason, socket.timeout):
        print('TIME OUT')
except socket.timeout:
    # A timeout while waiting for the response is raised unwrapped.
    print('TIME OUT')
http头部信息模拟:
from urllib import request, parse

url = 'http://httpbin.org/post'
# Browser-like headers sent in place of urllib's default ones.
# NOTE(review): urllib does not decompress response bodies itself, so if the
# server honours "Accept-Encoding" and replies with gzip/deflate, the
# decode('utf-8') below will fail - confirm the target never compresses,
# or drop that header.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate, sdch",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "Connection": "close",
    "Cookie": "_gauges_unique_hour=1; _gauges_unique_day=1; _gauges_unique_month=1; _gauges_unique_year=1; _gauges_unique=1",
    "Referer": "http://httpbin.org/",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER",
}
# Form fields for the POST body. Named form_fields rather than "dict" so the
# builtin dict type is not shadowed for the rest of the script.
form_fields = {
    'name': 'value',
}
data = parse.urlencode(form_fields).encode('utf8')
# A Request object carries the custom headers and the explicit HTTP method.
req = request.Request(url=url, data=data, headers=headers, method='POST')
# The with-block closes the connection once the body has been printed.
with request.urlopen(req) as response:
    print(response.read().decode('utf-8'))