标签: python二三事
慕课网学习笔记
requests中文文档: http://cn.python-requests.org/zh_CN/latest/
urllib和urllib2的区别
curl命令
下载单个文件,默认将输出打印到标准输出中(STDOUT)中
curl http://www.centos.org
通过-o/-O选项保存下载的文件到指定的文件中:
-o:将文件保存为命令行中指定的文件名的文件中
-O:使用URL中默认的文件名保存文件到本地
# 将文件下载到本地并命名为mygettext.html
curl -o mygettext.html http://www.gnu.org/software/gettext/manual/gettext.html
# 将文件保存到本地并命名为gettext.html
curl -O http://www.gnu.org/software/gettext/manual/gettext.html
同样可以使用转向字符”>”对输出进行转向输出
同时获取多个文件
curl -O URL1 -O URL2
若同时从同一站点下载多个文件时,curl会尝试重用链接(connection)。
通过-L选项进行重定向
默认情况下CURL不会发送HTTP Location headers(重定向).当一个被请求页面移动到另一个站点时,会发送一个HTTP Loaction header作为请求,然后将请求重定向到新的地址上。
例如:访问google.com时,会自动将地址重定向到google.com.hk上。
curl http://www.google.com
<HTML>
<HEAD>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
<TITLE>302 MovedTITLE>
HEAD>
<BODY>
<H1>302 MovedH1>
The document has moved
<A HREF="http://www.google.com.hk/url?sa=p&hl=zh-CN&pref=hkredirect&pval=yes&q=http://www.google.com.hk/&ust=1379402837567135amp;usg=AFQjCNF3o7umf3jyJpNDPuF7KTibavE4aA">hereA>.
BODY>
HTML>
上述输出说明所请求的档案被转移到了http://www.google.com.hk。
这是可以通过使用-L选项进行强制重定向
# 让curl使用地址重定向,此时会查询google.com.hk站点
curl -L http://www.google.com
断点续传
通过使用-C选项可对大文件使用断点续传功能,如:
# 当文件在下载完成之前结束该进程
$ curl -O http://www.gnu.org/software/gettext/manual/gettext.html
############## 20.1%
# 通过添加-C选项继续对该文件进行下载,已经下载过的文件不会被重新下载
curl -C - -O http://www.gnu.org/software/gettext/manual/gettext.html
############### 21.1%
对CURL使用网络限速
通过–limit-rate选项对CURL的最大网络使用进行限制
# 下载速度最大不会超过1000B/second
curl --limit-rate 1000B -O http://www.gnu.org/software/gettext/manual/gettext.html
在访问需要授权的页面时,可通过-u选项提供用户名和密码进行授权
curl -u username:password URL
# 通常的做法是在命令行只输入用户名,之后会提示输入密码,这样可以保证在查看历史记录时不会将密码泄露
curl -u username URL
保存与使用网站cookie信息
# 将网站的cookies信息保存到sugarcookies文件中
curl -D sugarcookies http://localhost/sugarcrm/index.php
# 使用上次保存的cookie信息
curl -b sugarcookies http://localhost/sugarcrm/index.php
传递请求数据
默认curl使用GET方式请求数据,这种方式下直接通过URL传递数据
可以通过 –data/-d 方式指定使用POST方式传递数据
………………
# -*- coding: utf-8 -*-
import requests
URL_IP = 'http://localhost:8000/ip'
URL_GET = 'http://localhost:8000/get'
def use_simple_requests():
response = requests.get(URL_IP)
print '>>>>Response Headers:'
print response.headers
print '>>>>Response body:'
print response.text
def use_params_requests():
params = {'param1': 'hello', 'param2': 'world'}
response = requests.get(URL_GET, params=params)
print '>>>>Response Headers:'
print response.headers
print '>>>>Status Code:'
print response.status_code
print '>>>>Reason:'
print response.reason
print '>>>>Request body:'
print response.text
if __name__ == '__main__':
print '>>>Use simple requests:'
use_simple_requests()
print ''
print '>>>Use params requests:'
use_params_requests()
用法:requests.method
github developer: https://developer.github.com/guides/getting-started/
第3章 代码
# -*- coding: utf-8 -*-
import json
import requests
from requests import exceptions
URL = 'https://api.github.com'
def build_uri(endpoint):
return '/'.join([URL, endpoint])
def better_print(json_str):
return json.dumps(json.loads(json_str), indent=4)
def request_method():
response = requests.get(build_uri('user/emails'), auth=('imoocdemo', 'imoocdemo123'))
print better_print(response.text)
def params_request():
response = requests.get(build_uri('users'), params={'since': 11})
print better_print(response.text)
print response.request.headers
print response.url
def json_request():
# response = requests.patch(build_uri('user'), auth=('imoocdemo', 'imoocdemo123'), json={'name': 'babymooc2', 'email': '[email protected]'})
response = requests.post(build_uri('user/emails'), auth=('imoocdemo', 'imoocdemo123'), json=['[email protected]'])
print better_print(response.text)
print response.request.headers
print response.request.body
print response.status_code
def timeout_request():
try:
response = requests.get(build_uri('user/emails'), timeout=10)
response.raise_for_status()
except exceptions.Timeout as e:
print e.message
except exceptions.HTTPError as e:
print e.message
else:
print response.text
print response.status_code
def hard_requests():
from requests import Request, Session
s = Session()
headers = {'User-Agent': 'fake1.3.4'}
req = Request('GET', build_uri('user/emails'), auth=('imoocdemo', 'imoocdemo123'), headers=headers)
prepped = req.prepare()
print prepped.body
print prepped.headers
resp = s.send(prepped, timeout=5)
print resp.status_code
print resp.request.headers
print resp.text
if __name__ == '__main__':
hard_requests()
# -*- coding: utf-8 -*-
import requests
response = requests.get('https://api.github.com')
print "状态码,具体解释"
print response.status_code, response.reason
print "头部信息"
print response.headers
print "URL 信息"
print response.url
print "redirect 信息"
print response.history
print "耗费时长"
print response.elapsed
print "request 信息"
print response.request.method
print '----------------------'
print "编码信息"
print response.encoding
print "消息主体内容: byte"
print response.content, type(response.content)
print "消息主体内容: 解析"
print response.text, type(response.text)
print "消息主体内容"
print response.json(), type(response.json())
# -*- coding: utf -*-
import requests
def download_image():
"""demo: 下载图片, 文件
"""
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36'}
url = "http://img3.imgtn.bdimg.com/it/u=2228635891,3833788938&fm=21&gp=0.jpg"
response = requests.get(url, headers=headers, stream=True)
with open('demo.jpg', 'wb') as fd:
for chunk in response.iter_content(128):
fd.write(chunk)
def download_image_improved():
"""demo: 下载图片
"""
# 伪造headers信息
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36'}
# 限定url
url = "http://img3.imgtn.bdimg.com/it/u=2228635891,3833788938&fm=21&gp=0.jpg"
response = requests.get(url, headers=headers, stream=True)
from contextlib import closing
with closing(requests.get(url, headers=headers, stream=True)) as response:
# 打开文件
with open('demo1.jpg', 'wb') as fd:
# 每128写入一次
for chunk in response.iter_content(128):
fd.write(chunk)
download_image_improved()
# -*- coding: utf-8 -*-
import requests
def get_key_info(response, *args, **kwargs):
"""回调函数
"""
print response.headers['Content-Type']
def main():
"""主程序
"""
requests.get('https://api.github.com', hooks=dict(response=get_key_info))
main()