python爬虫知识点总结(四)Requests库的基本使用

官方文档:http://docs.python-requests.org/en/master

安装方法

  命令行下输入:pip3 install requests。详见:https://www.cnblogs.com/cthon/p/9388304.html

一、什么是Requests?

python爬虫知识点总结(四)Requests库的基本使用_第1张图片

 

requests

实例引入

import requests

# Fetch a page, then show the response's type, status code, body and cookies.
resp = requests.get('https://www.baidu.com')
print(type(resp))
print(resp.status_code)
body = resp.text
print(type(body))
print(body)
print(resp.cookies)

  

各种请求方式

import requests

# One module-level helper exists per HTTP verb; issue each request in turn.
calls = (
    (requests.post, 'http://httpbin.org/post'),
    (requests.put, 'http://httpbin.org/put'),
    (requests.delete, 'http://httpbin.org/delete'),
    (requests.get, 'http://httpbin.org/get'),
    (requests.options, 'http://httpbin.org/get'),
)
for send, url in calls:
    send(url)

  

请求

基本GET请求

基本写法

import requests

# Simplest form of a GET request: no parameters, print the body as text.
url = 'http://httpbin.org/get'
print(requests.get(url).text)

  

带参数GET请求

import requests

# Query parameters are written directly into the URL's query string.
resp = requests.get('http://httpbin.org/get?name=jack&age=22')
body = resp.text
print(body)

  

import requests

# Query parameters supplied as a dict through `params` rather than embedded in the URL.
query = dict(name='jack', age=22)
resp = requests.get('http://httpbin.org/get', params=query)
print(resp.text)

  

解析json

import requests
import json

# Use an endpoint that answers with a JSON document: response.json() raises
# when the body is not valid JSON.
response = requests.get('http://httpbin.org/get')
print(type(response.text))
# response.json() and json.loads(response.text) decode the same body text.
print(response.json())
print(json.loads(response.text))
print(type(response.json()))

  

获取二进制数据

import requests

# Compare the two views of the same body: decoded text vs. raw content.
resp = requests.get('https://github.com/favicon.ico')
as_text = resp.text
as_bytes = resp.content
print(type(as_text), type(as_bytes))
print(as_text)
print(as_bytes)

  

import requests

response = requests.get('https://www.bilibili.com/video/av24028845/?p=9')
# response.content is written as-is, so the file is opened in binary mode.
# The with-statement closes the file on exit; no explicit close() is needed.
# NOTE(review): this URL looks like a video *page* rather than a media file —
# confirm the saved bytes are really what 'q.avi' is meant to contain.
with open('q.avi','wb') as f:
    f.write(response.content)

  

添加headers

import requests

# Request sent without passing any custom headers.
resp = requests.get('https://zhihu.com/explore')
page = resp.text
print(page)

  

import requests

# Send a browser-like User-Agent header along with the request.
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36'
resp = requests.get('https://www.zhihu.com/explore', headers={'User-Agent': user_agent})
print(resp.text)

  

基本POST请求

import requests

# Form fields go in `data`; print the raw body, then its decoded JSON form.
form = dict(name='jack', age='22')
resp = requests.post('https://httpbin.org/post', data=form)
print(resp.text)
print(resp.json())

  

import requests

# POST form fields together with a custom User-Agent header.
form = dict(name='jack', age='22')
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36'
resp = requests.post(
    'https://httpbin.org/post',
    data=form,
    headers={'User-Agent': user_agent},
)
print(resp.text)
print(resp.json())

  

响应

response属性

import requests

resp = requests.get('http://www.jianshu.com')
# Print the type and value of each response attribute, in this order.
for attr in ('status_code', 'headers', 'cookies', 'url', 'history'):
    value = getattr(resp, attr)
    print(type(value), value)

  

状态码判断

import requests

resp = requests.get('http://www.cnblogs.com/cthon/p/9383778.html')
# Compare against the named constant in requests.codes: a 404 is reported,
# any other status ends the script.
if resp.status_code == requests.codes.not_found:
    print('404 Not Found')
else:
    exit()

  

import requests

resp = requests.get('http://www.cnblogs.com/cthon/p/9383778.html')
# Anything other than 200 ends the script; only a 200 reaches the print below.
if resp.status_code != 200:
    exit()
print('Request Successfully')

  

状态码

100:('continue',),
101:('switching_protocols',),
102:('processing',),
103:('checkpoint',),
122:('uri_too_long','request_uri_too_long'),
200:('ok','okay','all_ok','all_okay','all_good','\\o/','✓',),
201:('created',),
202:('accepted',),
203:('non_authoritative_info','non_authoritative_information'),
204:('no_content',),
205:('reset_content','reset',),
206:('partial_content','partial'),
207:('multi_status','multiple_status','multi_stati','multiple_stati'),
208:('already_reported',),
226:('im_used',),
    
#Redirection
300:('multiple_choices',),
301:('moved_permanently','moved','\\o-'),
302:('found',),
303:('see_other','other'),
304:('not_modified',),
305:('use_proxy',),
306:('switch_proxy',),
307:('temporary_redirect','temporary_moved','temporary'),
308:('permanent_redirect','resume_incomplete','resume',),#These 2 to be removed in 3.0
    
#Client Error
400:('bad_request','bad'),
401:('unauthorized',),
402:('payment_required','payment'),
403:('forbidden',),
404:('not_found','-o-'),
405:('method_not_allowed','not_allowed'),
406:('not_acceptable',),
407:('proxy_authentication_required','proxy_auth','proxy_authentication'),
408:('request_timeout','timeout'),
409:('conflict',),
410:('gone',),
411:('length_required',),
412:('precondition_failed','precondition'),
413:('request_entity_too_large',),
414:('request_uri_too_large',),
415:('unsupported_media_type','unsupported_media','media_type'),
416:('requested_range_not_satisfiable','requested_range','range_not_satisfiable'),
417:('expectation_failed',),
418:('im_a_teapot','teapot','i_am_a_teapot'),
421:('misdirected_request',),
422:('unprocessable_entity','unprocessable'),
423:('locked',),
424:('failed_dependency','dependency'),
425:('unordered_collection','unordered'),
426:('upgrade_required','upgrade'),
428:('precondition_required','precondition'),
429:('too_many_requests','too_many'),
431:('header_fields_too_large','fields_too_large'),
444:('no_response','none'),
449:('retry_with','retry'),
450:('blocked_by_windows_parental_controls','parental_controls'),
451:('unavailable_for_legal_reasons','legal_reasons'),
499:('client_closed_request',),

#Server Error
500:('internal_server_error','server_error','/o\\','✗'),
501:('not_implemented',),
502:('bad_gateway',),
503:('service_unavailable','unavailable'),
504:('gateway_timeout',),
505:('http_version_not_supported','http_version'),
506:('variant_also_negotiates',),
507:('insufficient_storage',),
509:('bandwidth_limit_exceeded','bandwidth'),
510:('not_extended',),
511:('network_authentication_required','network_auth','network_authentication'),

  

高级文件操作

import requests

# Open the file inside a with-block so the handle is closed once the upload
# has been sent, instead of being left open for the rest of the script.
with open('favicon.ico','rb') as fh:
    files = {'file': fh}
    response = requests.post('http://httpbin.org/post',files=files)
print(response.text)

  

获取Cookie

import requests

resp = requests.get('http://www.baidu.com')
jar = resp.cookies
print(jar)
# Walk the cookie jar and print every cookie as name=value.
for name, val in jar.items():
    print('='.join((name, val)))

  

会话维持

import requests

# The two calls below are independent: no Session object is shared between them.
set_url = 'http://httpbin.org/cookies/set/number/123456789'
read_url = 'http://httpbin.org/cookies'
requests.get(set_url)
resp = requests.get(read_url)
print(resp.text)

  

import requests

# Both requests go through the same Session object.
session = requests.Session()
session.get('http://httpbin.org/cookies/set/number/123456789')
resp = session.get('http://httpbin.org/cookies')
body = resp.text
print(body)

  

证书验证

# Per the original note: 12306 serves a bad certificate, so this request fails.
import requests

target = 'https://www.12306.cn/'
resp = requests.get(target)
print(resp.status_code)

  

import requests
from requests.packages import urllib3

# Silence urllib3's warnings, then skip certificate verification for this request.
urllib3.disable_warnings()
resp = requests.get('https://www.12306.cn', verify=False)
print(resp.status_code)

  

import requests

# `cert` takes a (certificate file, key file) tuple for a client-side certificate.
# The result is bound to `response`, the name the print below reads.
response = requests.get('https://www.12306.cn',cert=('/path/server.crt','/path/key'))
print(response.status_code)

  

代理设置

  http代理

import requests

# Proxy mapping keyed by URL scheme, both entries pointing at a loopback proxy.
proxy_map = dict(http='http://127.0.0.1:9743', https='https://127.0.0.1:9743')
resp = requests.get('https://www.taobao.com', proxies=proxy_map)
print(resp.status_code)

  

import requests

# Proxy credentials are embedded in the proxy URL as scheme://user:password@host:port.
# NOTE(review): only an 'http' entry is defined while the request below targets an
# https:// URL; requests picks the proxy by URL scheme, so confirm whether an
# 'https' entry is also wanted here.
proxies = {
    'http':'http://user:password@127.0.0.1:9743'
}
response = requests.get('https://www.taobao.com',proxies=proxies)
print(response.status_code)

  

  socket代理

pip3 install 'requests[socks]'
 
import requests

# SOCKS proxies use the same scheme-keyed mapping, with a socks5:// proxy URL.
# Both entries point at the same host:port.
proxies = {
    'http':'socks5://127.0.0.1:9742',
    'https':'socks5://127.0.0.1:9742'
}
response = requests.get('https://www.taobao.com',proxies=proxies)
print(response.status_code)

 

超时设置

import requests
from requests.exceptions import Timeout
try:
    # A single number applies to both the connect and the read phase.
    response = requests.get('http://www.baidu.com',timeout = 0.01)
    print(response.status_code)
# Timeout is the common base of ConnectTimeout and ReadTimeout, so a timeout
# in either phase is reported instead of escaping as an uncaught exception.
except Timeout:
    print('Timeout')

  

认证设置

import requests
from requests.auth import HTTPBasicAuth

# Credentials are passed as an explicit HTTPBasicAuth object.
credentials = HTTPBasicAuth('user', '123')
resp = requests.get('http://120.27.34.24:9001', auth=credentials)
print(resp.status_code)

  

import requests

# Credentials are passed as a plain (user, password) tuple.
credentials = ('user', '123')
resp = requests.get('http://120.27.34.24:9001', auth=credentials)
print(resp.status_code)

  

异常处理

import requests
from requests.exceptions import ReadTimeout,HTTPError,RequestException

# Handlers are ordered from most specific to most general; RequestException is
# the base class of the others and therefore comes last.
try:
    response = requests.get('http://www.baidu.com',timeout=0.1)
    print(response.status_code)
except ReadTimeout:
    print('Timeout')
except HTTPError:
    print('Http error')
# Name the requests class explicitly: a bare `ConnectionError` is the Python
# builtin, which requests' own ConnectionError does not inherit from, so that
# handler would never run for connection failures raised by requests.
except requests.exceptions.ConnectionError:
    print('Connection Error')
except RequestException:
    print('Error')

  

 

 

 

 

 

转载于:https://www.cnblogs.com/cthon/p/9398026.html

你可能感兴趣的:(python爬虫知识点总结(四)Requests库的基本使用)