Web Scraping - requests and bs4

How to use requests

# requests: a third-party Python library for making network requests over HTTP
import requests

"""1. 发送请求
requests.get(url, *, headers, params, proxies)  -   发送get请求
requests.post(url, *, headers, params, proxies) -   发送post请求

参数:
url     -       请求地址(一个网站的网址、接口的地址、图片地址等)
headers     -   设置请求头(设置cookie和User-Agent的时候使用)
params  -       设置参数
proxies -       设置代理
""" 
# Send a GET request; the parameters are appended directly to the URL
"""
requests.get('http://api.tianapi.com/auto/index?key=c9d408fefd8ed4081a9079d0d6165d43&num=10')
"""


# Send a POST request, passing the parameters via params (note: params always go into the query string)
"""
params = {
    'key': 'c9d408fefd8ed4081a9079d0d6165d43',
    'num': 10
}
requests.post('http://api.tianapi.com/auto/index', params=params)
"""

response = requests.get('http://www.yingjiesheng.com/')

"""2. 获取响应信息"""
# Set the text encoding (only needed when the decoded text comes out garbled)
response.encoding = 'GBK'
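
# If the right charset is not known in advance, requests' own guess can be used instead
# (a commented-out alternative, not in the original notes):
# response.encoding = response.apparent_encoding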

# Get the response headers
print(response.headers)

# Get the response body
# a. text value (used when requesting a web page; gives the HTML source directly)
# print(response.text)

# b. json() result (used for API endpoints that return JSON data)
# print(response.json())

# c. content value (the raw binary data, used for downloading images, video and audio)
# print(response.content)
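
# A minimal sketch of saving content to a file (e.g. downloading an image); the URL and
# filename here are placeholders, not taken from the original notes
"""
img_response = requests.get('http://www.yingjiesheng.com/favicon.ico')
with open('favicon.ico', 'wb') as f:
    f.write(img_response.content)
"""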

Adding request headers

import requests

# ----------------------1. Adding a User-Agent-------------------
# headers = {
#     'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'
# }
#
# response = requests.get('https://www.51job.com/', headers=headers)
#
# response.encoding = 'gbk'
#
# print(response.text)

# ----------------------2. Adding a cookie--------------------------
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
    # NOTE: the cookie value below is cut off in the original notes
    'cookie': '_zap=4d58cb38-ec48-47b2-9e47-8ff8ef963486; _xsrf=veOhJnW2hAC2BDcgK8KTU4NqUrLUYuTe; d_c0="AHAQrl0PjROPTn2Bv2wpyQXt8QUwjW6yjTU=|1628663892"; Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1628663893; __snaker__id=EYMWPXdZknPXfAye; gdxidpyhxdE=QQQ9DNLdBqx13etuowzGeLbMfcPXBfHckpwZ%2BxZp06A8zi9JHMPDxcbRi4o%5Ca053y5oVnjBBBb99XeqPZicZtcN2%5CR7snyRY8LQP%2Ff1Lu%5CEaPuZo9DldazSjxxzCmy0GXU7zlEHvH5jbqRxsq3d4HX5PN3j%5Cw7yrH2Ls29BYDaDCm0%2Fb%3A1628664795621; _9755xjdesxxd_=32; YD00517437729195%3AWM_NI=xDnvQnHhpYF6yUCebu826Rf%2FtJfpY7qOemzjWKJqvTeiC%2FN7ac2Cye8KddfyGIjjNxMaj1gnnUNWT6pGUEzV16y8CNLWmizD0SakKVmh9ELwcWrCleatFrWHNaWfd%2F1ZdWM%3D; YD00517437729195%3AWM_NIKE=9ca17ae2e6ffcda170e2e6eed4b479ad9e898bbc2591868bb3d85e968a9aaab566acb08196e862bbb4b7ade52af0fea7c3b92abc9ca297d4668cb8c0bac53ebb8b8d86e17ffceefab3e525b896aa91dc3db391ab8ef96295958692f560a7b78dd0cd3da3bdfea4fc6ffc95ac85e5738597a68bcd748fbfa6d2e666ae8b82b8d73eb4999ba6f95ef3eab7d9c2469089a38af950f48daf8cca5eafb8f7a6cf7da189bea4ef6fa3ac8a93d6448ebf9987e725f386acb8d037e2a3; YD00517437729195%3AWM_TID=Lftr4M6kyApFUEUBFFcv0DqgQ5uBSC%2FF; captcha_session_v2="2|1:0|10:1628663907|18:captcha_session_v2|88:OCtMcVVod1VSRDZ4Q2tTbGNyNUVIUXdJREc5Y0lSbjJyMklwSWh5MTA0NVhpL3JLak1CZXBPMEQ1ZlcycGludQ==|7f6c9d93866de2c49808fd0c3fa7ec6f7ef407e0fa6678072b00b577b351fb5f"; captcha_ticket_v2="2|1:0|10:1628663918|17:captcha_ticket_v2|704:eyJ2YWxpZGF0ZSI6IkNOMzFfWVZ5NHQwWm1wZlJWN1pSQzd6czQ0dUF6cG51Q2xFbHk2d0h0RjdYSWt4RDQ3ODJuOXMta2ROclliYkt6SFNGWUNvc2NCLklvdll0ejVZSmM5T0lOR1lwa2gxTTQwRWlVOWtmdmZqN3U3Q2g2Y2ZQU1c2VjJ3UDJvV0ZWa2hpLTJWUlF6ZDdmTWItMnRDV1dfOHM2ZkNpcFRsYlhOdUZaOXpVVDlCMXhGRy0xTkdoUnJrWlpkUERmelNiVzZMMk83WVVkSkVUSjJzZ1F1WEtnODBIaGV0NlNjcVpUdUt4ZUhSUFNyS1lOUGRfeTl5dEI5TUduS2xFUVpRYzB6REs3d0dzTWpKbW1FUzBiSlBDdUo1WURxd1F0cVdFLTFOX01TQUJOSjdraEYxbDZzSUxRcVVaZmE1NDR5OXRKVXBwa014TkQ2N3lDR0xxNG4yWENUaGhlLUlsMEEyTHFuV3RPa1ppSy1STENCWVVRdkZKaDVYMWR4YVhaeWl5QnpRZ2FrUE5UelNRVmg3RzJVeUJmU1VGVGRyMHpFODktWTcuRENMNzA5cVEuRnZTN0NfWk9XN0swOW9vaUs1anJMcC1SbHotWGRPdE9wTnZpbGJXY3U5dU0uSjFhNTFr'
}

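# The cookie names above (_zap, _xsrf, d_c0, captcha_session_v2) suggest the value was copied
# from zhihu.com, so the request would presumably be sent like this (a sketch, not part of the
# original notes):
# response = requests.get('https://www.zhihu.com/', headers=headers)
# print(response.text)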