爬虫入门二——post

爬取百度翻译处理数据的过程
【表单数据的处理】
form_data = urllib.parse.urlencode(form_data).encode()

import  urllib.request
import urllib.parse
# Baidu Translate suggestion endpoint; the word to look up is sent as a
# POST form field named 'kw'.
post_url = 'https://fanyi.baidu.com/sug'
word = input('请输入你要查询的英文单词:')
# Build the POST form data.
form_data = {
    'kw': word,
}
# Send a browser-like User-Agent rather than urllib's default one.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36',
}
# Build the request object.
request = urllib.request.Request(url=post_url, headers=headers)
# urlopen() requires the body as bytes: URL-encode the dict into a query
# string, then encode that string (UTF-8 by default).
form_data = urllib.parse.urlencode(form_data).encode()
# Passing data makes urllib send a POST. The with-block closes the HTTP
# response (and its connection) once the body has been read.
with urllib.request.urlopen(request, data=form_data) as response:
    print(response.read().decode())

爬虫入门二——post_第1张图片
爬虫入门二——post_第2张图片
爬取百度翻译

import urllib.request
import urllib.parse

# Baidu Translate's translation endpoint (the one the web page itself calls).
post_url = 'https://fanyi.baidu.com/v2transapi'
word = 'baby'
# Form fields copied from a captured browser request.
# NOTE: per the accompanying article, 'sign' and 'token' are encrypted
# values, and changing the word without regenerating them yields error 998.
formdata = {
    'from': 'en',
    'query': word,
    'sign': '814534.560887',
    'simple_means_flag': '3',
    'to': 'zh',
    'token': '02816325ffee1111bf235a46a566198c',
    'transtype': 'realtime',
}

# Request headers copied from the browser session, Cookie included.
# Two captured headers are deliberately left out:
#   - Accept-Encoding: urllib does not decompress gzip/deflate bodies, so
#     requesting them would break the plain .decode() below.
#   - Content-Length: urllib computes it from the request body.
headers = {
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Accept': '*/*',
    'X-Requested-With': 'XMLHttpRequest',
    'Referer': 'https://fanyi.baidu.com/?aldtype=16047',
    'Accept-Language': 'zh-Hans-CN,zh-Hans;q=0.5',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
    'Host': 'fanyi.baidu.com',
    'Connection': 'Keep-Alive',
    'Cache-Control': 'no-cache',
    'Cookie': 'BAIDUID=0D0DAA575FF991F1F48D12B3753F7622:FG=1; BIDUPSID=0D0DAA575FF991F1F48D12B3753F7622; PSTM=1556349090; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; delPer=0; H_PS_PSSID=1444_28777_21090_28775_28721_28838_28584_22157; PSINO=1; locale=zh; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1556349111; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1556349111; from_lang_often=%5B%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%2C%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%5D; to_lang_often=%5B%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%2C%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%5D; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1',
}
request = urllib.request.Request(url=post_url, headers=headers)
# urlopen() requires the body as bytes: URL-encode the dict, then encode it.
formdata = urllib.parse.urlencode(formdata).encode()
# Passing data makes urllib send a POST. The with-block closes the HTTP
# response (and its connection) once the body has been read.
with urllib.request.urlopen(request, data=formdata) as response:
    print(response.read().decode())

爬虫入门二——post_第3张图片
上面这个实例若换成其他单词,会返回 998 错误。这主要是因为 formdata 中的 'sign' 和 'token' 这两个参数经过了加密,需要破解后才能构造出正确的值。

你可能感兴趣的:(爬虫)