Crawler step one: fetching pages with requests

Use a GET request to fetch the Qiushibaike hot page, then parse the HTML with BeautifulSoup to extract the first joke.

import requests
from bs4 import BeautifulSoup

# Target URL to request
url = "https://www.qiushibaike.com/hot/"
# Send the GET request (a browser-like User-Agent helps avoid being blocked by some sites)
headers = {'User-Agent': 'Mozilla/5.0'}
response = requests.get(url, headers=headers)
response.encoding = 'utf-8'
html = response.text
# Parse the HTML and extract the first joke
soup = BeautifulSoup(html, features="html.parser")
joke = soup.select('div.content')[0].get_text()

# Save the result to a file
with open('joke.txt', 'w', encoding='utf-8') as f:
    f.write(joke)
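select returns every node matching the CSS selector, so the same approach extends to collecting all jokes on the page rather than only the first one. A minimal sketch continuing from the soup object built above (the jokes.txt filename is just an illustrative choice):

# Collect the text of every matched joke node, not just the first
jokes = [node.get_text().strip() for node in soup.select('div.content')]
with open('jokes.txt', 'w', encoding='utf-8') as f:
    f.write('\n\n'.join(jokes))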

Use a POST request to call the Youdao translation interface:

import requests
import json

# 1. Read the word or phrase to translate from the console
content = input('Enter text to translate: ')
# 2. Set the request URL
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
# 3. Build the POST form data
post_form = {'i': content,
             'from': 'AUTO',
             'to': 'AUTO',
             'smartresult': 'dict',
             'client': 'fanyideskweb',
             'salt': '15585139913702',
             'sign': 'fb6ac0aa9aeb0e10da86acfb46224d4d',
             'ts': '1558513991370',
             'bv': 'b5362300738e7f121e2c37609aaa6bb3',
             'doctype': 'json',
             'version': '2.1',
             'keyfrom': 'fanyi.web',
             'action': 'FY_BY_REALTlME'
             }

# 4. Send the POST request
response = requests.post(url, data=post_form)

# 5. Receive the response and extract the translation
trans_json = response.text
trans_dict = json.loads(trans_json)
result = trans_dict['translateResult'][0][0]['tgt']

print('Translation result:', result)
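Since the form sets doctype=json, the response body is JSON and can also be decoded with requests' built-in parser instead of json.loads; this is only a stylistic alternative, not a change in behavior:

# Equivalent to json.loads(response.text) when the body is JSON
trans_dict = response.json()
result = trans_dict['translateResult'][0][0]['tgt']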

Complete version:

import requests
import json

while True:
    # 1. Read the word or phrase to translate from the console
    content = input('Enter text to translate: ')
    # Exit mechanism: stop on empty input
    if content == "":
        print('No valid input, goodbye!')
        break

    # 2. Set the request URL
    url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
    # 3. Build the POST form data
    post_form = {'i': content,
                 'from': 'AUTO',
                 'to': 'AUTO',
                 'smartresult': 'dict',
                 'client': 'fanyideskweb',
                 'salt': '15585139913702',
                 'sign': 'fb6ac0aa9aeb0e10da86acfb46224d4d',
                 'ts': '1558513991370',
                 'bv': 'b5362300738e7f121e2c37609aaa6bb3',
                 'doctype': 'json',
                 'version': '2.1',
                 'keyfrom': 'fanyi.web',
                 'action': 'FY_BY_REALTlME'
                 }

    # 4. Send the POST request
    response = requests.post(url, data=post_form)

    # 5. Receive the response and extract the translation
    trans_json = response.text
    trans_dict = json.loads(trans_json)
    result = trans_dict['translateResult'][0][0]['tgt']

    print('Translation result:', result)
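In practice the request can fail: the network may be unreachable, or the service may reject the fixed salt/sign/ts values and return a body without translateResult. A minimal hardening sketch, reusing url and post_form from the loop above; the error messages are purely illustrative:

try:
    response = requests.post(url, data=post_form, timeout=5)
    response.raise_for_status()
    result = response.json()['translateResult'][0][0]['tgt']
    print('Translation result:', result)
except requests.RequestException as e:
    # Network problem, timeout, or non-2xx status code
    print('Request failed:', e)
except (KeyError, IndexError, ValueError):
    # Body was not the expected JSON structure
    print('Unexpected response:', response.text)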
