爬虫----爬取有道词典

#首先导入所需模块

import requests
import json
import hashlib
import random
import time
def md5(value):
    """Return the MD5 digest of ``value`` as a lowercase hex string.

    Args:
        value: Text to hash; it is encoded as UTF-8 before hashing.

    Returns:
        The 32-character hexadecimal digest.
    """
    # hexdigest() yields the full 128-bit digest as 32 hex characters
    # (not a 16-character value).
    return hashlib.md5(value.encode("utf-8")).hexdigest()
def youdao(i):
    """Send ``i`` to the Youdao web translate endpoint and return the reply.

    Builds the ``salt``/``sign`` pair for the form, POSTs it, prints the
    decoded JSON and its type, and returns the decoded JSON.

    Args:
        i: The text to translate.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    base_url = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"

    # salt: current time in milliseconds followed by one random digit.
    salt = str(int(time.time() * 1000)) + str(random.randint(0, 9))
    # sign: MD5 over client id + text + salt + a fixed secret suffix.
    sign = md5("fanyideskweb" + i + salt + "@6f#X3=cCuncYssPsuRUE")

    # Only these form fields are sent; the other fields seen in the browser
    # request (from, to, smartresult, ts, bv, doctype, version, action) are
    # intentionally left out, as in the original request.
    data = {
        "i": i,
        "client": "fanyideskweb",
        "salt": salt,
        "sign": sign,
        "keyfrom": "fanyi.web",
    }
    headers = {
        # NOTE(review): the "[email protected]" fragment looks like an
        # e-mail-obfuscation artifact from where this snippet was copied;
        # replace it with a real cookie value captured from the browser.
        "Cookie": "[email protected]; OUTFOX_SEARCH_USER_ID_NCOO=772659197.2409945; JSESSIONID=aaa-ssqq9U3wPeM2kxJRw; ___rl__test__cookies=1558614945235",
        "Referer": "http://fanyi.youdao.com/",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    }

    # A timeout keeps the call from blocking forever on a stalled connection.
    response = requests.post(base_url, data=data, headers=headers, timeout=10)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()

    json_data = response.json()
    print(json_data)
    print(type(json_data))
    # Hand the result back so callers can use it, not just read the printout.
    return json_data
if __name__ == '__main__':
    # Script entry point: prompt once for the text and translate it.
    youdao(input("请输入要翻译的内容:"))

接下来是出现的问题的分析:
"""
问题1:第一次请求会遇到问题:{"errorCode": 50}
解决的办法:
此时的解决方案是:在请求头中加上浏览器信息,再次发起请求
问题2:还是获取不到信息:{"errorCode": 50}
解决的方案是:把所有的请求头信息添加到headers中

"""
"""
通过分析:可变的只有:
"salt": "15586149452397",
"sign": "0958578909a0042e066abb1eeba0eb17",
"""

salt: 需要看一下 是否需要加密

sign: 也要看一下是否需要加密

首要解决的问题是salt和sign生成的过程

"""
salt=o.salt=i=r + parseInt(10 * Math.random(), 10);
= "" + (new Date).getTime()+parseInt(10 * Math.random(), 10)

o=r.generateSaltSign(t);
r.generateSaltSign(t)=t.generateSaltSign(t) = r(t)

return {
ts: r,
bv: t,
salt: i,
sign: n.md5("fanyideskweb" + e + i + "@6f#X3=cCuncYssPsuRUE")
}

sign=o.sign,

var r = function(e) {
var t = n.md5(navigator.appVersion),
r = "" + (new Date).getTime(),
i = r + parseInt(10 * Math.random(), 10);
return {
ts: r,
bv: t,
salt: i,
sign: n.md5("fanyideskweb" + e + i + "@6f#X3=cCuncYssPsuRUE")
}
}
e=t=翻译的内容
sign: n.md5("fanyideskweb" + e + i + "@6f#X3=cCuncYssPsuRUE")

= md5("fanyideskweb" + "apple" + salt + "@6f#X3=cCuncYssPsuRUE")

"""

你可能感兴趣的:(爬虫----爬取有道词典)