记录爬虫实战——有道翻译(反js加密)

  • 首先在 Form Data 里面观察哪些字段是加密了的数据
    记录爬虫实战——有道翻译(反js加密)_第1张图片
  • 然后找到js文件格式化后观察加密方式
    记录爬虫实战——有道翻译(反js加密)_第2张图片
  • 代码如下
import time
import random
from urllib import request, parse


# Request timestamp as whole milliseconds since the epoch. time.time() returns
# a float, so it is truncated to an int to keep fractional digits out of the
# 'ts' and 'salt' form fields.
times = int(time.time() * 1000)
# Salt is the timestamp with one random decimal digit appended as text.
# NOTE(review): presumably this mirrors the site's JS (timestamp string plus a
# random digit) -- confirm against the formatted script.
salt = str(times) + str(random.randint(0, 9))

def getMD5(v):
    """Return the hexadecimal MD5 digest of the string ``v``.

    ``v`` is turned into bytes with ``str.encode()`` (default encoding)
    before hashing.
    """
    import hashlib
    return hashlib.md5(v.encode()).hexdigest()

def getSign(key):
    """Return the MD5 signature for translating ``key``.

    The signed text is the client name, ``key``, the module-level ``salt``
    and a fixed secret string, concatenated in that order.
    """
    pieces = ('fanyideskweb', key, salt, 'n%A-rKaT5fb[Gy?;N5@Tj')
    return getMD5(''.join(pieces))

def translate(key):
    """POST ``key`` to the Youdao web-translate endpoint and print the reply.

    Builds the form (including the ``salt``, ``sign``, ``ts`` and ``bv``
    fields), prints it, sends it as a URL-encoded POST with browser-like
    headers, and prints the decoded response body.

    Args:
        key: Text to translate.

    Returns:
        None. The form data and the response body are written to stdout.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
    """
    url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'

    data = {
        'i': key,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        'salt': salt,
        'sign': getSign(key),
        'ts': str(times),
        # MD5 of the User-Agent sent below with the leading 'Mozilla/' removed.
        'bv': getMD5('5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36'),
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_REALTlME'
    }

    print(data)

    data = parse.urlencode(data).encode()

    # Header names must not contain spaces: 'Accept - Language' etc. are not
    # valid HTTP field names, so the intended headers would never be sent.
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Content-Length': str(len(data)),
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        # NOTE(review): the '[email protected]' fragment looks like an e-mail
        # obfuscation artifact from wherever this was copied; presumably it
        # should be a real cookie pair taken from the browser -- confirm.
        'Cookie': '[email protected]; JSESSIONID=aaaO_E15KjQgHhbgWhbYw; OUTFOX_SEARCH_USER_ID_NCOO=440425545.9314584; ___rl__test__cookies=1565584849062',
        'Host': 'fanyi.youdao.com',
        'Origin': 'http://fanyi.youdao.com',
        'Referer': 'http://fanyi.youdao.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest'
    }

    req = request.Request(url=url, data=data, headers=headers)

    # Use the response as a context manager so the connection is always closed.
    with request.urlopen(req) as rsp:
        html = rsp.read().decode()
    print(html)


if __name__ == '__main__':
    # Script entry point: prompt for a word or phrase and translate it.
    translate(input('请输入你要翻译的单词或短语:'))

你可能感兴趣的:(记录爬虫实战——有道翻译(反js加密))