# 参考链接:http://www.manongjc.com/article/76959.html
#! /home/python/.virtualenvs/py3_spider/bin/python
"""
2019-8-2
思路:
1.检测输入语言的类型,这个很好实现
2.开始进行翻译,难点:破解表单sign参数
3.破解流程:
a.浏览器全局搜索sign,找出js
b.debug调试,定位加密函数function e(r)
c.在Python中执行js代码,获取sign
d.难点:获取token,gtk,js替换
e.添加为Python脚本,增加可执行权限,可当翻译工具使用
"""
import re
import js2py
import requests
# JavaScript used to compute the `sign` form field: n() is the bit-mixing
# helper and e(r) produces the signature for the query string r.  The text is
# kept exactly as originally embedded; trans() substitutes the gtk literal
# for the `window[...]` lookup before executing it.
# NOTE(review): e() calls a helper a() in its surrogate-pair branch, but no
# a() is defined in this snippet, so that branch would fail if it is reached.
_SIGN_JS = r"""
function n(r, o) {
for (var t = 0; t < o.length - 2; t += 3) {
var a = o.charAt(t + 2);
a = a >= "a" ? a.charCodeAt(0) - 87 : Number(a), a = "+" === o.charAt(t + 1) ? r >>> a : r << a, r = "+" === o.charAt(t) ? r + a & 4294967295 : r ^ a
}
return r
}
function e(r) {
var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
if (null === o) {
var t = r.length;
t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10))
} else {
for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++) "" !== e[C] && f.push.apply(f, a(e[C].split(""))), C !== h - 1 && f.push(o[C]);
var g = f.length;
g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join(""))
}
var u = void 0, l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
u = null !== i ? i : (i = window[l] || "") || "";
for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
var A = r.charCodeAt(v);
128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), S[c++] = A >> 18 | 240, S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224, S[c++] = A >> 6 & 63 | 128), S[c++] = 63 & A | 128)
}
for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++) p += S[b], p = n(p, F);
return p = n(p, D), p ^= s, 0 > p && (p = (2147483647 & p) + 2147483648), p %= 1e6, p.toString() + "." + (p ^ m)
}
"""


class BaiduFanyi:
    """Chinese <-> English translation through the Baidu Fanyi web endpoints."""

    def __init__(self, keywords, timeout=10):
        """
        :param keywords: text whose language is detected and then translated
        :param timeout: seconds to wait on each HTTP request before giving up
        """
        self.keywords = keywords
        self.timeout = timeout
        self.url_root = 'http://fanyi.baidu.com/'  # page that embeds token and gtk
        self.url_langdetect = 'https://fanyi.baidu.com/langdetect'  # language detection
        self.url_trans = 'https://fanyi.baidu.com/v2transapi'  # translation
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36',
            'origin': 'https://fanyi.baidu.com',
            'referer': 'https://fanyi.baidu.com/?aldtype=16047'
        }
        self.data_langdetect = {
            'query': self.keywords
        }
        self.session = requests.session()
        # Merge into the session's own header mapping rather than replacing it,
        # so the defaults that requests sets up are kept.
        self.session.headers.update(self.headers)
        # Environment in which the signing JavaScript is executed.
        self.context = js2py.EvalJs()

    def langdetect(self):
        """
        Post ``keywords`` to the language-detection endpoint.

        :return: the ``lan`` field of the reply (e.g. 'en', 'zh') when the
                 reply reports ``error == 0``; None otherwise, including when
                 the request or JSON decoding fails (the error is printed).
        """
        try:
            response = self.session.post(
                self.url_langdetect,
                data=self.data_langdetect,
                timeout=self.timeout,
            )
            response_dict = response.json()  # e.g. {'error': 0, 'msg': 'success', 'lan': 'zh'}
            if response_dict.get('error') == 0:
                return response_dict.get('lan')
        except Exception as e:
            print(e)
        return None

    def get_token_gtk(self):
        """
        Fetch the root page and extract the values needed to build ``sign``.

        :return: ``(token, gtk)``; ``gtk`` keeps its surrounding single quotes
                 because it is pasted into the JavaScript as a string literal.
        :raises IndexError: when either value cannot be found in the page
                 (same exception type the original ``findall(...)[0]`` raised).
        """
        response = self.session.get(self.url_root, timeout=self.timeout)
        response_str = response.content.decode()
        # The page uses single quotes around both values.
        token_match = re.search(r"token: '(.*?)'", response_str)
        if token_match is None:
            raise IndexError('token not found in the Baidu Fanyi page')
        gtk_match = re.search(r";window.gtk = ('.*?');", response_str)
        if gtk_match is None:
            raise IndexError('gtk not found in the Baidu Fanyi page')
        return token_match.group(1), gtk_match.group(1)

    def trans(self, lan):
        """
        Translate ``keywords`` from ``lan`` into English (when ``lan`` is
        'zh') or into Chinese (for any other ``lan``).

        :param lan: source language code as returned by ``langdetect``
        :return: the translated string, or None when any step fails (the
                 error is printed).
        """
        try:
            token, gtk = self.get_token_gtk()
            # Replace the browser-only gtk lookup with the literal from the page.
            js = _SIGN_JS.replace(r'null !== i ? i : (i = window[l] || "") || ""', gtk)
            # Define e(r) in the JS context, then sign the query with it.
            self.context.execute(js)
            sign = self.context.e(self.keywords)
            data = {
                'from': lan,
                'to': 'en' if lan == 'zh' else 'zh',
                'query': self.keywords,
                'transtype': 'translang',
                'simple_means_flag': 3,
                'sign': sign,  # computed by the JavaScript above
                'token': token  # scraped from the root page
            }
            response = self.session.post(self.url_trans, data=data, timeout=self.timeout)
            response_dict = response.json()
            return response_dict['trans_result']['data'][0]['dst']
        except Exception as e:
            print(e)
            return None

    def run(self):
        """Detect the language of ``keywords``, translate it and print the result."""
        # 1. Detect the input language; stop if detection failed.
        lan = self.langdetect()
        if lan is None:
            return
        # 2. Translate; trans() has already printed the error when it returns None,
        #    so do not print a misleading "keywords-->None" line.
        ret = self.trans(lan)
        if ret is None:
            return
        print('%s-->%s' % (self.keywords, ret))  # e.g. 中国-->China
def main():
    """Prompt for text in a loop and print the translation of each entry.

    The loop ends quietly on end-of-input (Ctrl-D) or Ctrl-C instead of
    terminating with a traceback. Blank input is skipped without sending
    any request.
    """
    try:
        while True:
            keywords = input('please input the keywords:')
            if not keywords.strip():
                continue
            baidu_fanyi = BaiduFanyi(keywords)
            baidu_fanyi.run()
    except (EOFError, KeyboardInterrupt):
        # Finish the prompt line so the shell prompt starts on a fresh line.
        print()


if __name__ == '__main__':
    main()