Python爬虫系列-有道批量翻译英文单词-注音标版

爬虫系列更新-第二篇文章——《Python爬虫系列-有道批量翻译英文单词-注音标版》

之前发布计算机英文单词时,我研究了一下怎样把一个含有大量英文单词的txt文件批量翻译成如下格式:

Python爬虫系列-有道批量翻译英文单词-注音标版_第1张图片

如上图,左边图片是需要翻译的txt文本,右边图片是翻译后的txt文本。

Python爬虫系列-有道批量翻译英文单词-注音标版_第2张图片

上图是程序运行时的实际界面效果。

Python代码参考了CSDN上这位作者的帖子,他的分析博文很牛,但是代码没有批量翻译功能,所以我在他的代码基础上添加了批量读取txt文件、翻译成中文并写入国际音标的功能,全部代码如下:

import hashlib
import base64
import requests
import json
import time
 
from urllib.parse import urlencode
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad, pad
 
 
class AESCipher(object):
    """AES-CBC helper that works with a fixed key and IV.

    ``key`` and ``iv`` are the MD5 digests of two fixed secret strings.
    Ciphertext is exchanged as Base64 in which ``-`` and ``_`` replace the
    standard ``+`` and ``/`` characters.
    """

    # Each value is the MD5 digest of the secret string it is built from.
    key = hashlib.md5(
        b'ydsecret://query/key/B*RGygVywfNBwpmBaZg*WT7SIOUP2T0C9WHMZN39j^DAdaZhAnxvGcCY6VYFwnHl'
    ).digest()
    iv = hashlib.md5(
        b'ydsecret://query/iv/C@lZe2YzHtZ2CYgaXKSVfsb7Y4QWHjITPPZ0nQp87fBeJ!Iv6v^6fvi2WN@bYpJ4'
    ).digest()

    @staticmethod
    def decrypt(data):
        """Base64-decode ``data``, AES-CBC decrypt it and return the text.

        :param data: Base64 ciphertext using the ``-_`` alternative alphabet
        :return: the decrypted, unpadded message decoded to ``str``
        """
        aes = AES.new(AESCipher.key, AES.MODE_CBC, iv=AESCipher.iv)
        raw = base64.b64decode(data, b'-_')
        plain = aes.decrypt(raw)
        return unpad(plain, AES.block_size).decode()

    @staticmethod
    def encrypt(plaintext: str):
        """Pad and AES-CBC encrypt ``plaintext``, returning Base64 bytes.

        :param plaintext: text to encrypt
        :return: Base64 ciphertext (``bytes``) using the ``-_`` alphabet
        """
        aes = AES.new(AESCipher.key, AES.MODE_CBC, iv=AESCipher.iv)
        padded = pad(plaintext.encode(), AES.block_size)
        return base64.b64encode(aes.encrypt(padded), b'-_')
 
 
def get_form_data(sentence, from_lang, to_lang):
    """
    Build the form parameters for a translation request.

    :param sentence: text to translate
    :param from_lang: source language
    :param to_lang: target language
    :return: dict of form fields, including the current timestamp and the
        MD5 signature computed from it
    """
    client = 'fanyideskweb'
    product = 'webfanyi'
    timestamp = time.time()

    # The signature is the MD5 hex digest of these four fields, URL-encoded
    # in exactly this order.
    signed_fields = {
        'client': client,
        'mysticTime': timestamp,
        'product': product,
        'key': 'fsdsogkndfokasodnaso',
    }
    signature = hashlib.md5(
        urlencode(signed_fields).encode('utf-8')
    ).hexdigest()

    return {
        'i': sentence,
        'from': from_lang,
        'to': to_lang,
        'domain': 0,
        'dictResult': 'true',
        'keyid': product,
        'sign': signature,
        'client': client,
        'product': product,
        'appVersion': '1.0.0',
        'vendor': 'web',
        'pointParam': 'client,mysticTime,product',
        'mysticTime': timestamp,
        'keyfrom': 'fanyi.web',
    }
 
 
def translate(sentence, from_lang='auto', to_lang='', timeout=10):
    """
    Translate ``sentence`` through the Youdao web endpoint and format the
    phonetics and translation on one line.

    :param sentence: text to translate
    :param from_lang: source language
    :param to_lang: target language
    :param timeout: seconds to wait for the HTTP response before giving up
    :return: ``"英:[<uk>] 美:[<us>] 译:[<translation>]"`` on success, or ``0``
        when the request, the decryption or the lookup of any of those fields
        fails (the error is printed, not raised)
    """
    # Request parameters for the Youdao translation web page.
    url = 'https://dict.youdao.com/webtranslate'
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
        'referer': 'https://fanyi.youdao.com/',
        # NOTE(review): '[email protected]' looks like an e-mail-obfuscation
        # artifact of the page this code was copied from; confirm the cookie.
        'cookie': '[email protected]; OUTFOX_SEARCH_USER_ID_NCOO=818822109.5585971;'
    }
    params = get_form_data(sentence, from_lang, to_lang)

    try:
        # A timeout keeps one stalled connection from hanging a whole batch.
        res = requests.post(url, headers=headers, data=params, timeout=timeout)
        res.raise_for_status()
        # The response body is AES-encrypted, Base64-encoded JSON.
        ret = json.loads(AESCipher.decrypt(res.text))
        word = ret["dictResult"]["ec"]["word"]
        tgt = ret['translateResult'][0][0]['tgt']
        return ("英:[" + word["ukphone"] + "] "
                + "美:[" + word["usphone"] + "]"
                + " 译:[" + tgt + "]")
    except (requests.RequestException, ValueError,
            KeyError, IndexError, TypeError) as e:
        # ValueError covers Base64, padding, decoding and JSON errors; the
        # lookup errors cover results that lack phonetics or a translation.
        print('翻译失败:', e)
        return 0
        
if __name__ == '__main__':
    # Batch mode: read a text file, translate every non-empty line and write
    # "<line> <translation>" lines to "<input name>_已翻译.txt" (UTF-8) next to
    # the input file.
    import os

    # The backslash is doubled so the example path is printed literally;
    # "\1" would be read as an octal escape (a control character).
    path = input("请输入你要翻译的txt文档路径(E:\\1.txt): ")
    translated = []
    with open(path, 'r') as f:
        for line in f:
            # Drop the line ending before translating, and skip blank lines
            # so they do not trigger a request.
            word = line.strip()
            if not word:
                continue
            print(word)
            result = translate(word)
            # translate() returns 0 on failure; such lines are left out.
            if result:
                translated.append(word + " " + result + "\n")
    out = "".join(translated)

    # splitext keeps the name intact whatever the extension length is.
    out_path = os.path.splitext(path)[0] + "_已翻译.txt"
    with open(out_path, 'w', encoding='utf-8') as fout:
        fout.write(out)
    print(out)
    print("已完成!")
    

调用方法如下:

把代码保存到.py文件中(需要先安装requests和pycryptodome这两个第三方库),运行该.py文件,然后输入需要翻译的txt文本的路径,如下图所示:

Python爬虫系列-有道批量翻译英文单词-注音标版_第3张图片

然后,翻译后的txt文件会出现在原文本文件所在的目录里,如下图:

Python爬虫系列-有道批量翻译英文单词-注音标版_第4张图片

 上图中"1.txt"就是输入的英文文档,"1_已翻译.txt"就是翻译后的文档。

你可能感兴趣的:(Python爬虫系列,python,开发语言)