过去的两三年中,我一直关注机器翻译API在自动化翻译过程中的应用,包括采用CAT工具和Python编程语言来调用机器翻译API,然后再进行译后编辑,从而达到快速翻译的目的。
然而,我发现随着人工智能的发展,很多机器翻译也做了相应的调整,调用机器翻译API的样例也发生了变化。今天我特意把主流机器翻译API调用的Python代码汇聚于此,总共有七种方法,供大家学习参考。
目前官方网站的调用代码没有之前的好用,所以我们依然采用以前的代码,只需安装requests,而不用安装额外的Python包。只要知道Google翻译的API密钥和翻译的语对方向即可,代码如下:
import requests
def google_api(content, source="zh", target="en", api_key="YOUR_API_KEY", timeout=10):
    """Translate *content* with the Google Cloud Translation v2 REST endpoint.

    Args:
        content: Text to translate.
        source: Source language code (defaults to "zh", the previous
            hard-coded value).
        target: Target language code (defaults to "en", the previous
            hard-coded value).
        api_key: Your own Google API key; the default is only a placeholder.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``translatedText`` of the first translation in the response.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status
            (for example an invalid key), instead of failing later with an
            uninformative ``KeyError``.
    """
    url = "https://translation.googleapis.com/language/translate/v2"
    data = {
        'key': api_key,
        'source': source,
        'target': target,
        'q': content,
        'format': 'text',
    }
    # The request is POSTed but marked as a GET via this override header.
    headers = {'X-HTTP-Method-Override': 'GET'}
    # A timeout keeps the call from hanging forever on a stalled connection.
    response = requests.post(url, data=data, headers=headers, timeout=timeout)
    response.raise_for_status()
    res = response.json()
    return res["data"]["translations"][0]["translatedText"]
# Demo call: translate a sample Chinese sentence and print it with a label.
print("谷歌翻译:" + google_api(content="好好学习,天天向上!"))
Python调用百度机器翻译API的代码有些变化,但前提是需要申请百度的api id和key,放到以下代码中:
import requests
import random
import json
from hashlib import md5
# Set your own appid/appkey.
appid = 'YOUR APP ID'
appkey = 'YOU APP KEY'
# For list of language codes, please refer to `https://api.fanyi.baidu.com/doc/21`
from_lang = 'en'
to_lang = 'zh'
# Use the HTTPS endpoint so the appid, signature and query text are not
# transmitted in clear text.
endpoint = 'https://fanyi-api.baidu.com'
path = '/api/trans/vip/translate'
url = endpoint + path
query = 'Hello World!'
# Generate salt and sign
def make_md5(s, encoding='utf-8'):
    """Return the hexadecimal MD5 digest of string *s* encoded with *encoding*."""
    digest = md5()
    digest.update(s.encode(encoding))
    return digest.hexdigest()
def baidu_api(query, from_lang, to_lang):
    """Translate *query* with the Baidu general translation API.

    Relies on the module-level ``appid``, ``appkey`` and ``url`` settings and
    on ``make_md5`` to build the request signature.

    Args:
        query: Text to translate.
        from_lang: Source language code.
        to_lang: Target language code.

    Returns:
        The translated text. When the response contains several result
        segments, they are joined with newlines instead of dropping all but
        the first one.

    Raises:
        KeyError: If the response carries no ``trans_result``; the message
            includes the ``error_code``/``error_msg`` fields of the response.
    """
    # Generate salt and sign
    salt = random.randint(32768, 65536)
    sign = make_md5(appid + query + str(salt) + appkey)

    # Build request
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    payload = {
        'appid': appid,
        'q': query,
        'from': from_lang,
        'to': to_lang,
        'salt': salt,
        'sign': sign,
    }

    # Send the fields in the form body (matching the declared Content-Type)
    # rather than in the URL, so long texts are not limited by URL length.
    r = requests.post(url, data=payload, headers=headers, timeout=10)
    result = r.json()

    if "trans_result" not in result:
        raise KeyError(
            "Baidu translate API error {}: {}".format(
                result.get('error_code'), result.get('error_msg')
            )
        )
    return "\n".join(item['dst'] for item in result["trans_result"])
# Demo call using the module-level sample query and language pair.
print(baidu_api(query=query, from_lang=from_lang, to_lang=to_lang))
以下代码是经过我改进后的代码,效率更高一些,供大家参考。小牛现在提供100万字符/月的额度,大家可以申请试用。
import json
import requests
apikey = "YOUR_API_KEY"  # NiuTrans API key; replace with your own


def translate(sentence, src_lan, tgt_lan):
    """Translate *sentence* with the NiuTrans text translation API.

    Args:
        sentence: Text to translate.
        src_lan: Source language code.
        tgt_lan: Target language code.

    Returns:
        The ``tgt_text`` field of the JSON response.

    Raises:
        RuntimeError: If the response has no ``tgt_text``. Previously the raw
            ``requests.Response`` object was returned in that case, which made
            callers fail later with an unrelated ``TypeError``.
    """
    url = 'http://api.niutrans.com/NiuTransServer/translation?'
    data = {"from": src_lan, "to": tgt_lan, "apikey": apikey, "src_text": sentence}
    # A timeout keeps the call from hanging forever on a stalled connection.
    res = requests.post(url, data=data, timeout=10)
    res_dict = res.json()
    if "tgt_text" not in res_dict:
        raise RuntimeError(
            "NiuTrans API error {}: {}".format(
                res_dict.get("error_code"), res_dict.get("error_msg")
            )
        )
    return res_dict['tgt_text']
if __name__ == "__main__":
    # Interactive loop: read a line, translate it to English, print the result.
    while True:
        try:
            line = input("请输入要翻译的文本:")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C ends the session cleanly instead of a traceback.
            print()
            break
        if not line.strip():
            # Nothing to translate; do not spend an API call on it.
            continue
        try:
            trans = translate(line, 'auto', 'en')
        except Exception as exc:
            # Top-level boundary: report the failure and keep the loop alive.
            print(exc)
        else:
            print(f"{trans}\n---------")
之前我已经写出采用Python调用同花顺机器翻译api来翻译字幕的代码,详情如下:
import json,os,sys
import requests
import easygui as g
# appid = 'the app id you applied for'
# appkey = 'the app key you applied for'
# Work around certificate errors in packaged builds by pointing requests at a
# cacert.pem shipped next to the script. Only do so when that file actually
# exists; otherwise every HTTPS request would fail on the missing bundle.
_bundled_ca = os.path.join(os.path.dirname(sys.argv[0]), 'cacert.pem')
if os.path.isfile(_bundled_ca):
    os.environ['REQUESTS_CA_BUNDLE'] = _bundled_ca
def get_key(path="appid.txt"):
    """Read ``name=value`` credentials from a local text file into a dict.

    Args:
        path: File to read; defaults to ``appid.txt`` in the working
            directory, as before.

    Returns:
        A dict mapping each stripped name to its stripped value.

    Lines without an ``=`` (for example blank lines) are skipped instead of
    raising ``IndexError``, and only the first ``=`` splits a line so values
    that themselves contain ``=`` are kept intact.
    """
    dic = {}
    # utf-8-sig also accepts plain UTF-8 and drops a BOM, which would
    # otherwise become part of the first key.
    with open(path, "r", encoding="utf-8-sig") as f:
        for raw in f:
            name, sep, value = raw.partition("=")
            if not sep:
                continue
            dic[name.strip()] = value.strip()
    return dic
def getToken():
    """Fetch a fresh access token for the 10jqka (Tonghuashun) API.

    Per the original author's note, the token is only valid for 24 hours, so
    it is requested anew on every run. Uses the module-level ``appid`` and
    ``appkey``.

    Returns:
        The access token string, or ``''`` when the response does not report
        success (``flag`` equal to 0) or carries no token.
    """
    tokenUrl = 'https://b2b-api.10jqka.com.cn/gateway/service-mana/app/login-appkey'
    param = {'appId': appid, 'appSecret': appkey}
    # A timeout keeps the call from hanging forever on a stalled connection.
    authResult = requests.post(tokenUrl, data=param, timeout=10)
    res = json.loads(authResult.content)
    if res.get('flag') != 0:
        return ''
    # Tolerate a success flag without the expected payload rather than
    # raising KeyError.
    return (res.get('data') or {}).get('access_token', '')
def translate(texts, token, src="en", tgt="zh"):
    """Translate a batch of texts with the 10jqka machine translation API.

    Args:
        texts: List of source strings; sent JSON-encoded in the ``text`` field.
        token: Access token as returned by ``getToken``.
        src: Source language code (defaults to "en", the previous hard-coded
            value).
        tgt: Target language code (defaults to "zh", the previous hard-coded
            value).

    Returns:
        A list with the ``dst`` of every entry in ``trans_result``. The list
        is empty when the token is empty or the API does not report
        ``status_code`` 0; previously an empty token returned ``None``, which
        callers that iterate the result could not handle.
    """
    ls = []
    if not token:
        return ls
    param = {
        'app_id': appid,
        'from': src,
        'to': tgt,
        'domain': 'default',
        'text': json.dumps(texts),
    }
    # NOTE(review): there is no space between "Bearer" and the token; kept
    # exactly as in the original - confirm against the API documentation.
    headers = {
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "open-authorization": "Bearer" + token,
    }
    url = 'https://b2b-api.10jqka.com.cn/gateway/arsenal/machineTranslation/batch/get/result'
    # A timeout keeps the call from hanging forever on a stalled connection.
    response = requests.post(url, headers=headers, data=param, timeout=30)
    res = json.loads(response.content)
    if res.get('status_code') == 0:
        ls.extend(rst['dst'] for rst in res['data']['trans_result'])
    return ls
def read_srt(file, max_entries=150):
    """Read an SRT subtitle file into parallel lists.

    Entries are separated by blank lines. Within an entry the first line is
    the sequence number, the second the time range, and all remaining lines
    are the subtitle text (joined with newlines), so subtitles spanning
    several lines no longer shift every later entry.

    Args:
        file: Path of the ``.srt`` file (read as UTF-8, BOM tolerated).
        max_entries: Maximum number of subtitle entries accepted.

    Returns:
        A tuple ``(nums, times, texts)`` of equally long lists. When the file
        holds more than *max_entries* entries a message box is shown and three
        empty lists are returned (previously this path raised
        ``UnboundLocalError``).
    """
    with open(file, "r", encoding="utf-8-sig") as f:
        lines = [line.strip() for line in f]

    nums, times, texts = [], [], []
    block = []
    # The trailing "" flushes the last entry when the file lacks a final
    # blank line.
    for line in lines + [""]:
        if line:
            block.append(line)
            continue
        if len(block) >= 2:
            nums.append(block[0])
            times.append(block[1])
            texts.append("\n".join(block[2:]))
        block = []

    if len(nums) > max_entries:
        g.msgbox("字幕文件超过150行!请更换文件。")
        return [], [], []
    return nums, times, texts
def add_to_srt(nums, times, texts, trans, srt_path=None):
    """Write a bilingual subtitle file next to the source file.

    Each entry is written as number, time range, original text and
    translation, followed by a blank line.

    Args:
        nums: Subtitle sequence numbers.
        times: Subtitle time ranges.
        texts: Original subtitle texts.
        trans: Translations, aligned with *texts*. Missing translations are
            written as empty lines so no subtitle entry is dropped.
        srt_path: Path of the source subtitle file. When omitted, the
            module-level ``srt`` variable is used, as before.

    The output path is the source path with ``_new`` inserted before the
    extension. Only the extension is touched, so a ``.srt`` elsewhere in the
    path is left alone and the source file is never overwritten when its
    extension is not exactly ``.srt``.
    """
    source_path = srt if srt_path is None else srt_path
    root, ext = os.path.splitext(source_path)
    target_path = root + "_new" + (ext or ".srt")

    # Pad short or missing translations so zip() does not drop entries.
    trans = list(trans or [])
    trans.extend([""] * (len(texts) - len(trans)))

    final_list = [
        num + "\n" + stamp + "\n" + text + "\n" + tran + "\n\n"
        for num, stamp, text, tran in zip(nums, times, texts, trans)
    ]
    with open(target_path, "wt", encoding="utf-8-sig") as f:
        f.write("".join(final_list))
    print("done!")
if __name__ == '__main__':
    # Load credentials; these module-level names are read by getToken() and
    # translate().
    d = get_key()
    appid = d["id"]
    appkey = d["key"]

    title = '请打开字幕文件'
    # `srt` stays a module-level name because add_to_srt() reads it to derive
    # the output path.
    srt = g.fileopenbox(title=title, default='*.srt')
    if srt is None:
        # The user cancelled the dialog; nothing to do.
        sys.exit(0)

    nums, times, texts = read_srt(srt)
    if not texts:
        sys.exit(1)

    # The token is only valid for 24 hours, so fetch a fresh one on every run.
    token = getToken()
    trans = translate(texts, token)
    if not trans:
        # Do not report success when no translation came back.
        g.msgbox("翻译失败,请检查appid.txt中的id和key以及网络连接。")
        sys.exit(1)

    add_to_srt(nums, times, texts, trans)
    g.msgbox("字幕文件已经翻译成功,请在字幕所在目录查看。")
彩云这个小而美的机器翻译一直很低调,翻译的速度和质量都还不错。以下是相关的样例代码:
import requests
import json
# Sample call to the Caiyun (ColorfulClouds) translation API.
url = "http://api.interpreter.caiyunai.com/v1/translator"
token = "你的令牌"  # fill in your own token here
payload = {
    # Commented-out variant that passes a list of sentences instead:
    # "source": ["Where there is a will, there is a way.",
    #            "ColorfulClouds Weather is the best weather service."],
    "source": "Where there is a will, there is a way.",
    "trans_type": "en2zh",
    "request_id": "demo",
}
headers = {
    # The body is JSON, so declare it as such.
    'content-type': "application/json",
    'x-authorization': "token " + token,
}
# A timeout keeps the call from hanging forever on a stalled connection.
response = requests.request(
    "POST", url, data=json.dumps(payload), headers=headers, timeout=10
)
# print(response.text)
resp = json.loads(response.text)['target']
print(resp)
阿里云机器翻译API的调用比较繁琐,申请过程也较复杂,其翻译质量倒是一般,大家可以有选择地使用以下代码:
from alibabacloud_alimt20181012.client import Client as alimt20181012Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_alimt20181012 import models as alimt_20181012_models
from alibabacloud_tea_util import models as util_models
# Replace the two placeholders below with the AccessKey ID and AccessKey
# secret you applied for (general translation API).
ACCESS_KEY_ID = "YOUR_ACCESS_KEY_ID"
ACCESS_KEY_SECRET = "YOUR_ACCESS_KEY_SECRET"
def create_client(
    access_key_id: str,
    access_key_secret: str,
    endpoint: str = 'mt.cn-hangzhou.aliyuncs.com',
) -> alimt20181012Client:
    """Build an Alibaba Cloud machine translation client.

    Args:
        access_key_id: AccessKey ID of the account.
        access_key_secret: AccessKey secret of the account.
        endpoint: Service endpoint host; defaults to the Hangzhou endpoint
            that was previously hard-coded.

    Returns:
        A client configured with the given credentials and endpoint.
    """
    config = open_api_models.Config(
        access_key_id=access_key_id,
        access_key_secret=access_key_secret,
    )
    config.endpoint = endpoint
    return alimt20181012Client(config)
def translate(text, source_language='en', target_language='zh'):
    """Translate *text* with the Alibaba Cloud general translation API.

    Args:
        text: Text to translate.
        source_language: Source language code (defaults to 'en', the previous
            hard-coded value).
        target_language: Target language code (defaults to 'zh', the previous
            hard-coded value).

    Returns:
        The ``translated`` field of the response data.
    """
    client = create_client(ACCESS_KEY_ID, ACCESS_KEY_SECRET)
    request = alimt_20181012_models.TranslateGeneralRequest(
        format_type='text',
        source_language=source_language,
        target_language=target_language,
        source_text=text,
        scene='general',
    )
    runtime = util_models.RuntimeOptions()
    resp = client.translate_general_with_options(request, runtime)
    # Plain attribute access instead of reaching into the object's __dict__.
    return resp.body.data.translated
# Demo call: translate one English sentence and print the result.
print(translate(text="Rome is not built in a day."))
ChatGPT也可以用于翻译,只要我们给它发出指令即可。代码如下:
import openai
# Interactive translator built on the legacy (pre-1.0) openai SDK.
openai.api_base = "https://api.openai.com/v1"
openai.api_key = "YOUR_API_KEY"
# text-davinci-003 has been retired by OpenAI; gpt-3.5-turbo-instruct is the
# replacement that still works with the completions endpoint used below.
model_engine_id = "gpt-3.5-turbo-instruct"
while True:
    try:
        prompt = input("Q:")
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C ends the session cleanly instead of a traceback.
        print()
        break
    completions = openai.Completion.create(
        # `model=` selects the model on the current completions route;
        # `engine=` addressed the older per-engine route.
        model=model_engine_id,
        prompt="Translate the following sentences into Chinese:"+prompt,
        max_tokens=800,
    )
    message = completions.choices[0].text.strip()
    print("A:", message)
1. 本文总结了六大机器翻译API及ChatGPT(共七种)的调用方法,供大家参考。英译汉,个人推荐的是Google翻译。汉译英推荐百度翻译、小牛翻译等。译文润色可以调用ChatGPT帮忙。
2. 下一步,我将努力整合一下更多的机器翻译api的调用方法,添加在一起,也可以写一个调用包,以方便大家使用。