基于百度翻译API实现国际化自动翻译脚本
(背景:手写多语言的KEY,替换文件中的中文繁琐、耗时)
1、通过接入百度翻译API生成xx.strings所需要的KEY
2、通过分析项目中的.m文件找到需要翻译的中文,生成KEY、VALUE的对应关系,并用生成的KEY替换.m文件中的中文
3、将翻译生成的对应关系的词条写入xx.strings文件
操作方式:
进入项目autoTranslation.py所在的目录下,执行python autoTranslation.py "HBTest"
tips:HBTest代表xx.strings中KEY的前缀
目录如下
主要需要三个python文件,如果自己路径有变化,修改成对应路径。
#!/usr/bin/env python3
#-*- coding:utf-8 -*-
# File names that filter_oc_file skips (sources that must not be translated).
# The original trailing note used `//`, which Python parses as floor division
# by an undefined name and fails with NameError at import time.
fliterFileNameList = ['xx.m']
import os,sys
from autoTranslationHelper import translateHelper
def filter_oc_file(dirpath, pretext):
    """Recursively run the translation helper on Objective-C sources.

    Every regular file directly inside *dirpath* whose name ends with
    ``.m`` or ``.mm`` and is not listed in ``fliterFileNameList`` is passed
    to ``translateHelper.handle_oc_file``. After the files of a directory
    have been handled, each sub-directory is processed the same way.

    Args:
        dirpath: directory to scan.
        pretext: key prefix forwarded unchanged to ``handle_oc_file``.
    """
    ext = ('.m', '.mm')
    sources = []
    subdirs = []
    # List the directory once and sort each entry into files or folders.
    for name in os.listdir(dirpath):
        fullPath = os.path.join(dirpath, name)
        if os.path.isdir(fullPath):
            # The skip list applies to file names only; folders are always entered.
            subdirs.append(fullPath)
        elif (os.path.isfile(fullPath) and name.endswith(ext)
                and name not in fliterFileNameList):
            sources.append(fullPath)

    # Plain loops: these calls are made for their side effects only.
    for source in sources:
        translateHelper.handle_oc_file(source, pretext)
    for subdir in subdirs:
        filter_oc_file(subdir, pretext)
if __name__ == '__main__':
    # Usage: python3 autoTranslation.py <KEY_PREFIX>
    if len(sys.argv) < 2:
        sys.exit('usage: python3 autoTranslation.py <KEY_PREFIX>')
    # Drop the intermediate result file left over from a previous run.
    translateHelper.remove_Localized()
    # Directory whose sources are translated. The original note on this line
    # used `//`, which Python evaluates as floor division by an undefined name.
    dirpath = os.path.abspath('./xx/xxx')
    filter_oc_file(dirpath, sys.argv[1])
    # Append the collected entries to the .strings file.
    translateHelper.write_oc_file()
    print('----------------------FINISH-----------------------')
    print('---------------------------------------------------')
# -*- coding: utf-8 -*-
import os,datetime,re
from autoTranslationHelper import translate
# Format specifiers that replace_placeholder passes to split_msg as split points.
SPLIT_LIST = ('%@', '%d', '%ld', '%lld', '%f')
def handle_oc_file(filepath, pretext):
    """Replace the Chinese ``@"..."`` literals of one source file by keyed lookups.

    The file is scanned literal by literal. A literal is left untouched when
    ``is_chinese`` rejects its text, when the text contains ``%``, when
    ``is_already_localized`` reports it, or when ``is_filter`` returns 0.
    Every other literal is handed to ``replace_placeholder``, which returns
    the updated file content and records the generated entries.

    A summary is printed for the file. Only when at least one entry was
    generated are the entries merged into the intermediate result file
    (``write_Localized``) and the source file rewritten.

    Args:
        filepath: path of the source file to process.
        pretext: prefix for the generated keys.
    """
    content = read_file(filepath)
    locallist = []          # entries generated for this file
    translatefaillist = []  # literals for which no key could be built
    search_from = 0
    while True:
        startIndex = find_start_symbol(content, search_from)
        if startIndex == -1:
            break
        endIndex = find_end_symbol(content, startIndex + 2)
        if endIndex == -1:
            # Unterminated literal: there is nothing safe to replace, and
            # slicing with -1 would silently cut the wrong text.
            break
        # By default continue scanning from the closing quote.
        search_from = endIndex

        msg1 = content[startIndex + 2:endIndex]
        # NOTE(review): is_chinese compares the whole text, so only literals
        # that START with a Chinese character pass — confirm this is intended.
        if not is_chinese(msg1):
            continue
        if "%" in msg1:
            continue
        if is_already_localized(content, startIndex):
            continue
        if is_filter(content, startIndex) == 0:
            continue

        msg = content[startIndex:endIndex + 1]
        content = replace_placeholder(content, msg, startIndex, endIndex,
                                      locallist, translatefaillist, filepath, pretext)
        # The content changed length; resume right after the '@"' position.
        # replace_placeholder already translates the literal, so the extra
        # translate_localized call the original made here is dropped.
        search_from = startIndex + 2

    print('---------------------------------------------------')
    print('处理文件完毕:%s'%(os.path.split(filepath)[1]))
    print('共计替换%s处。'%len(locallist))
    print('翻译失败%s处。'%len(translatefaillist))
    print('翻译失败字段列表:\n%s'%translatefaillist)
    print('---------------------------------------------------')
    if locallist:
        write_Localized(locallist)
        write_file(content, filepath)
def translate_localized(content, msg, locallist, translatefaillist, filepath, pretext):
    """Build a localization key for one literal and record the entry.

    The text between ``@"`` and the closing quote of *msg* is sent to
    ``translate.chinese_to_english``. The key is *pretext*, an underscore,
    and the first six space-separated words of the result, lower-cased and
    joined by underscores.

    Args:
        content: unused; kept for interface compatibility.
        msg: the full literal, including ``@"`` and the closing quote.
        locallist: receives ``"key" = "text";`` on success.
        translatefaillist: receives ``msg + '\\n'`` when no key could be built.
        filepath: unused; kept for interface compatibility.
        pretext: prefix of the generated key.

    Returns:
        ``HBLocalizedString(@"key")`` on success, ``''`` on failure.
    """
    # Strip the delimiters BEFORE removing literal "\n" sequences. The original
    # sliced with the pre-replacement length, so the closing quote leaked into
    # the text whenever a "\n" had been removed.
    translateword = msg[2:-1].replace('\\n', '')
    # Assumes chinese_to_english returns a string; None is treated as a
    # failed translation — TODO confirm against the translate module.
    english = translate.chinese_to_english(translateword) or ''
    english = english.replace(':', '')
    shortwordlist = [word.lower() for word in english.split(' ')[:6]]
    enKey = '_'.join(shortwordlist).replace('\\n', '')
    if not enKey:
        translatefaillist.append(msg + '\n')
        return ''
    textKey = pretext + '_' + enKey
    # msg[1:] drops the leading '@' and keeps the quoted text.
    locallist.append('"' + textKey + '" = ' + msg[1:] + ';')
    return 'HBLocalizedString(@"' + textKey + '")'
def read_file(filepath):
    """Return the full text of *filepath*.

    The file is decoded as UTF-8 explicitly, so Chinese text is read the
    same way regardless of the platform's default encoding.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        return f.read()
def find_start_symbol(content, start):
    """Return the index of the next ``@"`` at or after *start*, or -1.

    The returned index is relative to the beginning of *content*. Searching
    with ``str.find``'s start argument avoids copying the remainder of the
    file on every call.
    """
    return content.find('@"', start)
def find_end_symbol(content, start):
    """Return the index of the quote that closes a literal, or -1.

    The search begins at *start*, which callers set to the first character
    after the opening ``@"``. A quote preceded by an odd number of
    backslashes is an escaped quote inside the literal and is skipped.
    The returned index is relative to the beginning of *content*.
    """
    if start < 0:
        # Normalise a from-the-end position so the backslash scan below
        # never wraps around the front of the string.
        start = max(len(content) + start, 0)
    index = content.find('"', start)
    while index != -1:
        backslashes = 0
        while (index - 1 - backslashes >= start
               and content[index - 1 - backslashes] == '\\'):
            backslashes += 1
        if backslashes % 2 == 0:
            return index
        index = content.find('"', index + 1)
    return -1
def write_Localized(locallist):
    """Merge *locallist* into ``autoTranslationHelper/localized.txt``.

    The file lives under the current working directory. Existing lines are
    kept, new entries are appended, duplicates and blank lines are dropped,
    and the file is rewritten with one entry per line.

    Unlike a set-based merge, first-seen order is preserved, so repeated
    runs over the same input produce the same file.

    Args:
        locallist: entries to add.
    """
    localized_dir = os.path.join(os.path.abspath('.'), 'autoTranslationHelper')
    new_file_path = os.path.join(localized_dir, 'localized.txt')

    previous = []
    if os.path.exists(new_file_path):
        with open(new_file_path, 'r', encoding='utf-8') as f:
            previous = f.read().split('\n')

    # dict.fromkeys de-duplicates while keeping insertion order.
    merged = list(dict.fromkeys(
        line for line in previous + list(locallist) if line))

    os.makedirs(localized_dir, exist_ok=True)
    with open(new_file_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(merged))
def write_file(content, filepath):
    """Overwrite *filepath* with *content*, encoded as UTF-8.

    The file is created if missing and truncated otherwise. It is opened
    write-only because nothing is read back from it here.
    """
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(content)
def remove_Localized():
    """Delete ``autoTranslationHelper/localized.txt`` left by a previous run.

    The path is resolved against the current working directory. A missing
    file is not an error.
    """
    localized_dir = os.path.join(os.path.abspath('.'), 'autoTranslationHelper')
    new_file_path = os.path.join(localized_dir, 'localized.txt')
    try:
        os.remove(new_file_path)
    except FileNotFoundError:
        # Nothing to clean up; removing directly avoids a check-then-remove race.
        pass
def write_oc_file(strings_path='当前需要翻译的文件路径/xx.strings'):
    """Append the collected entries to the ``.strings`` file.

    Reads ``autoTranslationHelper/localized.txt`` under the current working
    directory and appends its content, preceded by a newline, to
    *strings_path*. Does nothing when the intermediate file does not exist.

    Args:
        strings_path: target ``.strings`` file. The default is the
            placeholder path the script shipped with; pass the real path
            instead of editing the function.
    """
    txt_localized_dir = os.path.join(os.path.abspath('.'), 'autoTranslationHelper')
    txt_file_path = os.path.join(txt_localized_dir, 'localized.txt')
    if not os.path.exists(txt_file_path):
        return
    with open(txt_file_path, 'r', encoding='utf-8') as f:
        txtContent = f.read()
    cloudLanguage_file_path = os.path.abspath(strings_path)
    # NOTE(review): assumes the .strings file is UTF-8 — confirm for the project.
    with open(cloudLanguage_file_path, 'a', encoding='utf-8') as f:
        f.write('\n' + txtContent)
def is_already_localized(content, startIndex):
    """Report whether the literal starting at *startIndex* was already replaced.

    NOTE(review): every branch below returns ``False``, so as written this
    check never skips a literal. The branch structure is kept so the intended
    conditions stay visible; which branch was meant to return ``True`` cannot
    be determined from this file — confirm with the author.

    Args:
        content: full file content.
        startIndex: index of the ``@`` that opens the literal.

    Returns:
        Always ``False``.
    """
    newmsg = content[:startIndex]
    index = newmsg.rfind('(')
    if index == -1:
        # No opening parenthesis anywhere before the literal.
        return False
    # Up to 20 characters directly before the literal. The start is clamped
    # so a literal near the top of the file does not slice from the end.
    specialSymbol = content[max(startIndex - 20, 0):startIndex].strip()
    if specialSymbol.endswith(':'):
        # Guards against the `():@""` pattern: the literal follows a colon.
        return False
    if newmsg.rfind(';') > index:
        # A statement ended after the last '(' seen before the literal.
        return False
    return False
def is_filter(content, startIndex):
    """Tell whether the literal at *startIndex* must be skipped.

    Looks at the (up to) 10 characters directly before the literal for
    ``NSLog``, ``//`` or ``NSWarning``.

    Args:
        content: full file content.
        startIndex: index of the ``@`` that opens the literal.

    Returns:
        0 when one of the markers is found (skip the literal), -1 otherwise.
    """
    # Clamp the window start: with startIndex < 10 a negative start would
    # slice from the END of the content and miss markers at the file start.
    window = content[max(startIndex - 10, 0):startIndex]
    markers = ('NSLog', '//', 'NSWarning')
    if any(marker in window for marker in markers):
        return 0
    return -1
def split_msg(msg, splitmsg):
    """Split *msg* at every token of *splitmsg*, keeping the tokens.

    Each occurrence of a token is surrounded by a marker string and the
    result is split on that marker, so the tokens appear as their own list
    items. Empty strings show up where a token sits at an edge of *msg* or
    next to another token.

    Args:
        msg: text to split.
        splitmsg: iterable of separator tokens.

    Returns:
        List of the pieces in their original order.
    """
    marker = 'PY_SP_PEARL'
    marked = msg
    for token in splitmsg:
        marked = marked.replace(token, marker + token + marker)
    return marked.split(marker)
def replace_placeholder(content, msg, startIndex, endIndex, locallist, translatefaillist, filepath, pretext):
    """Return *content* with one literal replaced by its localized lookup.

    The literal text is split at the format specifiers in ``SPLIT_LIST``.
    Every piece containing Chinese is translated with ``translate_localized``
    and the resulting expressions are joined with commas. Pieces without
    Chinese (the specifiers themselves included) are dropped.

    When the literal is already wrapped as ``HBLocalizedString(@"...")`` the
    whole wrapper is replaced; this assumes the closing parenthesis directly
    follows the closing quote.

    If any piece cannot be translated, or nothing needs translating, the
    content is returned unchanged. The original substituted an empty string
    in that case, which deleted the literal from the source file.

    Args:
        content: full file content.
        msg: the literal, including ``@"`` and the closing quote.
        startIndex: index of the literal's ``@`` in *content*.
        endIndex: index of the literal's closing quote in *content*.
        locallist: receives the generated entries.
        translatefaillist: receives the pieces that failed to translate.
        filepath: forwarded to ``translate_localized``.
        pretext: key prefix forwarded to ``translate_localized``.

    Returns:
        The updated content, or *content* itself when nothing was replaced.
    """
    paramlist = []
    for piece in split_msg(msg[2:-1], SPLIT_LIST):
        if not is_contains_chinese(piece):
            continue
        localized_after_translate = translate_localized(
            content, '@"' + piece + '"', locallist, translatefaillist, filepath, pretext)
        if not localized_after_translate:
            # Keep the source intact rather than writing an empty expression.
            return content
        paramlist.append(localized_after_translate)
    if not paramlist:
        return content

    newSentence = ','.join(paramlist)
    LocalizedString = 'HBLocalizedString(@"'
    # Position where an existing wrapper would begin; its last two
    # characters are the literal's own '@"'.
    wrapper_start = startIndex - len(LocalizedString) + 2
    if wrapper_start >= 0 and content[wrapper_start:startIndex + 2] == LocalizedString:
        # endIndex + 2 also skips the ')' that closes the old wrapper.
        return content[:wrapper_start] + newSentence + content[endIndex + 2:]
    return content[:startIndex] + newSentence + content[endIndex + 1:]
def is_chinese(uchar):
    """Return True when *uchar* lies between U+4E00 and U+9FA5.

    The comparison is an ordinary string comparison. For a single character
    it tests the CJK range. For a longer string it is lexicographic, so the
    result is decided by the first character (the empty string is False).
    """
    return u'\u4e00' <= uchar <= u'\u9fa5'
def is_contains_chinese(msg):
    """Return True when *msg* contains at least one character in U+4E00..U+9FA5."""
    return re.search(u'[\u4e00-\u9fa5]+', msg) is not None
执行命令的时候可能出现以下错误:Traceback (most recent call last):
File "autoTranslation.py", line 7, in <module>
from autoTranslationHelper import translateHelper
ImportError: No module named autoTranslationHelper
不用着急,这是因为脚本需要用python3运行,而命令行默认的python不是python3。
有两种解决方案:把默认的python版本改成3;或者始终用python3运行,例如python3 autoTranslation.py "HBTest"。