根据有道翻译软件的功能,结合实际用途,基于Python3.6写了一个有道翻译助手软件。
测试文件及源代码已上传至:https://github.com/MMMMMichael/Translation-assistant
利用了有道翻译(有道智云)提供的API接口,实现了文章中单词提取并翻译,另外可以将有道翻译导出的txt文件或xml文件转换为Excel文件,方便平时查看。
还可以将英文文章导入,利用提取单词并翻译功能将文章分割为单词并查询意思,最终生成可以导入有道词典的xml文件。
同时还添加了将百度中文导出的txt文件转换为Excel格式文件的功能。
python有道翻译助手源代码如下:
# -*- coding:utf-8 -*-
"""
Create on 2017-10-28
Author : Mu Yachao
Email : [email protected]
Function :
1 : 将有道词典导出的txt或xml格式单词转换到Excel表格中
2 : 分离全英文文章中的单词并去重
3 : 批量查询分理出的单词
4 : 将分离出的单词保存为可以导入有道词典的xml格式
5 : 将百度中文导出txt文件转换为Excel文件
"""
import _thread
import http.client
import json
import random
import re
import urllib
import urllib.parse
from hashlib import md5
from tkinter import *
from tkinter import ttk
from tkinter import messagebox
from tkinter.filedialog import askopenfilename, asksaveasfilename
from tkinter.scrolledtext import ScrolledText
from xml.sax.saxutils import escape

from openpyxl import load_workbook
def extract_words(txt_file, exist_words=None, min_len=1):
    """Return the distinct words found in a text file, sorted alphabetically.

    The text is lower-cased and split on ``\\w+``. A token is dropped when it
    contains a digit, contains a CJK character (U+4E00-U+9FA5), is listed in
    ``exist_words``, or is not longer than ``min_len``.

    :param txt_file: path of the text file (opened with the locale encoding)
    :param exist_words: iterable of already-known words to skip; ``None`` means none
    :param min_len: only words strictly longer than this are kept
    :return: sorted list of unique words
    """
    known = set(exist_words) if exist_words else set()
    # The context manager closes the file even if reading/decoding fails.
    with open(txt_file, 'r') as file:
        text = file.read()
    candidates = set(re.findall(r"\w+", text.lower()))
    # sorted() makes the result deterministic; the set already removed duplicates.
    return sorted(
        word for word in candidates
        if len(word) > min_len
        and word not in known
        and not re.search(r'\d', word)  # tokens containing digits
        and not re.search('[\u4e00-\u9fa5]', word)  # tokens containing Chinese
    )
def extract_phrase(txt_file, min_len=1):
    """Return the distinct phrases of a phrase list, one phrase per line.

    Lines containing a CJK character (U+4E00-U+9FA5) and lines that are not
    longer than ``min_len`` (after removing the line break) are skipped.
    Duplicates are removed while keeping the order of first appearance, so
    the result is the same on every run.

    :param txt_file: path of the phrase list (opened with the locale encoding)
    :param min_len: only phrases strictly longer than this are kept
    :return: list of unique phrases in file order
    """
    phrases = []
    seen = set()
    with open(txt_file, 'r') as file:
        for line in file:
            phrase = line.strip('\n')
            if len(phrase) <= min_len or re.search('[\u4e00-\u9fa5]', line):
                continue
            if phrase not in seen:
                seen.add(phrase)
                phrases.append(phrase)
    return phrases
def get_words(file):
    """Parse a UTF-16 text export into ``(text, phonetic, translation)`` tuples.

    All line breaks are removed first, then the text is split wherever a
    ``<digits>,`` entry number occurs. Each resulting entry is classified as:

    * a word, when it contains ``[``: ``word [phonetic] translation``;
    * a phrase with translation, when whitespace is followed by a CJK character;
    * a bare phrase otherwise (empty phonetic and translation).

    :param file: path of the exported txt file (UTF-16 encoded)
    :return: list such as ``[('group', '[gruːp]', ' n. 组;团体;'), ...]``
    """
    with open(file, 'r', encoding='utf-16') as f:
        text = f.read().replace('\n', '')
    # Each "<number>," marker starts a new entry.
    entries = re.sub(r'\d+,\s?', '\n', text).split('\n')
    result = []
    for entry in entries:
        if '[' in entry:  # word
            found = re.findall(r'(\w+)\s+(\[.*?])(.*?)$', entry)
            if found:  # word + pronunciation + translation
                result.append(found[0])
            continue
        # phrase
        found = re.findall(r'(.*?)\s+([\u4e00-\u9fa5].*?$)', entry)
        if found:  # phrase + translation
            result.append((found[0][0].strip(), "", found[0][1]))
        elif entry:  # phrase without translation
            result.append((entry.strip(), "", ""))
    return result
def get_xml_words(file):
    """Parse a UTF-16 XML word book into ``(word, phonetic, translation)`` tuples.

    NOTE(review): the original pattern lost its XML tags (it had a single
    capture group while the code indexed three). This version assumes each
    entry is an ``<item>`` element holding ``<word>``, ``<trans>`` and
    ``<phonetic>`` children whose text may be wrapped in ``<![CDATA[...]]>``
    -- confirm against a real Youdao export.

    :param file: path of the exported xml file (UTF-16 encoded)
    :return: list such as ``[('group', '[gruːp]', 'n. 组;团体;'), ...]``;
        a missing child element yields an empty string
    """
    with open(file, 'r', encoding='utf-16') as f:
        text = f.read()

    def field(tag, chunk):
        # Text of <tag> inside one item, with an optional CDATA wrapper removed.
        found = re.search(
            r'<%s>\s*(?:<!\[CDATA\[(.*?)\]\]>|(.*?))\s*</%s>' % (tag, tag),
            chunk, re.S)
        if not found:
            return ''
        cdata, plain = found.groups()
        return cdata if cdata is not None else plain

    return [
        (field('word', item), field('phonetic', item), field('trans', item))
        for item in re.findall(r'<item>(.*?)</item>', text, re.S)
    ]
def get_chinese(file='', judge=None):
    """Parse a txt export whose lines look like ``<n> <word><TAB>...##...``.

    Each matching line contributes a ``(word, middle, tail)`` tuple, where
    ``middle`` is the text between the tab(s) and ``##`` and ``tail`` is the
    rest of the line. A word is returned only the first time it is seen.

    :param file: path of the txt file (opened with the locale encoding)
    :param judge: optional list of words to treat as already seen; newly seen
        words are appended to it. ``None`` starts from an empty list on every
        call (the old shared ``[]`` default made a second call return nothing).
    :return: list of 3-tuples in file order
    """
    if judge is None:
        judge = []
    words = []
    with open(file, 'r') as handle:
        for line in handle:
            # No trailing "\n" is required, so a last line without a line
            # break is parsed as well.
            for _, word, middle, tail in re.findall(r"(\d+)\s(\w+)\t+(.*?)##(.*)", line):
                if word not in judge:
                    judge.append(word)
                    words.append((word, middle, tail))
    return words
def write_words_to_excel(excel_path, words_list):
    """Append new entries to the first sheet of an existing Excel workbook.

    An entry is written only when its first element is not already present in
    column 1 of the sheet. The first three elements of each entry go to
    columns 1-3 of the next free row, and the workbook is saved in place.

    :param excel_path: path of an existing workbook
    :param words_list: iterable of 3-item sequences
    :return: number of rows added, or ``None`` when the workbook cannot be loaded
    """
    try:
        excel = load_workbook(excel_path)
    except Exception:
        # Callers treat a missing count as "conversion failed".
        return None
    # First worksheet; replaces the deprecated get_sheet_names()/get_sheet_by_name().
    sheet = excel.worksheets[0]
    rows = sheet.max_row
    # A set gives O(1) membership tests for the entries already in column 1.
    exist_words = {sheet.cell(row=rx, column=1).value for rx in range(1, rows + 1)}
    word_count = 0
    for words in words_list:
        if words[0] in exist_words:
            continue  # already present in the sheet
        word_count += 1
        for c in range(3):
            sheet.cell(row=rows + word_count, column=c + 1).value = words[c]
    excel.save(excel_path)
    return word_count
def translate(appKey='', secretKey='', q='', is_word=True):
    """Translate an English word or phrase through the Youdao open API.

    :param appKey: application ID
    :param secretKey: application secret
    :param q: word or phrase to look up
    :param is_word: ``True`` to return dictionary data, ``False`` for a phrase
    :return:
        * ``{'word': q, 'phonetic': ..., 'explains': ...}`` for a word,
        * ``{'phrase': q, 'translation': ...}`` for a phrase,
        * the raw response dict (it contains ``'errorCode'``) when the API
          reports an error,
        * ``None`` when the request fails or the reply is not a JSON object.
    """
    salt = random.randint(1, 65536)
    # Request signature: md5(appKey + q + salt + secretKey).
    sign = md5((appKey + q + str(salt) + secretKey).encode()).hexdigest()
    myurl = ('/api?appKey=' + appKey + '&q=' + urllib.parse.quote(q)
             + '&from=EN&to=zh-CHS&salt=' + str(salt) + '&sign=' + sign)

    httpClient = None
    try:
        # The timeout keeps a dead connection from blocking the caller forever.
        httpClient = http.client.HTTPConnection('openapi.youdao.com', timeout=10)
        httpClient.request('GET', myurl)
        # json.loads instead of eval(): the body is untrusted remote data, and
        # eval() also fails on the JSON literals true/false/null.
        response = json.loads(httpClient.getresponse().read().decode())
    except (OSError, http.client.HTTPException, ValueError):
        # Network failure, timeout, or an undecodable/non-JSON body.
        return None
    finally:
        if httpClient:
            httpClient.close()

    if not isinstance(response, dict):
        return None
    if response.get('errorCode') != '0':
        return response

    if is_word:
        basic = response.get('basic') or {}
        explains = ';'.join(basic.get('explains') or []).strip(';')
        return {'word': q, 'phonetic': basic.get('phonetic', ''), 'explains': explains}

    # Every translation is terminated by ';', including the last one.
    translation = ''.join(item + ';' for item in response.get('translation') or [])
    return {'phrase': q, 'translation': translation}
class Application():
    """Tkinter window ("词汇助手") tying the extraction, translation and conversion helpers together.

    The window has three tabs: word/phrase extraction, file-to-Excel
    conversion, and Youdao API credentials. The save/translate actions run on
    worker threads started with ``_thread.start_new_thread``.

    NOTE(review): those workers call Tk widgets and dialogs directly; Tk does
    not guarantee this is safe off the main thread -- confirm on the target
    platforms.
    """

    def __init__(self):
        """Create the root window and build every tab."""
        self.root = Tk()
        self.root.title('词汇助手')
        self.root.resizable(False, False)

        # Tk variables bound to the path entries and result labels.
        self.english_articles_path = StringVar()  # English article (txt)
        self.english_list_path = StringVar()  # English phrase list (txt)
        self.file_from_youdao_path = StringVar()  # Youdao export (xml or txt)
        self.excel_from_youdao_file_path = StringVar()  # Excel target for the Youdao export
        self.label_youdao_file_to_excel_result = StringVar()  # Youdao -> Excel result text
        self.chinese_txt_file_path = StringVar()  # Chinese export (txt)
        self.excel_from_chinese_path = StringVar()  # Excel target for the Chinese export
        self.label_chinese_to_excel_result = StringVar()  # Chinese -> Excel result text

        # Menu bar with a single "About" entry.
        self.menu_bar = Menu(self.root)
        self.menu_bar.add_command(label='关于', command=self.help_about)
        self.root.config(menu=self.menu_bar)

        # Top-level tabs.
        self.tab_menu = ttk.Notebook(self.root)
        self.tab_menu.grid(row=1, column=1)
        self.tab_extract = ttk.Frame(self.tab_menu)
        self.tab_menu.add(self.tab_extract, text=' 单词提取 ')
        self.tab_convert = ttk.Frame(self.tab_menu)
        self.tab_menu.add(self.tab_convert, text=' 格式转换 ')
        self.tab_conf = ttk.Frame(self.tab_menu)
        self.tab_menu.add(self.tab_conf, text=' 配置信息 ')

        self._build_convert_tab()
        self._build_conf_tab()
        self._build_extract_tab()

    def _build_convert_tab(self):
        """Build the "format conversion" tab (two file-to-Excel converters)."""
        # Youdao export -> Excel
        self.frm_word = LabelFrame(self.tab_convert, text='有道词典导出文件转换Excel')
        self.frm_word.grid(row=1, column=1, pady=10, padx=10)
        Entry(self.frm_word, textvariable=self.file_from_youdao_path).grid(row=1, column=1, padx=5)
        Button(self.frm_word, text='选择有道词典导出文件',
               command=self.choose_file_from_youdao_path).grid(row=1, column=2)
        Entry(self.frm_word, textvariable=self.excel_from_youdao_file_path).grid(row=2, column=1, pady=5)
        Button(self.frm_word, text=' 选择 Excel 文件路径 ',
               command=self.choose_excel_from_youdao_file_path).grid(row=2, column=2)
        Button(self.frm_word, text='开始转换',
               command=self.change_youdao_outfile_to_excel).grid(row=3, column=1, pady=5)
        Label(self.frm_word, textvariable=self.label_youdao_file_to_excel_result).grid(row=3, column=2)

        # Chinese export -> Excel
        self.frm_chinese = LabelFrame(self.tab_convert, text='百度中文导出文件转换Excel')
        self.frm_chinese.grid(row=2, column=1, pady=10)
        Entry(self.frm_chinese, textvariable=self.chinese_txt_file_path).grid(row=1, column=1, padx=5)
        Button(self.frm_chinese, text='选择百度中文导出文件',
               command=self.choose_chinese_txt_file_path).grid(row=1, column=2)
        Entry(self.frm_chinese, textvariable=self.excel_from_chinese_path).grid(row=2, column=1, pady=5)
        Button(self.frm_chinese, text=' 选择 Excel 文件路径 ',
               command=self.choose_excel_from_chinese_path).grid(row=2, column=2)
        Button(self.frm_chinese, text='开始转换',
               command=self.change_chinese_to_excel).grid(row=3, column=1, pady=5)
        Label(self.frm_chinese, textvariable=self.label_chinese_to_excel_result).grid(row=3, column=2)

    def _build_conf_tab(self):
        """Build the "configuration" tab holding the API ID and secret entries."""
        self.frm_conf = LabelFrame(self.tab_conf, text='有道翻译API配置信息')
        self.frm_conf.grid(row=2, column=1, padx=10, pady=10)
        Label(self.frm_conf, text='输入您的应用ID:').grid(row=1, column=1)
        self.et_app_key = Entry(self.frm_conf)
        self.et_app_key.grid(row=1, column=2, padx=10, pady=10)
        Label(self.frm_conf, text='输入您的应用密钥:').grid(row=2, column=1)
        self.et_secret_key = Entry(self.frm_conf)
        self.et_secret_key.grid(row=2, column=2, padx=10, pady=10)
        Label(self.frm_conf, text='注:每次关闭程序自动删除ID').grid(row=3, column=2)

    def _build_extract_tab(self):
        """Build the "extraction" tab: word/phrase sub-tabs plus the result box."""
        self.frm_extract = Frame(self.tab_extract)
        self.frm_extract.grid(row=1, column=1, pady=5)

        self.tab_ctrl = ttk.Notebook(self.frm_extract)
        self.tab_ctrl.grid(row=1, column=1)
        self.tab_word = ttk.Frame(self.tab_ctrl)
        self.tab_ctrl.add(self.tab_word, text='文章单词提取')
        self.tab_phrase = ttk.Frame(self.tab_ctrl)
        self.tab_ctrl.add(self.tab_phrase, text="列表短语提取")

        # Word sub-tab
        self.frm_choose_word = Frame(self.tab_word)
        self.frm_choose_word.grid(row=1, column=1)
        Entry(self.frm_choose_word, textvariable=self.english_articles_path).grid(row=1, column=1, padx=5)
        Button(self.frm_choose_word, text='选择英文文章txt',
               command=self.choose_english_article_path).grid(row=1, column=2)
        Button(self.frm_choose_word, text='提取单词',
               command=self.extract_words_from_english_articles).grid(row=1, column=3)
        self.frm_word_btn = Frame(self.tab_word)
        self.frm_word_btn.grid(row=2, column=1)
        Button(self.frm_word_btn, text='保存为有道xml格式',
               command=self.save_word_as_xml).grid(row=1, column=2, padx=13)
        Button(self.frm_word_btn, text='翻译并保存为xml格式',
               command=self.translate_word).grid(row=1, column=3)

        # Phrase sub-tab
        self.frm_choose_phrase = Frame(self.tab_phrase)
        self.frm_choose_phrase.grid(row=1, column=1)
        Entry(self.frm_choose_phrase, textvariable=self.english_list_path).grid(row=1, column=1, padx=5)
        Button(self.frm_choose_phrase, text='选择短语列表txt',
               command=self.choose_english_list_path).grid(row=1, column=2)
        Button(self.frm_choose_phrase, text='提取短语',
               command=self.extract_phrase_from_english_list).grid(row=1, column=3)
        self.frm_phrase_btn = Frame(self.tab_phrase)
        self.frm_phrase_btn.grid(row=2, column=1)
        Button(self.frm_phrase_btn, text='保存为有道xml格式',
               command=self.save_phrase_as_xml).grid(row=1, column=2, padx=13)
        Button(self.frm_phrase_btn, text='翻译并保存为xml格式',
               command=self.translate_phrase).grid(row=1, column=3)

        # Shared result box below both sub-tabs.
        self.st_word_list = ScrolledText(self.frm_extract, width=32, height=8, background='#ffffff',
                                         font=('微软雅黑',), )
        self.st_word_list.grid(row=3, column=1)

    # ------------------------------------------------------------------
    # File pickers: each stores the chosen path in its Tk variable.
    # ------------------------------------------------------------------
    def choose_english_article_path(self):
        """Pick the English article to extract words from."""
        path_ = askopenfilename(filetypes=(("Text Files", "*.txt"), ("All files", "*.*")))
        self.english_articles_path.set(path_)

    def choose_english_list_path(self):
        """Pick the English phrase list."""
        path_ = askopenfilename(filetypes=(("Text Files", "*.txt"), ("All files", "*.*")))
        self.english_list_path.set(path_)

    def choose_file_from_youdao_path(self):
        """Pick the txt/xml file exported from Youdao."""
        path_ = askopenfilename(filetypes=(("Text files", "*.txt"), ("XML files", "*.xml")))
        self.file_from_youdao_path.set(path_)

    def choose_excel_from_youdao_file_path(self):
        """Pick the Excel workbook that receives the Youdao entries."""
        path_ = askopenfilename(filetypes=(("Excel File", "*.xlsx*"),))
        self.excel_from_youdao_file_path.set(path_)

    def choose_chinese_txt_file_path(self):
        """Pick the Chinese txt export."""
        path_ = askopenfilename(filetypes=(("Text files", "*.txt"),))
        self.chinese_txt_file_path.set(path_)

    def choose_excel_from_chinese_path(self):
        """Pick the Excel workbook that receives the Chinese entries."""
        path_ = askopenfilename(filetypes=(("Excel File", "*.xlsx*"),))
        self.excel_from_chinese_path.set(path_)

    def help_about(self):
        """Show the version followed by the module docstring (if there is one)."""
        messagebox.showinfo("About", ' Version: V 1.0' + (__doc__ or ''))

    # ------------------------------------------------------------------
    # Extraction preview
    # ------------------------------------------------------------------
    def _show_items(self, items, header):
        """Replace the result box content with ``header`` followed by one item per line."""
        self.st_word_list.delete('1.0', END)
        for item in items:
            self.st_word_list.insert(END, item + '\n')
        self.st_word_list.insert('1.0', header)

    def extract_words_from_english_articles(self):
        """Extract the words of the selected article and list them in the result box."""
        if self.english_articles_path.get() == '':
            messagebox.showinfo('提示', '请选择英文文章txt文件')
            return
        try:
            words = extract_words(txt_file=self.english_articles_path.get())
        except Exception:
            messagebox.showerror('Error', '提取失败,请检查txt文件格式或编码')
            return
        self._show_items(words, '\t共 %d 个单词\n\n' % len(words))

    def extract_phrase_from_english_list(self):
        """Extract the phrases of the selected list and show them in the result box."""
        if self.english_list_path.get() == '':
            messagebox.showinfo('提示', '请选择短语列表txt文件')
            return
        try:
            phrase = extract_phrase(txt_file=self.english_list_path.get())
        except Exception:
            messagebox.showerror('Error', '提取失败,请检查txt文件格式或编码')
            return
        self._show_items(phrase, '\t共 %d 个短语\n\n' % len(phrase))

    # ------------------------------------------------------------------
    # Conversion to Excel
    # ------------------------------------------------------------------
    def change_youdao_outfile_to_excel(self):
        """Append the entries of the selected Youdao export to the selected workbook."""
        source = self.file_from_youdao_path.get()
        target = self.excel_from_youdao_file_path.get()
        if source == '' or target == '':
            messagebox.showwarning('警告', '请选择有道词典导出的txt或xml文件以及要导出的excel文件')
            return
        if source.endswith('xml'):
            parser, kind = get_xml_words, 'xml'
        elif source.endswith('txt'):
            parser, kind = get_words, 'txt'
        else:
            return  # other extensions are ignored
        try:
            added = write_words_to_excel(target, parser(source))
            # A None count (workbook could not be loaded) makes the %d
            # formatting raise, which is reported as a failure below.
            self.label_youdao_file_to_excel_result.set('新增 %d 行' % added)
        except Exception:
            messagebox.showerror('Error', '转换失败,请关闭Excel文件或检查%s文件格式及编码。' % kind)

    def change_chinese_to_excel(self):
        """Append the entries of the selected Chinese export to the selected workbook."""
        if self.chinese_txt_file_path.get() == '' or self.excel_from_chinese_path.get() == '':
            messagebox.showwarning('警告', '请选择从百度中文导出的txt文件和要导入的Excel文件')
            return
        try:
            chinese_list = get_chinese(file=self.chinese_txt_file_path.get())
        except Exception:
            messagebox.showwarning('警告', '请检查txt文件格式及编码,确保为百度中文导出文件。')
            return
        try:
            count = write_words_to_excel(self.excel_from_chinese_path.get(), chinese_list)
            self.label_chinese_to_excel_result.set('新增 %d 行' % count)
        except Exception:
            messagebox.showwarning('警告', '转换失败,请关闭excel文件并重试')

    # ------------------------------------------------------------------
    # XML export
    # ------------------------------------------------------------------
    @staticmethod
    def _cdata(text):
        """Wrap ``text`` in a CDATA section, splitting any embedded ``]]>``."""
        return '<![CDATA[' + text.replace(']]>', ']]]]><![CDATA[>') + ']]>'

    @staticmethod
    def _wordbook_xml(items):
        """Serialise ``(text, translation, phonetic)`` items as a word-book document.

        NOTE(review): the original string literals lost their XML tags; the
        ``<wordbook>/<item>/<word>/<trans>/<phonetic>/<tags>/<progress>``
        layout used here is the assumed Youdao import format -- confirm by
        importing a generated file into Youdao.
        """
        parts = ['<wordbook>']
        for text, translation, phonetic in items:
            parts.append(
                '<item>\t<word>' + escape(text) + '</word>\n\t'
                + '<trans>' + Application._cdata(translation) + '</trans>\n\t'
                + '<phonetic>' + Application._cdata(phonetic) + '</phonetic>\n\t'
                + '<tags></tags>\n\t<progress>1</progress>\n</item>')
        parts.append('</wordbook>')
        return ''.join(parts)

    def save_as_xml(self, items, xml):
        """Write ``items`` to ``xml`` (UTF-16) as a word book and confirm to the user.

        :param items: iterable of 3-item sequences ``(text, translation, phonetic)``;
            translation and phonetic are plain text (CDATA wrapping is done here)
        :param xml: path of the file to write
        """
        with open(xml, 'w', encoding='utf-16') as xml_:
            xml_.write(self._wordbook_xml(items))
        messagebox.showinfo('Info', '保存成功')

    def _ask_xml_target(self):
        """Ask where to save the xml file; a false value means the dialog was cancelled."""
        return asksaveasfilename(filetypes=(("xml files", "*.xml"), ("All files", "*.*")),
                                 defaultextension='.xml')

    def _export_untranslated(self, path_var, extractor, missing_msg):
        """Worker: extract entries from the selected file and save them without translations."""
        source = path_var.get()
        if source == '':
            messagebox.showinfo('提示', missing_msg)
            return
        target = self._ask_xml_target()
        if not target:
            return
        try:
            entries = extractor(txt_file=source)
        except Exception:
            messagebox.showerror('Error', '保存失败,请检查txt文件格式或编码')
            return
        # Every entry is written as [text, '', '']; passing the bare strings
        # (as the phrase export used to) would index single characters.
        self.save_as_xml([[entry, '', ''] for entry in entries], target)

    def save_word_as_xml(self):
        """Save the words of the selected article as xml (on a worker thread)."""
        _thread.start_new_thread(
            self._export_untranslated,
            (self.english_articles_path, extract_words, '请选择txt文章'))

    def save_phrase_as_xml(self):
        """Save the phrases of the selected list as xml (on a worker thread)."""
        _thread.start_new_thread(
            self._export_untranslated,
            (self.english_list_path, extract_phrase, '请选择txt短语列表'))

    # ------------------------------------------------------------------
    # Translation + XML export
    # ------------------------------------------------------------------
    def _translate_and_export(self, path_var, extractor, missing_msg, is_word):
        """Worker: extract entries, translate each one, and save the results as xml.

        The run stops at the first failed lookup (network failure or API
        error), so a dead connection produces one dialog, not one per entry.
        """
        source = path_var.get()
        if source == '':
            messagebox.showinfo('提示', missing_msg)
            return
        app_key = self.et_app_key.get()
        secret_key = self.et_secret_key.get()
        if app_key == '' or secret_key == '':
            messagebox.showinfo('提示', '请点击“配置信息”按钮输入相应信息!')
            return
        target = self._ask_xml_target()
        if not target:
            return
        try:
            entries = extractor(txt_file=source)
        except Exception:
            messagebox.showerror('Error', '翻译失败,请检查txt文件格式或编码')
            return

        self.st_word_list.insert('1.0', 'Please wait ...\n')
        self.st_word_list.update()
        try:
            rows = []
            for entry in entries:
                try:
                    reply = translate(appKey=app_key, secretKey=secret_key, q=entry, is_word=is_word)
                except Exception:
                    reply = None
                if reply is None:
                    messagebox.showwarning('Warning', '翻译失败,请检查网络连接!')
                    return
                if 'errorCode' in reply:
                    if reply['errorCode'] == '108':
                        messagebox.showerror('error', '应用ID无效')
                    else:
                        messagebox.showerror('error', '翻译失败,错误码:' + str(reply['errorCode']))
                    return
                if is_word:
                    rows.append([reply['word'], reply['explains'], reply['phonetic']])
                else:
                    rows.append([reply['phrase'], reply['translation'], ''])
            self.save_as_xml(rows, target)
        finally:
            # Remove the "Please wait ..." line however the run ended.
            self.st_word_list.delete('1.0', '2.0')

    def translate_word(self):
        """Translate the words of the selected article and save them as xml (worker thread)."""
        _thread.start_new_thread(
            self._translate_and_export,
            (self.english_articles_path, extract_words, '请选择txt文章', True))

    def translate_phrase(self):
        """Translate the phrases of the selected list and save them as xml (worker thread)."""
        _thread.start_new_thread(
            self._translate_and_export,
            (self.english_list_path, extract_phrase, '请选择txt短语列表', False))
if __name__ == '__main__':
    # Script entry point: build the window, then hand control to Tk's event
    # loop until the window is closed.
    app = Application()
    app.root.mainloop()
主要实现的功能如下所示:
将有道词典导出的txt或xml格式单词转换为Excel格式
分离全英文文章中的单词并去重
批量查询分离出的单词
将分离出的单词保存为可以导入有道词典的xml格式
将百度中文导出txt文件转换为Excel文件
另外,由于Python是基于C语言的底层库运行,所以如果电脑上没有python环境或者VS环境,请先安装vc_redist.x64.exe(32位系统安装vc_redist.x86.exe)以保证程序正常运行。
微软官方下载地址
或:https://www.microsoft.com/zh-CN/download/details.aspx?id=30679