python 有道翻译助手

根据有道翻译软件的功能,结合实际用途,基于Python3.6写了一个有道翻译助手软件。

测试文件及源代码已上传至:https://github.com/MMMMMichael/Translation-assistant

利用了有道翻译(有道智云)提供的API接口,实现了文章中单词提取并翻译,另外可以将有道翻译导出的txt文件或xml文件转换为Excel文件,方便平时查看。

还可以将英文文章导入,利用提取单词并翻译功能将文章分割为单词并查询意思,最终生成可以导入有道词典的xml文件。

同时还添加了将百度中文导出的txt文件转换为Excel格式文件的功能。

python有道翻译助手源代码如下:

# -*- coding:utf-8 -*-  
"""
    Create on  2017-10-28
    Author : Mu Yachao
    Email   : [email protected]
    Function :
        1 : 将有道词典导出的txt或xml格式单词转换到Excel表格中
        2 : 分离全英文文章中的单词并去重
        3 : 批量查询分理出的单词
        4 : 将分离出的单词保存为可以导入有道词典的xml格式
        5 : 将百度中文导出txt文件转换为Excel文件
"""

import _thread
import http.client
import json
import random
import re
import urllib
import urllib.parse
from hashlib import md5
from tkinter import *
from tkinter import ttk
from tkinter import messagebox
from tkinter.filedialog import askopenfilename, asksaveasfilename
from tkinter.scrolledtext import ScrolledText

from openpyxl import load_workbook


def extract_words(txt_file, exist_words=None, min_len=1):
    """Extract the distinct words of a text file.

    The text is lower-cased and split into runs of word characters. Tokens
    that contain a digit or a CJK ideograph are discarded.

    :param txt_file:    path of the text file (read with the default encoding)
    :param exist_words: words that must be left out of the result
    :param min_len:     only words strictly longer than this are kept
    :return:    alphabetically sorted list of unique words
    """
    with open(txt_file, 'r') as file:
        text = file.read()

    skip = set(exist_words) if exist_words else set()
    candidates = set(re.findall(r"\w+", text.lower()))
    # Sorting last keeps the output deterministic; a set has no stable order.
    return sorted(
        word for word in candidates
        if len(word) > min_len
        and word not in skip
        and not re.search(r'\d', word)  # drop tokens containing numbers
        and not re.search('[\u4e00-\u9fa5]', word)  # drop Chinese tokens
    )


def extract_phrase(txt_file, min_len=1):
    """Read a phrase list, one phrase per line, and drop duplicates.

    Lines containing a CJK ideograph are skipped.

    :param txt_file: path of the text file (read with the default encoding)
    :param min_len:  only phrases strictly longer than this are kept
    :return:    list of unique phrases in the order they first appear
    """
    chinese = re.compile('[\u4e00-\u9fa5]')
    seen = set()
    result = []
    with open(txt_file, 'r') as file:
        for line in file:
            phrase = line.strip('\n')
            if chinese.search(line) or len(phrase) <= min_len:
                continue
            # Keep first-seen order so the output does not change between runs.
            if phrase not in seen:
                seen.add(phrase)
                result.append(phrase)
    return result


def get_words(file):
    """Parse a UTF-16 text export into (entry, phonetic, translation) tuples.

    Line breaks are removed first, then the text is split at every
    "<number>," marker. An entry containing '[' is treated as a word with a
    bracketed phonetic; any other entry is treated as a phrase, optionally
    followed by a translation that starts with a CJK ideograph.

    :param file: path of the UTF-16 encoded text file
    :return:    list such as [('group', '[gruːp]', 'n. 组;团体;'), ...];
                phrases carry an empty phonetic
    """
    with open(file, 'r', encoding='utf-16') as f:
        text = f.read().replace('\n', '')

    word_re = re.compile(r'(\w+)\s+(\[.*?])(.*?)$')
    phrase_re = re.compile(r'(.*?)\s+([\u4e00-\u9fa5].*?$)')

    result = []
    for entry in re.split(r'\d+,\s?', text):
        if '[' in entry:  # word + pronunciation + translation
            match = word_re.search(entry)
            if match:
                result.append(match.groups())
            continue
        match = phrase_re.search(entry)
        if match:  # phrase + translation
            result.append((match.group(1).strip(), "", match.group(2)))
        elif entry:  # phrase without translation
            result.append((entry.strip(), "", ""))
    return result


def get_xml_words(file):
    """Parse a UTF-16 wordbook XML file into (word, phonetic, translation) tuples.

    NOTE(review): the markup inside the original pattern was lost, leaving a
    regex with one group that could never yield three fields. The tag names
    used here (<item>, <word>, <trans>, <phonetic>) are reconstructed from
    the Youdao wordbook layout -- confirm against a real export.

    :param file: path of the UTF-16 encoded XML file
    :return:    list such as [('group', '[gruːp]', 'n. 组;团体;'), ...]
    """
    with open(file, 'r', encoding='utf-16') as f:
        text = f.read()

    item_re = re.compile(
        r'<item>.*?<word>(.*?)</word>'
        r'.*?<trans>(.*?)</trans>'
        r'.*?<phonetic>(.*?)</phonetic>',
        re.S)
    cdata_re = re.compile(r'^\s*<!\[CDATA\[(.*)\]\]>\s*$', re.S)

    def unwrap(value):
        # Field values may or may not be wrapped in a CDATA section.
        match = cdata_re.match(value)
        return match.group(1) if match else value

    # The XML order is word, trans, phonetic; callers expect phonetic second.
    return [(word, unwrap(phonetic), unwrap(trans))
            for word, trans, phonetic in item_re.findall(text)]


def get_chinese(file='', judge=None):
    """Parse a text export whose lines look like "<number> <word><TAB>a##b".

    :param file:  path of the text file (read with the default encoding)
    :param judge: optional list of words already seen; matching words are
                  skipped and every newly accepted word is appended to it.
                  A fresh list is used per call when omitted, so separate
                  calls no longer share state.
    :return:    list of (word, a, b) tuples, one per distinct word, where a
                and b are the two fields around "##"
                (presumably pronunciation and explanation -- TODO confirm)
    """
    if judge is None:
        judge = []
    # No trailing newline in the pattern, so the last line is parsed even
    # when the file does not end with a line break.
    entry_re = re.compile(r"(\d+)\s(\w+)\t+(.*?)##(.*)")
    words = []
    with open(file, 'r') as source:
        for line in source:
            for _, word, first, second in entry_re.findall(line):
                if word not in judge:
                    judge.append(word)
                    words.append((word, first, second))
    return words


def write_words_to_excel(excel_path, words_list):
    """Append new entries to the first sheet of an existing Excel workbook.

    An entry is skipped when its first field already appears in column 1 of
    the sheet or earlier in ``words_list``. The first three fields of each
    accepted entry are written to columns 1-3 of a new row below the
    existing data, and the workbook is saved back to ``excel_path``.

    :param excel_path: path of an existing workbook
    :param words_list: iterable of 3-item sequences
    :return:    number of rows added, or None when the workbook cannot be
                opened
    """
    try:
        excel = load_workbook(excel_path)
    except Exception:
        # Callers treat None as "conversion failed" and report it themselves.
        return None

    sheet = excel[excel.sheetnames[0]]  # first sheet of the workbook
    rows = sheet.max_row

    # A set gives O(1) membership tests for the values already in column 1.
    exist_words = {sheet.cell(row=rx, column=1).value for rx in range(1, rows + 1)}

    word_count = 0
    for words in words_list:
        if words[0] in exist_words:
            continue
        word_count += 1
        for c in range(3):
            sheet.cell(row=rows + word_count, column=c + 1).value = words[c]
        # Remember it so a duplicate later in words_list is not written again.
        exist_words.add(words[0])

    excel.save(excel_path)
    return word_count


def translate(appKey='', secretKey='', q='', is_word=True):
    """Translate a word or phrase from English to Chinese via the Youdao API.

    :param appKey:      your application ID
    :param secretKey:   your application secret
    :param q:           the word or phrase to look up
    :param is_word:     whether ``q`` is a single word (default True)
    :return:    for a word: {'word', 'phonetic', 'explains'};
                for a phrase: {'phrase', 'translation'};
                the raw response dict when the API reports a non-zero
                errorCode; None when the request fails or the reply cannot
                be parsed
    """
    salt = str(random.randint(1, 65536))
    # Request signature: md5 over appKey + q + salt + secretKey.
    sign = md5((appKey + q + salt + secretKey).encode()).hexdigest()
    request_path = ('/api?appKey=' + appKey + '&q=' + urllib.parse.quote(q)
                    + '&from=EN&to=zh-CHS&salt=' + salt + '&sign=' + sign)

    httpClient = None
    try:
        # A timeout keeps a dead connection from blocking the caller forever.
        httpClient = http.client.HTTPConnection('openapi.youdao.com', timeout=10)
        httpClient.request('GET', request_path)
        # SECURITY: the reply is untrusted network data. It was previously
        # passed to eval(); it is now parsed strictly as JSON.
        response = json.loads(httpClient.getresponse().read().decode())
    except (OSError, http.client.HTTPException, ValueError):
        return None
    finally:
        if httpClient:
            httpClient.close()

    if not isinstance(response, dict) or 'errorCode' not in response:
        return None
    if response['errorCode'] != '0':
        return response

    if is_word:
        basic = response.get('basic') or {}
        explains = ';'.join(basic.get('explains') or []).strip(';')
        return {'word': q, 'phonetic': basic.get('phonetic', ''), 'explains': explains}

    # Every translation is followed by ';', including the last one.
    translation = ''.join(item + ';' for item in response.get('translation') or [])
    return {'phrase': q, 'translation': translation}

class Application():
    """Tkinter front end of the vocabulary assistant.

    Three tabs are offered: word/phrase extraction (with optional
    translation and export to a wordbook XML file), conversion of exported
    files into Excel, and the API credentials used for translation.

    NOTE(review): the export/translate actions run in worker threads started
    with ``_thread`` and call Tk dialogs and widgets from there, as the
    original code did. Tk access from a non-main thread is not guaranteed to
    be safe on every platform -- confirm before relying on it.
    """

    def __init__(self):
        self.root = Tk()
        self.root.title('词汇助手')
        self.root.resizable(False, False)
        # --- state shared with the widgets ---
        self.english_articles_path = StringVar()  # English article path
        self.english_list_path = StringVar()  # English phrase list path
        self.file_from_youdao_path = StringVar()  # Youdao export (xml or txt)
        self.excel_from_youdao_file_path = StringVar()  # Excel target for the Youdao export
        self.label_youdao_file_to_excel_result = StringVar()  # result text of that conversion
        self.chinese_txt_file_path = StringVar()  # Chinese txt export
        self.excel_from_chinese_path = StringVar()  # Excel target for the Chinese export
        self.label_chinese_to_excel_result = StringVar()  # result text of that conversion
        # --- menu ---
        self.menu_bar = Menu(self.root)
        self.menu_bar.add_command(label='关于', command=self.help_about)
        self.root.config(menu=self.menu_bar)  # == self.root['menu'] = self.menu_bar
        # --- main tabs ---
        self.tab_menu = ttk.Notebook(self.root)
        self.tab_menu.grid(row=1, column=1)
        self.tab_extract = ttk.Frame(self.tab_menu)
        self.tab_menu.add(self.tab_extract, text='  单词提取    ')
        self.tab_convert = ttk.Frame(self.tab_menu)
        self.tab_menu.add(self.tab_convert, text='  格式转换    ')
        self.tab_conf = ttk.Frame(self.tab_menu)
        self.tab_menu.add(self.tab_conf, text='  配置信息    ')

        # --- tab: format conversion ---
        # Youdao export -> Excel
        self.frm_word = LabelFrame(self.tab_convert, text='有道词典导出文件转换Excel')
        self.frm_word.grid(row=1, column=1, pady=10, padx=10)
        Entry(self.frm_word, textvariable=self.file_from_youdao_path).grid(row=1, column=1, padx=5)
        Button(self.frm_word, text='选择有道词典导出文件',
               command=self.choose_file_from_youdao_path).grid(row=1, column=2)
        Entry(self.frm_word, textvariable=self.excel_from_youdao_file_path).grid(row=2, column=1, pady=5)
        Button(self.frm_word, text=' 选择 Excel 文件路径 ',
               command=self.choose_excel_from_youdao_file_path).grid(row=2, column=2)
        Button(self.frm_word, text='开始转换',
               command=self.change_youdao_outfile_to_excel).grid(row=3, column=1, pady=5)
        Label(self.frm_word, textvariable=self.label_youdao_file_to_excel_result).grid(row=3, column=2)

        # Chinese export -> Excel
        self.frm_chinese = LabelFrame(self.tab_convert, text='百度中文导出文件转换Excel')
        self.frm_chinese.grid(row=2, column=1, pady=10)
        Entry(self.frm_chinese, textvariable=self.chinese_txt_file_path).grid(row=1, column=1, padx=5)
        Button(self.frm_chinese, text='选择百度中文导出文件',
               command=self.choose_chinese_txt_file_path).grid(row=1, column=2)
        Entry(self.frm_chinese, textvariable=self.excel_from_chinese_path).grid(row=2, column=1, pady=5)
        Button(self.frm_chinese, text=' 选择 Excel 文件路径 ',
               command=self.choose_excel_from_chinese_path).grid(row=2, column=2)
        Button(self.frm_chinese, text='开始转换',
               command=self.change_chinese_to_excel).grid(row=3, column=1, pady=5)
        Label(self.frm_chinese, textvariable=self.label_chinese_to_excel_result).grid(row=3, column=2)

        # --- tab: API configuration ---
        self.frm_conf = LabelFrame(self.tab_conf, text='有道翻译API配置信息')
        self.frm_conf.grid(row=2, column=1, padx=10, pady=10)
        Label(self.frm_conf, text='输入您的应用ID:').grid(row=1, column=1)
        self.et_app_key = Entry(self.frm_conf)
        self.et_app_key.grid(row=1, column=2, padx=10, pady=10)
        Label(self.frm_conf, text='输入您的应用密钥:').grid(row=2, column=1)
        self.et_secret_key = Entry(self.frm_conf)
        self.et_secret_key.grid(row=2, column=2, padx=10, pady=10)
        Label(self.frm_conf, text='注:每次关闭程序自动删除ID').grid(row=3, column=2)

        # --- tab: extraction ---
        self.frm_extract = Frame(self.tab_extract)
        self.frm_extract.grid(row=1, column=1, pady=5)
        self.tab_ctrl = ttk.Notebook(self.frm_extract)
        self.tab_ctrl.grid(row=1, column=1)
        self.tab_word = ttk.Frame(self.tab_ctrl)
        self.tab_ctrl.add(self.tab_word, text='文章单词提取')
        self.tab_phrase = ttk.Frame(self.tab_ctrl)
        self.tab_ctrl.add(self.tab_phrase, text="列表短语提取")
        # sub-tab: words from an article
        self.frm_choose_word = Frame(self.tab_word)
        self.frm_choose_word.grid(row=1, column=1)
        Entry(self.frm_choose_word, textvariable=self.english_articles_path).grid(row=1, column=1, padx=5)
        Button(self.frm_choose_word, text='选择英文文章txt',
               command=self.choose_english_article_path).grid(row=1, column=2)
        Button(self.frm_choose_word, text='提取单词',
               command=self.extract_words_from_english_articles).grid(row=1, column=3)
        self.frm_word_btn = Frame(self.tab_word)
        self.frm_word_btn.grid(row=2, column=1)
        Button(self.frm_word_btn, text='保存为有道xml格式',
               command=self.save_word_as_xml).grid(row=1, column=2, padx=13)
        Button(self.frm_word_btn, text='翻译并保存为xml格式',
               command=self.translate_word).grid(row=1, column=3)
        # sub-tab: phrases from a list
        self.frm_choose_phrase = Frame(self.tab_phrase)
        self.frm_choose_phrase.grid(row=1, column=1)
        Entry(self.frm_choose_phrase, textvariable=self.english_list_path).grid(row=1, column=1, padx=5)
        Button(self.frm_choose_phrase, text='选择短语列表txt',
               command=self.choose_english_list_path).grid(row=1, column=2)
        Button(self.frm_choose_phrase, text='提取短语',
               command=self.extract_phrase_from_english_list).grid(row=1, column=3)
        self.frm_phrase_btn = Frame(self.tab_phrase)
        self.frm_phrase_btn.grid(row=2, column=1)
        Button(self.frm_phrase_btn, text='保存为有道xml格式',
               command=self.save_phrase_as_xml).grid(row=1, column=2, padx=13)
        Button(self.frm_phrase_btn, text='翻译并保存为xml格式',
               command=self.translate_phrase).grid(row=1, column=3)

        self.st_word_list = ScrolledText(self.frm_extract, width=32, height=8, background='#ffffff',
                                         font=('微软雅黑',), )
        self.st_word_list.grid(row=3, column=1)

    # ------------------------------------------------------------------
    # File pickers: each stores the chosen path in its StringVar.
    # ------------------------------------------------------------------
    def choose_english_article_path(self):
        path_ = askopenfilename(filetypes=(("Text Files", "*.txt"), ("All files", "*.*")))
        self.english_articles_path.set(path_)

    def choose_english_list_path(self):
        path_ = askopenfilename(filetypes=(("Text Files", "*.txt"), ("All files", "*.*")))
        self.english_list_path.set(path_)

    def choose_file_from_youdao_path(self):
        path_ = askopenfilename(filetypes=(("Text files", "*.txt"), ("XML files", "*.xml")))
        self.file_from_youdao_path.set(path_)

    def choose_excel_from_youdao_file_path(self):
        path_ = askopenfilename(filetypes=(("Excel File", "*.xlsx*"),))
        self.excel_from_youdao_file_path.set(path_)

    def choose_chinese_txt_file_path(self):
        path_ = askopenfilename(filetypes=(("Text files", "*.txt"),))
        self.chinese_txt_file_path.set(path_)

    def choose_excel_from_chinese_path(self):
        path_ = askopenfilename(filetypes=(("Excel File", "*.xlsx*"),))
        self.excel_from_chinese_path.set(path_)

    def help_about(self):
        """Show the version followed by the module docstring."""
        # __doc__ is None when docstrings are stripped (python -OO).
        messagebox.showinfo("About", '    Version:     V 1.0' + (__doc__ or ''))

    # ------------------------------------------------------------------
    # Extraction preview
    # ------------------------------------------------------------------
    def _show_entries(self, entries, header):
        """Replace the preview text with ``entries`` below a count header."""
        self.st_word_list.delete(0.0, END)
        for entry in entries:
            self.st_word_list.insert(END, entry + '\n')
        self.st_word_list.insert(0.0, header % len(entries))

    def extract_words_from_english_articles(self):
        """List the words of the selected article in the preview box."""
        if self.english_articles_path.get() == '':
            messagebox.showinfo('提示', '请选择英文文章txt文件')
            return
        try:
            words = extract_words(txt_file=self.english_articles_path.get())
        except Exception:
            messagebox.showerror('Error', '提取失败,请检查txt文件格式或编码')
            return
        self._show_entries(words, '\t共 %d 个单词\n\n')

    def extract_phrase_from_english_list(self):
        """List the phrases of the selected phrase file in the preview box."""
        if self.english_list_path.get() == '':
            messagebox.showinfo('提示', '请选择短语列表txt文件')
            return
        try:
            phrase = extract_phrase(txt_file=self.english_list_path.get())
        except Exception:
            messagebox.showerror('Error', '提取失败,请检查txt文件格式或编码')
            return
        self._show_entries(phrase, '\t共 %d 个短语\n\n')

    # ------------------------------------------------------------------
    # Conversion to Excel
    # ------------------------------------------------------------------
    def change_youdao_outfile_to_excel(self):
        """Convert the selected xml/txt export into rows of the chosen workbook."""
        source = self.file_from_youdao_path.get()
        target = self.excel_from_youdao_file_path.get()
        if source == '' or target == '':
            messagebox.showwarning('警告', '请选择有道词典导出的txt或xml文件以及要导出的excel文件')
            return
        if source.endswith('xml'):
            parser = get_xml_words
            failure = '转换失败,请关闭Excel文件或检查xml文件格式及编码。'
        elif source.endswith('txt'):
            parser = get_words
            failure = '转换失败,请关闭Excel文件或检查txt文件格式及编码。'
        else:
            return
        try:
            added = write_words_to_excel(target, parser(source))
        except Exception:
            added = None
        if added is None:  # None also signals that the workbook could not be opened
            messagebox.showerror('Error', failure)
            return
        self.label_youdao_file_to_excel_result.set('新增 %d 行' % added)

    def change_chinese_to_excel(self):
        """Convert the selected Chinese txt export into rows of the chosen workbook."""
        source = self.chinese_txt_file_path.get()
        target = self.excel_from_chinese_path.get()
        if source == '' or target == '':
            messagebox.showwarning('警告', '请选择从百度中文导出的txt文件和要导入的Excel文件')
            return
        try:
            chinese_list = get_chinese(file=source)
        except Exception:
            messagebox.showwarning('警告', '请检查txt文件格式及编码,确保为百度中文导出文件。')
            return
        try:
            count = write_words_to_excel(target, chinese_list)
        except Exception:
            count = None
        if count is None:
            messagebox.showwarning('警告', '转换失败,请关闭excel文件并重试')
            return
        self.label_chinese_to_excel_result.set('新增 %d 行' % count)

    # ------------------------------------------------------------------
    # Wordbook XML export
    # ------------------------------------------------------------------
    @staticmethod
    def _wordbook_xml(items):
        """Build the wordbook document for ``items`` ([entry, translation, phonetic]).

        NOTE(review): the markup in the original string literals was lost;
        only the tabs and newlines survived. The tags below are rebuilt on
        that whitespace skeleton from the Youdao wordbook layout -- confirm
        against a file exported by Youdao before relying on the import.
        """
        def escape(text):
            return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

        def cdata(text):
            # "]]>" would end the section early, so split it across two sections.
            return '<![CDATA[' + text.replace(']]>', ']]]]><![CDATA[>') + ']]>'

        parts = ['<wordbook>']
        for item in items:
            parts.append(
                '<item>\t<word>' + escape(item[0]) + '</word>\n\t'
                + '<trans>' + cdata(item[1]) + '</trans>\n\t'
                + '<phonetic>' + cdata(item[2]) + '</phonetic>\n\t'
                + '<tags></tags>\n\t<progress>1</progress>\n</item>')
        parts.append('</wordbook>')
        return ''.join(parts)

    def save_as_xml(self, items, xml):
        """Write ``items`` to ``xml`` as a UTF-16 wordbook and confirm to the user.

        :param items: sequence of [entry, translation, phonetic]
        :param xml:   destination path
        """
        with open(xml, 'w', encoding='utf-16') as xml_:
            xml_.write(self._wordbook_xml(items))
        messagebox.showinfo('Info', '保存成功')

    def _ask_xml_target(self):
        """Ask where to save the xml file; returns a falsy value on cancel."""
        return asksaveasfilename(filetypes=(("xml files", "*.xml"), ("All files", "*.*")),
                                 defaultextension='.xml')

    def _export_plain(self, path_var, prompt, extractor):
        """Extract entries from the file in ``path_var`` and save them untranslated."""
        source = path_var.get()
        if source == '':
            messagebox.showinfo('提示', prompt)
            return
        target = self._ask_xml_target()
        if not target:
            return
        try:
            entries = extractor(txt_file=source)
        except Exception:
            messagebox.showerror('Error', '保存失败,请检查txt文件格式或编码')
            return
        self.save_as_xml([[entry, '', ''] for entry in entries], target)

    def _export_translated(self, path_var, prompt, extractor, is_word):
        """Extract entries, translate each one and save the results.

        Entries whose lookup fails are skipped and reported once at the end,
        instead of one dialog per entry. An invalid application ID (error
        code 108) aborts the whole run.
        """
        source = path_var.get()
        if source == '':
            messagebox.showinfo('提示', prompt)
            return
        app_key = self.et_app_key.get()
        secret_key = self.et_secret_key.get()
        if app_key == '' or secret_key == '':
            messagebox.showinfo('提示', '请点击“配置信息”按钮输入相应信息!')
            return
        target = self._ask_xml_target()
        if not target:
            return
        try:
            entries = extractor(txt_file=source)
        except Exception:
            messagebox.showerror('Error', '翻译失败,请检查txt文件格式或编码')
            return

        self.st_word_list.insert(0.0, 'Please wait ...\n')
        self.st_word_list.update()
        translated = []
        failed = 0
        try:
            for entry in entries:
                try:
                    youdao = translate(appKey=app_key, secretKey=secret_key, q=entry, is_word=is_word)
                except Exception:
                    youdao = None
                if not isinstance(youdao, dict):
                    failed += 1
                    continue
                if youdao.get('errorCode') == '108':
                    messagebox.showerror('error', '应用ID无效')
                    return
                if is_word and 'word' in youdao:
                    translated.append([youdao['word'], youdao.get('explains', ''),
                                       youdao.get('phonetic', '')])
                elif not is_word and 'phrase' in youdao:
                    translated.append([youdao['phrase'], youdao.get('translation', ''), ''])
                else:  # any other API error response
                    failed += 1
        finally:
            # Remove the "Please wait" line on every exit path.
            self.st_word_list.delete(1.0, 2.0)

        if failed:
            messagebox.showwarning('Warning', '翻译失败,请检查网络连接!')
        if translated or not failed:
            self.save_as_xml(translated, target)

    def save_word_as_xml(self):
        """Save the article's words, untranslated, as a wordbook xml file."""
        _thread.start_new_thread(
            self._export_plain, (self.english_articles_path, '请选择txt文章', extract_words))

    def save_phrase_as_xml(self):
        """Save the phrase list, untranslated, as a wordbook xml file."""
        _thread.start_new_thread(
            self._export_plain, (self.english_list_path, '请选择txt短语列表', extract_phrase))

    def translate_word(self):
        """Translate the article's words and save them as a wordbook xml file."""
        _thread.start_new_thread(
            self._export_translated,
            (self.english_articles_path, '请选择txt文章', extract_words, True))

    def translate_phrase(self):
        """Translate the phrase list and save it as a wordbook xml file."""
        _thread.start_new_thread(
            self._export_translated,
            (self.english_list_path, '请选择txt短语列表', extract_phrase, False))


# Script entry point: build the GUI and hand control to the Tk event loop.
if __name__ == '__main__':
    app = Application()
    app.root.mainloop()

主要实现的功能如下所示:

  1. 将有道词典导出的txt或xml格式单词转换为Excel格式

  2. 分离全英文文章中的单词并去重

  3. 批量查询分离出的单词

  4. 将分离出的单词保存为可以导入有道词典的xml格式

  5. 将百度中文导出txt文件转换为Excel文件

另外,由于Python是基于C语言的底层库运行,所以如果电脑上没有python环境或者VS环境,请先安装vc_redist.x64.exe(32位系统安装vc_redist.x86.exe)以保证程序正常运行。

微软官方下载地址

或:https://www.microsoft.com/zh-CN/download/details.aspx?id=30679

python 有道翻译助手_第1张图片
公众号: squidward--

你可能感兴趣的:(python 有道翻译助手)