维文、藏文等特殊语种翻译

最近在研究语音识别,主要是维语、藏语等小众语言的识别,调用 JTHS 的SDK后,识别结果是维文或者藏文,看不懂啊,必须要翻译成汉语才行,于是又调用 JTHS 官方提供的Web API,但是成功率太低,反馈给技术人员,暂未得到解决,很是失望。于是在网上找到了“中国民族语文翻译局”,这里刚好有这些特殊语种的翻译,于是研究一番,就着手写了Python和C#版本的翻译接口。效果还不错,于是写篇文章记录下。

话不多说,直接贴代码。

Python版

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
 
'''
@Author  :   Anuo.
 
@License :   (C) Copyright 2019, Anuo's Studio
 
@Contact :   [email protected]
 
@Software:   VS2017
 
@File:   mzywfyj_spider.py
 
@Time:   Feb 20,2019
 
@Desc:   中国民族语文翻译局,翻译爬虫,网址:http://www.mzywfy.org.cn/translate.jsp
 
'''


import urllib.request
import urllib.parse
import json


def mzywfyj_translate(trans_text, trans_from, trans_to, trans_url):
    '''翻译'''
    
    mzywfyj_url = "http://www.mzywfy.org.cn/ajaxservlet"
    data = {}
    
    data['src_text'] = trans_text
    data['from'] = trans_from
    data['to'] = trans_to
    data['url'] = trans_url
    
    data = urllib.parse.urlencode(data).encode('utf-8')
    
    headers = {"User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36"}
    headers['Referer'] = 'http://www.mzywfy.org.cn/translate.jsp';  # 这个头必须加,不然不能成功
    fanyi_re = urllib.request.Request(mzywfyj_url, data, headers)
    fanyi_response = urllib.request.urlopen(fanyi_re)
    ret = fanyi_response.read().decode('utf-8')
    print(ret)


if __name__ == "__main__":

    # 可以根据自己的需要设置参数,然后调用方法mzywfyj_translate()
    # 汉文 -> 维吾尔文
    trans_text = '你好'
    trans_from = 'zh'
    trans_to = 'uy'
    trans_url = 2
    mzywfyj_translate(trans_text, trans_from, trans_to, trans_url)
    # 维吾尔文 -> 汉文
    trans_text = 'ياخشىمۇ سىز'
    trans_from = 'uy'
    trans_to = 'zh'
    trans_url = 7
    mzywfyj_translate(trans_text, trans_from, trans_to, trans_url)
    
    
    # 汉文 ->  藏文
    trans_text = '你好'
    trans_from = 'zh'
    trans_to = 'ti'
    trans_url = 1
    mzywfyj_translate(trans_text, trans_from, trans_to, trans_url)
    # 藏文 ->  汉文
    trans_text = 'སྐུ་ཁམས་བཟང་'
    trans_from = 'ti'
    trans_to = 'zh'
    trans_url = 6
    mzywfyj_translate(trans_text, trans_from, trans_to, trans_url)
    
    
    # 汉文 ->  蒙古文
    trans_text  = '你好'
    trans_from = 'zh'
    trans_to = 'mo'
    trans_url = 0
    mzywfyj_translate(trans_text, trans_from, trans_to, trans_url)
    # 蒙古文 ->  汉文
    trans_text = '︽ ᠰᠠᠢᠢᠨ ᠪᠠᠶᠢᠨ᠎ᠠ ᠤᠣ ︖ '
    trans_from = 'mo'
    trans_to = 'zh'
    trans_url = 5
    mzywfyj_translate(trans_text, trans_from, trans_to, trans_url)

C#版

    /// 
    /// 翻译
    /// 
    /// 需要翻译的内容
    /// 翻译成功返回翻译结果,翻译失败返回错误信息
    /// 翻译成功失败,true-成功;false-失败
    public bool Translate(string transText,  string from, string to, string url, out string transResult)
    {
        transResult = "";
        string requestUrl = "http://www.mzywfy.org.cn/ajaxservlet";

        try
        {
            HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(requestUrl);
            request.Timeout = 5000;
            request.Method = "POST";
            request.ContentType = "application/x-www-form-urlencoded; charset=UTF-8";
            request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36";
            request.Accept = "application/json, text/javascript, */*; q=0.01";
            request.Referer = "http://www.mzywfy.org.cn/translate.jsp";

            string send = string.Format("src_text={0}&from={1}&to={2}&url={3}", transText, from, to, url);
            byte[] postData = Encoding.UTF8.GetBytes(send);
            request.ContentLength = postData.Length;

            Stream stream = request.GetRequestStream();
            stream.Write(postData, 0, postData.Length);
            stream.Close();
            stream.Dispose();

            bool success = false;
            using (HttpWebResponse webResponse = (HttpWebResponse)request.GetResponse())
            using (StreamReader responseStream = new StreamReader(webResponse.GetResponseStream()))
            {
                if (webResponse.StatusCode == HttpStatusCode.OK)
                {
                    success = true;
                    string retJson = responseStream.ReadToEnd();  
                    JObject jo = (JObject)JsonConvert.DeserializeObject(retJson);  // 需要引用 Newtonsoft.Json.dll
                    transResult = jo["tgt_text"].ToString().Trim();
                }
            }

            return success;
        }
        catch (Exception ex)
        {
            transResult = ex.ToString();
            return false;
        }
    }

Just so so

Anuo.
Chengdu
Feb 20,2019

你可能感兴趣的:(维文、藏文等特殊语种翻译)