import re
import urllib
import urllib.parse
import urllib.request
import time
# Base URL of the Google Translate web page that requests are sent to.
url_google = 'http://translate.google.cn'

# Matches the text that follows "TRANSLATED_TEXT=" up to and including the
# first ';' on the same line (the marker itself is excluded by the lookbehind).
reg_text = re.compile(r'(?<=TRANSLATED_TEXT=).*?;')

# Browser-like User-Agent header value attached to every request.
user_agent = (
    r'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
    r'Chrome/44.0.2403.157 Safari/537.36'
)
def translateGoogle(text, f='zh-cn', t='en'):
    """Translate ``text`` through the Google Translate web page.

    The text and language pair are URL-encoded into the query string of a
    GET request to ``url_google``, sent with the browser-like ``user_agent``
    header. The translation is then pulled out of the returned page with
    ``reg_text``, printed, and returned.

    Args:
        text: The text to translate.
        f: Source language code (default ``'zh-cn'``).
        t: Target language code (default ``'en'``).

    Returns:
        The extracted translation string (the same value that is printed).

    Raises:
        ValueError: If the response contains no ``TRANSLATED_TEXT=`` marker.
        urllib.error.URLError: If the HTTP request itself fails.
    """
    query = urllib.parse.urlencode(
        {'hl': 'zh-cn', 'ie': 'utf-8', 'text': text, 'langpair': '%s|%s' % (f, t)}
    )
    req = urllib.request.Request(url_google + '?' + query)
    req.add_header('User-Agent', user_agent)

    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(req) as response:
        content = response.read().decode('utf-8')

    data = reg_text.search(content)
    if data is None:
        # Fail with a clear message instead of AttributeError on None.group().
        raise ValueError('no TRANSLATED_TEXT found in response for %r' % (text,))

    # The match ends with ';' and the value is wrapped in single quotes.
    result = data.group(0).strip(';').strip('\'')
    print(result)
    return result
# Read the input texts.
text_list = ['我是中国人'] * 5
# Timing placeholders; neither value is read again in the visible code.
time1, time_google = time.time(), 0
# Run the translation for each text; translateGoogle prints every result.
for text in text_list:
    translateGoogle(text)
实验结果:
C:\python35\python.exe C:/Users/User/PycharmProjects/nlpdemo/matchchinese/google_translate.py
ie=utf-8&hl=zh-cn&text=%E6%88%91%E6%98%AF%E4%B8%AD%E5%9B%BD%E4%BA%BA&langpair=zh-cn%7Cen
<urllib.request.Request object at 0x0000000001239048>
I am Chinese
ie=utf-8&hl=zh-cn&text=%E6%88%91%E6%98%AF%E4%B8%AD%E5%9B%BD%E4%BA%BA&langpair=zh-cn%7Cen
<urllib.request.Request object at 0x0000000001239048>
I am Chinese
ie=utf-8&hl=zh-cn&text=%E6%88%91%E6%98%AF%E4%B8%AD%E5%9B%BD%E4%BA%BA&langpair=zh-cn%7Cen
<urllib.request.Request object at 0x0000000001239048>
I am Chinese
ie=utf-8&hl=zh-cn&text=%E6%88%91%E6%98%AF%E4%B8%AD%E5%9B%BD%E4%BA%BA&langpair=zh-cn%7Cen
<urllib.request.Request object at 0x0000000001239048>
I am Chinese
ie=utf-8&hl=zh-cn&text=%E6%88%91%E6%98%AF%E4%B8%AD%E5%9B%BD%E4%BA%BA&langpair=zh-cn%7Cen
<urllib.request.Request object at 0x0000000001239048>
I am Chinese
Process finished with exit code 0
http请求
(1)添加消息头
(2)将请求参数转换成 URL 编码(urlencode)格式
(3)模拟浏览器访问
(4)获取返回结果