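# This code was written by a colleague; I extracted it on its own so that it
# can be used as a utility function. Performance is still a problem, of
# course, and remains to be solved.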
import random
import cookielib
import urllib
import urllib2
import HTMLParser
import re
# Build a cookie-aware opener and install it as urllib2's global default, so
# that later urllib2.urlopen() calls in this process go through it.
cookie_support = urllib2.HTTPCookieProcessor(cookiejar=cookielib.CookieJar())
opener = urllib2.build_opener(
    cookie_support,
    urllib2.HTTPHandler,
)
urllib2.install_opener(opener)
# Pool of browser User-Agent strings; one of them is picked at random below
# and sent with every request made through the shared opener.
user_agents = [
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
    'Opera/9.25 (Windows NT 5.1; U; en)',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
    'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
    'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
    'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 Chrome/16.0.912.77 Safari/535.7',
    # NOTE: the trailing space below is part of the original value; kept as is.
    'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0 ',
]
# The User-Agent is chosen once, when this module is loaded, and then attached
# (together with the Accept and Referer headers) to the shared opener.
agent = random.choice(user_agents)
opener.addheaders = [
    ("User-agent", agent),
    ("Accept", "*/*"),
    ("Referer", "http://www.google.com"),
]
def unescape(text):
    """Return *text* with HTML character references replaced by characters.

    Args:
        text: String that may contain references such as ``&amp;``,
            ``&lt;`` or ``&#39;``.

    Returns:
        The string with those references decoded.
    """
    try:
        # html.unescape exists on Python 3.4+, where HTMLParser.unescape was
        # deprecated (and later removed in Python 3.9).
        from html import unescape as _html_unescape
    except ImportError:
        # Python 2: fall back to the parser method the original code used.
        return HTMLParser.HTMLParser().unescape(text)
    return _html_unescape(text)
def TranslateByGoogle(text="", fromLang="en", toLang="zh-CN"):
    """Translate *text* by scraping the Google Translate mobile web page.

    Builds a ``https://translate.google.cn/m`` URL, fetches it with
    ``urllib2.urlopen`` and extracts the content of the first element whose
    markup contains ``class="t0">`` from the returned HTML.

    Args:
        text: Text to translate. Unicode text is encoded as UTF-8; a byte
            string is sent as is (it is assumed to be UTF-8 already).
        fromLang: Source language code, sent as the ``sl`` query parameter.
        toLang: Target language code, sent as the ``hl`` query parameter.
            NOTE(review): no ``tl`` parameter is sent; presumably the
            service derives the target language from ``hl`` -- confirm.

    Returns:
        The HTML-unescaped translation, ``""`` when the page contains no
        ``class="t0"`` element, or ``None`` when fetching, decoding or
        parsing fails (the error is printed, not raised).
    """
    base_link = "https://translate.google.cn/m?hl=%s&sl=%s&q=%s"
    # Only encode real text. Calling .encode() on a Python 2 byte string
    # triggers an implicit ASCII decode first, which raises
    # UnicodeDecodeError for any non-ASCII input.
    if not isinstance(text, bytes):
        text = text.encode('utf8')
    link = base_link % (toLang, fromLang, urllib.quote_plus(text))
    try:
        response = urllib2.urlopen(link)
        try:
            raw_data = response.read()
        finally:
            # Release the connection even when read() fails.
            response.close()
        data = raw_data.decode("utf-8")
        # Only the first hit is needed, so search() can stop early.
        match = re.search(r'class="t0">(.*?)<', data)
        if match is None:
            return ""
        return unescape(match.group(1))
    except Exception as e:
        # Deliberately best-effort: report the problem and signal failure
        # with None instead of propagating the error to the caller.
        print(e)
        return None