Python谷歌翻译(防封版)

前言

最近手头工作需要调用谷歌翻译,在网上找了一些方法,无奈发现都会被封。最后我写了个爬虫,用浏览器来模拟操作,这样速度较慢,但是不会被封。

方法一

直接调用 googletrans 包。该方法简单粗暴,但是会被封 IP,只适合少量数据。示例:

from googletrans import Translator

# Build one client, translate a single phrase to English, then print the
# translated text taken from the returned result object.
translator = Translator()
result = translator.translate('星期日', dest='en')
print(result.text)

方法二

使用爬虫,用浏览器模拟操作(也试过不用浏览器,最后也会被封),该方法速度较慢,但是不会被封。示例:

from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from retry import retry


# Launch options for the Chrome instance created below.
chrome_options = Options()

# Hide the browser window (run Chrome in headless mode).
chrome_options.add_argument('--headless')
# A single browser is started at module load and shared by translate().
browser = webdriver.Chrome(options=chrome_options)


@retry(tries=3, delay=1)
def translate(input: str, target: str) -> str:
    """Translate ``input`` through the Google Translate web page.

    Drives the module-level ``browser``: navigates to the translate page for
    the requested target language (the source language is left to the page
    via ``sl=auto``), types the text into the source box, waits for a result
    element to be present and reads the translation from the page source.

    Args:
        input: Text to translate. The name shadows the ``input`` builtin; it
            is kept so callers passing it by keyword keep working.
        target: Language code substituted into the ``tl`` URL parameter,
            e.g. ``'en'`` or ``'zh-CN'``.

    Returns:
        The full text of the first translation element on the page.

    Raises:
        selenium.common.exceptions.TimeoutException: If the source box or
            the result element is not present within 10 seconds.
        IndexError: If the result element is present but holds no text.

    The ``@retry`` decorator re-runs the whole function (3 tries, 1 second
    apart) before letting the last exception propagate.
    """
    base_url = 'https://translate.google.cn/#view=home&op=translate&sl=auto&tl=%s' % target
    result_xpath = '//span[@class="tlid-translation translation"]'

    # Skip the navigation when the browser already shows this exact URL.
    if browser.current_url != base_url:
        browser.get(base_url)

    source_box = WebDriverWait(browser, 10).until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="source"]'))
    )
    source_box.clear()
    source_box.send_keys(input)

    # NOTE(review): when the page is reused without a reload, a result element
    # left over from the previous call may still be in the DOM, so this wait
    # could return before the new translation is rendered - confirm against
    # the live page.
    WebDriverWait(browser, 10).until(
        EC.presence_of_element_located((By.XPATH, result_xpath))
    )

    page = etree.HTML(browser.page_source)
    # Collect every text node of the first result element. Taking only the
    # first node would cut off a translation that is rendered as several
    # child nodes (for example one per sentence).
    text_nodes = page.xpath('(%s)[1]//text()' % result_xpath)
    if not text_nodes:
        # Same exception type the bare ``[0]`` lookup used to raise, so the
        # retry decorator and any caller handling it behave as before.
        raise IndexError('translation element contains no text')

    return ''.join(text_nodes)


if __name__ == '__main__':
    # Demo: run the same three translations 100 times in a row.
    try:
        for _ in range(100):
            print(translate('中英翻译测试', target='en'))
            print(translate('再测试一下', target='en'))
            print(translate('hello world', target='zh-CN'))
    finally:
        # Always shut the browser down, even when a translation still fails
        # after its retries; otherwise the Chrome process is left running.
        browser.quit()

你可能感兴趣的:(工具)