在使用Zotero PDF翻译插件的时候,感觉总是受到一些限制。Google翻译在国内无法使用,百度、deepl等翻译,都需要key。所以很大程度上,无法直接进行划词翻译。最常见的操作就是复制到浏览器中进行翻译。现在的需求就是:
**能不能将复制这个动作自动化?**
我的思路就是:
对Zotero 插件进行修改,使其能够支持自定义的服务器访问。然后将需要翻译的内容发送到服务器,在服务器上利用Python的爬虫功能,自动化地操作浏览器完成翻译,并将结果返回到Zotero。
nodejs
依据官网的介绍,开发的具体步骤如下:
插件是基于Zotero插件模板.
可以运行如下命令进行构建:
git clone https://github.com/windingwind/zotero-pdf-translate.git
cd zotero-pdf-translate
npm install
npm run build
插件生成在:
./builds/*.xpi
添加新的翻译服务:
- 添加新的服务配置到 `src/utils/config.ts` > `SERVICES`;
- 添加一个与其他服务格式相同的新的任务处理过程到 `src/modules/services/${serviceId}.ts`。如果程序运行成功,导出函数将翻译的结果放在 `data.result` 中;如果运行失败,则抛出一个异常;
- 在 `src/modules/services.ts` 中导入任务处理函数;
- 在 `addon/chrome/locale/${lang}/addon.properties` 中添加本地语言的服务名称:`service.${serviceId}`;
- 构建与测试。
import { TranslateTask, TranslateTaskProcessor } from "../../utils/translate";
/**
 * Task processor for a user-hosted translation server.
 *
 * POSTs the raw text and the source/target languages as a URL-encoded form
 * body to the URL stored in `data.secret`, then stores the `data` field of
 * the JSON response in `data.result`. Throws when the status is not 200.
 */
export default <TranslateTaskProcessor>async function (data) {
  // The request URL is read from the task's `secret` field.
  const endpoint = data.secret;
  // Each value is URI-encoded before the pairs are joined with "&".
  const formBody = [
    `data=${encodeURIComponent(data.raw)}`,
    `from=${encodeURIComponent(data.langfrom)}`,
    `to=${encodeURIComponent(data.langto)}`,
  ].join("&");
  const xhr = await Zotero.HTTP.request("POST", endpoint, {
    responseType: "json",
    body: formBody,
  });
  const status = xhr?.status;
  if (status !== 200) {
    throw `Request error: ${status}`;
  }
  data.result = xhr.response.data;
};
构建命令如下:
npm run build
python:
- flask作为服务器
- selenium:作为自动化工具
from flask import Flask, jsonify,request
import logging
from urllib.parse import urlparse
import seleniumwire.undetected_chromedriver as uc
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pyperclip
import time
import os
# Flask application object; the route handlers in this file are registered on it.
app = Flask(__name__)
class Translate():
    """Drive a Chrome browser to fetch translations from translation web pages.

    One browser window is kept per site domain (tracked in ``opened_domain``),
    so later requests for the same site switch back to the window that is
    already open instead of opening a new one.
    """

    # XPath of the DeepL source-text input element.
    _DEEPL_INPUT_XPATH = (
        '//*[@id="panelTranslateText"]/div[1]/div[2]/section[1]'
        '/div[3]/div[2]/d-textarea/div'
    )
    # XPath of the Baidu input box and of the paragraph holding the result.
    _BAIDU_INPUT_XPATH = '//*[@id="baidu_translate_input"]'
    _BAIDU_OUTPUT_XPATH = (
        '//*[@id="main-outer"]/div/div/div[1]/div[2]/div[1]/div[2]'
        '/div/div/div[1]/p[2]'
    )
    # DeepL button aria-labels as (like_button, copy_button), keyed by the
    # primary language subtag of the page's <html lang="..."> attribute.
    _DEEPL_LABELS = {
        "en": ("Like translation", "Copy to clipboard"),
        "zh": ("喜欢该翻译", "复制到剪贴板"),
    }

    def __init__(self, options=None, sw_options=None, isUC=False) -> None:
        """Create the browser driver.

        Args:
            options: A ``uc.ChromeOptions`` instance; a fresh one is created
                when ``None``.
            sw_options: selenium-wire options; only passed to the driver when
                ``isUC`` is true. ``None`` means an empty dict.
            isUC: When true, use ``uc.Chrome``; otherwise ``webdriver.Chrome``.

        Raises:
            TypeError: If ``options`` is not a ``uc.ChromeOptions``.
        """
        # Maps a site's netloc to the window handle showing that site.
        self.opened_domain = {}
        if options is None:
            options = uc.ChromeOptions()
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if not isinstance(options, uc.ChromeOptions):
            raise TypeError(
                f"options must be uc.ChromeOptions, got {type(options).__name__}"
            )
        self.options = options
        # A new dict per instance; a ``{}`` default would be shared by all.
        self.sw_options = {} if sw_options is None else sw_options
        if isUC:
            self.driver = uc.Chrome(
                options=self.options,
                seleniumwire_options=self.sw_options,
            )
        else:
            self.driver = webdriver.Chrome(options=self.options)

    def openUrl(self, url):
        """Make the window for ``url``'s domain the driver's current window.

        The first URL ever requested is loaded in the current window. A URL
        for a not-yet-seen domain is opened in a new window. For a domain that
        is already open, the existing window is selected and ``url`` is NOT
        reloaded, so a changed fragment (e.g. another language pair) has no
        effect on it.

        Raises:
            RuntimeError: If no new window handle can be found after opening.
        """
        domain = urlparse(url).netloc
        handles = self.driver.window_handles
        if not self.opened_domain:
            # Nothing has been opened yet: reuse the initial window.
            self.driver.get(url)
            self.opened_domain[domain] = self.driver.current_window_handle
        elif domain not in self.opened_domain:
            # Pass the URL as a script argument rather than splicing it into
            # the JavaScript source, so quotes in it cannot break the script.
            self.driver.execute_script("window.open(arguments[0], '_blank')", url)
            wait = WebDriverWait(
                self.driver,
                timeout=10,
                poll_frequency=1,
                ignored_exceptions=[TimeoutException],
            )
            wait.until(EC.new_window_is_opened(handles))
            new_handles = [
                handle for handle in self.driver.window_handles
                if handle not in handles
            ]
            # ``x is not []`` is always true; test for emptiness instead.
            if not new_handles:
                raise RuntimeError(f"No new browser window was opened for {url}")
            self.opened_domain[domain] = new_handles[0]
        self.driver.switch_to.window(self.opened_domain[domain])

    def translate(self, url: str, data: str, transFun: callable):
        """Open ``url`` and run ``transFun(data)`` against the page.

        Returns:
            Whatever ``transFun`` returns, or ``None`` (after printing a
            message) when ``transFun`` is ``None``.
        """
        self.openUrl(url)
        if transFun is None:
            print("翻译函数为空!!!")
            return None
        return transFun(data)

    @classmethod
    def _deepl_labels(cls, lang):
        """Return ``(like_label, copy_label)`` for a DeepL page language.

        Only the primary subtag is used, so ``zh-CN`` is treated as ``zh``.

        Raises:
            ValueError: If no labels are known for ``lang``.
        """
        primary = (lang or "").split("-")[0].lower()
        try:
            return cls._DEEPL_LABELS[primary]
        except KeyError:
            raise ValueError(f"Unsupported DeepL page language: {lang!r}") from None

    def deepl(self, data):
        """Type ``data`` into the current DeepL page and return the result.

        The translation is obtained by clicking the page's copy button and
        reading the system clipboard.
        """
        input_locator = (By.XPATH, self._DEEPL_INPUT_XPATH)
        # Click the input element first if it is already visible.
        if EC.visibility_of_element_located(input_locator)(self.driver):
            self.driver.find_element(*input_locator).click()
        lang = self.driver.find_element(By.TAG_NAME, "html").get_attribute('lang')
        tag_text, copy_text = self._deepl_labels(lang)
        el_input = WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located(input_locator)
        )
        el_input.clear()
        el_input.click()
        el_input.send_keys(data)
        # Wait until the "like" button exists.
        # NOTE(review): presumably it only appears once a translation is shown - confirm.
        WebDriverWait(self.driver, timeout=10).until(
            lambda d: d.find_element(By.CSS_SELECTOR, f'button[aria-label="{tag_text}"]')
        )
        # NOTE(review): fixed pause, presumably to let the output settle - confirm.
        time.sleep(2)
        copy_button = self.driver.find_element(
            By.CSS_SELECTOR,
            f'button[data-testid="translator-target-toolbar-copy"][aria-label="{copy_text}"]',
        )
        copy_button.click()
        return pyperclip.paste()

    def deeplFY(self, input, _from="en", _to="zh"):
        """Translate ``input`` from ``_from`` to ``_to`` on the DeepL web page."""
        base_url = f"https://www.deepl.com/translator#{_from}/{_to}/"
        return self.translate(base_url, input, self.deepl)

    def baidu(self, data):
        """Type ``data`` into the current Baidu page and return the output text."""
        submit = WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located((By.XPATH, self._BAIDU_INPUT_XPATH))
        )
        submit.clear()
        submit.send_keys(data)
        el_output = WebDriverWait(self.driver, timeout=10).until(
            lambda d: d.find_element(By.XPATH, self._BAIDU_OUTPUT_XPATH)
        )
        return el_output.text

    def bdFY(self, data: str = "Hello", _from: str = "en", _to: str = "zh"):
        """Translate ``data`` from ``_from`` to ``_to`` on the Baidu web page."""
        url = f"https://fanyi.baidu.com/translate#{_from}/{_to}"
        return self.translate(url, data, self.baidu)
# Shared translator used by all request handlers. Constructing it creates the
# Chrome driver, so this happens as soon as the module is loaded.
transDriver = Translate()

# Log to a per-day file in the current working directory.
log_file_name = 'logger-' + time.strftime('%Y-%m-%d') + '.log'
log_file_path = os.path.join(os.path.abspath(os.curdir), log_file_name)
handler = logging.FileHandler(log_file_path, encoding='UTF-8')
logging_format = logging.Formatter('%(asctime)s-%(message)s')
handler.setFormatter(logging_format)
app.logger.addHandler(handler)
# Outside debug mode the logger has no level of its own and inherits WARNING,
# which would drop the handlers' INFO records before they reach the file.
# Only lower the threshold; never make an already-verbose logger quieter.
if app.logger.getEffectiveLevel() > logging.INFO:
    app.logger.setLevel(logging.INFO)
print(f"log location:{log_file_path}")
@app.route('/', methods=['POST', 'GET'])
def root():
    """Print the posted ``data`` form field and list the translator routes.

    Returns:
        A JSON response whose ``data`` member maps each translator name to
        the path of its endpoint.
    """
    print(request.form.get("data"))
    return jsonify({
        'data': {
            "BAIDUI": '/bdFY',
            "DEEPL": '/deeplFY',
        },
    })
@app.route('/bdFY', methods=['POST'])
def bdFY():
    """Translate the posted text through the Baidu page and return it as JSON.

    Form fields:
        data: Text to translate.
        from: Source language code (optional, defaults to ``en``).
        to: Target language code (optional, defaults to ``zh``).

    Returns:
        A JSON response ``{"data": <translation>}``. If translation fails, the
        exception message is returned in place of the translation.
    """
    _data = request.form.get("data")
    # The requested languages were previously read but never used. Keep only
    # the primary subtag ("zh-CN" -> "zh") and fall back to the default when
    # the value is missing or not purely alphabetic, since it ends up in a URL.
    # NOTE(review): Baidu may use its own codes for some languages - confirm.
    langs = []
    for field, default in (("from", "en"), ("to", "zh")):
        code = (request.form.get(field) or default).split("-")[0].lower()
        langs.append(code if code.isalpha() else default)
    _from, _to = langs
    app.logger.info(f"data:{_data}")
    try:
        result = transDriver.bdFY(_data, _from, _to)
    except Exception as e:
        # Record the traceback; the client still receives the message text.
        app.logger.exception("bdFY translation failed")
        result = str(e)
    app.logger.info(f"result:{result}")
    return jsonify({'data': result})
@app.route('/deeplFY', methods=['POST'])
def deeplFY():
    """Translate the posted text through the DeepL page and return it as JSON.

    Form fields:
        data: Text to translate.
        from: Source language code (optional, defaults to ``en``).
        to: Target language code (optional, defaults to ``zh``).

    Returns:
        A JSON response ``{"data": <translation>}``. If translation fails, the
        exception message is returned in place of the translation.
    """
    _data = request.form.get("data")
    # The requested languages were previously read but never used. Keep only
    # the primary subtag ("zh-CN" -> "zh") and fall back to the default when
    # the value is missing or not purely alphabetic, since it ends up in a URL.
    langs = []
    for field, default in (("from", "en"), ("to", "zh")):
        code = (request.form.get(field) or default).split("-")[0].lower()
        langs.append(code if code.isalpha() else default)
    _from, _to = langs
    app.logger.info(f"data:{_data}")
    try:
        result = transDriver.deeplFY(_data, _from, _to)
    except Exception as e:
        # Record the traceback; the client still receives the message text.
        app.logger.exception("deeplFY translation failed")
        result = str(e)
    app.logger.info(f"result:{result}")
    return jsonify({'data': result})
# Start the Flask server on port 80 only when this file is run as a script.
if __name__ == '__main__':
    app.run(port=80)