书写一个翻译器-python

1.掌握python,爬虫的相关知识

2.开始实践,环境准备(安装 googletrans、requests、beautifulsoup4 库)

3.选取目标 - 谷歌,金山词霸

4.上代码

from googletrans import Translator
import requests, re
from bs4 import BeautifulSoup
import sys

# author : Comiii
# 2018/12/4
# purpose : university competition

class Tranlate():
    """Translate a piece of text with the third-party ``googletrans`` library.

    The translation runs eagerly in the constructor; the translated string is
    then available as the ``Result`` attribute.
    """

    # Translated text; remains "" until a translation has completed.
    Result = ""

    # Maps the caller-supplied ``flag`` to the googletrans destination
    # language code.  Flags 2 and 4 both target Simplified Chinese.
    # NOTE(review): the original comments labelled flags 1-3 "Chinese",
    # "English" and "Japanese"; for 1 and 2 that presumably names the source
    # language rather than the destination - confirm with the author.
    _DEST_BY_FLAG = {
        1: "EN",
        2: "zh-CN",
        3: "ja",
        4: "zh-CN",
    }

    def __init__(self, text, flag):
        """Translate ``text`` into the language selected by ``flag``.

        Args:
            text: The text to translate.
            flag: Translation selector; 1 -> English, 2 -> Simplified Chinese,
                3 -> Japanese, 4 -> Simplified Chinese.

        Raises:
            ValueError: If ``flag`` is not one of the supported values.
                Previously an unknown flag surfaced as an UnboundLocalError
                only after the translator had already been created.
        """
        # Validate before touching the network-backed translator so a bad
        # flag fails fast with a meaningful message.
        try:
            dest = self._DEST_BY_FLAG.get(flag)
        except TypeError:
            # Unhashable flag (e.g. a list) can never be a valid selector.
            dest = None
        if dest is None:
            raise ValueError(
                "unsupported flag %r; expected one of 1, 2, 3, 4" % (flag,)
            )

        translator = Translator()
        result = translator.translate(text, dest=dest)
        self.Result = result.text


class Spider():
    """Look a keyword up on iciba.com (Kingsoft PowerWord) and scrape the result.

    Constructing an instance performs the HTTP request and parses the page
    right away.  ``Result`` holds the raw HTML and ``Soup`` the extracted
    text (or '' when nothing on the page matched).
    """

    # Raw HTML of the fetched page.
    Result = ''
    # Text extracted from the page; stays '' when no known element matched.
    Soup = ''

    def __init__(self, KWord):
        """Fetch the iciba page for ``KWord`` and extract its translation.

        Args:
            KWord: The word or phrase to look up; it is appended to the
                site URL as-is.
        """
        # Youdao alternative kept from the original for reference:
        # url = "http://www.youdao.com/w/" + KWord + "/#keyfrom=dict2.top"
        # iciba uses one URL form for every kind of lookup.
        url = "http://www.iciba.com/" + KWord

        bs = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'}
        r = requests.get(url, headers=bs, timeout=60)
        self.Result = r.text
        self.CheckBeautifulsoup()

    def CheckBeautifulsoup(self):
        """Extract the translation text from ``self.Result`` into ``self.Soup``.

        Two page layouts are tried in order:

        1. the translator-style result ``div`` (identified by its inline style);
        2. the dictionary-style ``li.clearfix`` entries and their ``span``s.

        The first layout wins: the second is consulted only when the first
        produced no text.  Within a layout the last non-empty match is kept.
        Elements without any text node are skipped instead of aborting the
        scan.  Per the original author's note, this approach is not always
        effective for Youdao pages but works for iciba.
        """
        page = BeautifulSoup(self.Result, "html.parser")

        # Translator-style result.
        # NOTE(review): the original comment here appears to have shown a
        # sample of the target element, whose text read "What are you
        # talking about?" - the markup itself was lost in extraction.
        found_translation = False
        for div in page.find_all(name='div', style="width: 580px; margin-top: 15px; font-size: 18px; line-height: 24px; color: #333333;"):
            first_text = div.find(text=True)
            if first_text is None:
                continue  # element has no text node; nothing to record
            self.Soup = first_text.strip()
            found_translation = True
        if found_translation:
            # Keep the translator result; do not let dictionary entries
            # overwrite it.
            return

        # Dictionary-style result.  Passing a plain string as ``attrs``
        # makes Beautiful Soup match on the CSS class.
        for li in page.find_all(name='li', attrs='clearfix'):
            for span in li.find_all(name='span'):
                first_text = span.find(text=True)
                if first_text is None:
                    continue
                # Several spans may match; only the last one is kept, so the
                # shown entry may not be the best translation.
                self.Soup = first_text.strip()

    def CheckRe(self):
        """Placeholder for a regex-based extractor; not implemented yet."""
        # Stub left unfinished by the author: the pattern is empty and unused.
        pattern = re.compile(r'')


if __name__ == "__main__":
    text = "你好"
    flag = 1  # each flag value selects a different translation category

    tra = Tranlate(text, flag)  # Google
    print(tra.Result)

    # iciba; known issue: a result may exist on the page yet not be shown.
    kingt = Spider(text)
    print(kingt.Soup)

5.总结:太简单了,一个轻量级网页爬取,没什么好总结的!

你可能感兴趣的:(python)