Python-入门实战(简单有道词典爬虫)

PS:初次接触爬虫,故参考了球友的代码。作者:tipire,原文链接:https://www.jianshu.com/p/8537178639a8

from pyquery import PyQuery as pq
import requests
import threadpool


def download_html(word, timeout=10):
    """Fetch the Youdao dictionary page for ``word`` and extract its entry.

    Args:
        word: The English word to look up.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The dict filled in by ``decode_html`` (keys ``Word``, ``Proc`` and
        ``Desc``) when the server answers with status 200, an empty dict for
        any other status code, or ``None`` when fetching or parsing raises.
    """
    # Imported locally so this function does not rely on a new file-level import.
    from urllib.parse import quote

    output = {'Word': word}
    final_output = {}
    # Percent-encode the word so characters such as '/', '?' or '#' cannot
    # change the path of the request URL.
    url = 'http://dict.youdao.com/w/eng/{}/'.format(quote(str(word), safe=''))
    try:
        # Without a timeout the request may wait forever and stall its worker.
        r = requests.get(url, timeout=timeout)
        if r.status_code == 200:
            doc = pq(r.text)
            final_output = decode_html(doc, output)
            print(final_output)
    except Exception as e:
        # Best-effort lookup: report the failure together with its cause
        # instead of discarding the exception, then signal it with None.
        print('抓取页面异常,抓取不到:' + word, e)
        return None
    return final_output


def decode_html(doc, output):
    """Fill ``output`` with the pronunciation and definition text from ``doc``.

    Args:
        doc: Parsed page exposing ``items(selector)``, which yields elements
            that provide a ``text()`` method.
        output: Dict to update in place; it receives the keys ``Proc`` and
            ``Desc``.

    Returns:
        The same ``output`` dict, with ``Proc`` holding the concatenated text
        of every pronunciation element and ``Desc`` the concatenated text of
        every definition list item (no separator is inserted between pieces).
    """
    fields = (
        ('Proc', '.baav .pronounce'),
        ('Desc', '#phrsListTab .trans-container ul li'),
    )

    # Both keys exist (as empty strings) before any element is read.
    for key, _ in fields:
        output[key] = ''

    for key, selector in fields:
        output[key] = ''.join(node.text() for node in doc.items(selector))

    return output


# Words to look up; each one becomes a separate work request.
word_list = ['spring', 'duck', 'python', 'beautiful', 'caption']
# Thread pool created with 10 workers.
pool = threadpool.ThreadPool(10)
# One request per word, each calling download_html(word).
word_pool = threadpool.makeRequests(download_html, word_list)
# Plain loop rather than a list comprehension: the calls are made only for
# their side effect, so there is no result list worth building.
for req in word_pool:
    pool.putRequest(req)
# Wait for the queued requests to be processed before the script ends.
pool.wait()
运行结果:

{'Word': 'caption', 'Proc': "英 ['kæpʃ(ə)n]美 ['kæpʃən]", 'Desc': 'n. 标题;字幕;说明;逮捕vt. 加上说明;加上标题'}
{'Word': 'python', 'Proc': "['paɪθɑn]", 'Desc': 'n. 巨蟒;大蟒n. (法)皮东(人名)'}
{'Word': 'duck', 'Proc': '英 [dʌk]美 [dʌk]', 'Desc': 'n. 鸭子;鸭肉;(英)宝贝儿;零分vi. 闪避;没入水中vt. 躲避;猛按…入水n. (Duck)人名;(德、葡、匈)杜克'}
{'Word': 'beautiful', 'Proc': "英 [ˈbju:tɪfl]美 ['bjʊtəfəl]", 'Desc': 'adj. 美丽的出色地出色的迷人的迷人地'}
{'Word': 'spring', 'Proc': '英 [sprɪŋ]美 [sprɪŋ]', 'Desc': 'n. 春天;弹簧;泉水;活力;跳跃adj. 春天的vi. 生长;涌出;跃出;裂开vt. 使跳起;使爆炸;突然提出;使弹开n. (Spring)人名;(德)施普林;(英、芬、瑞典)斯普林'}
[Finished in 0.5s]

 

你可能感兴趣的:(Python学习)