# AcWing: view the total number of solved problems (acwing查看做题总数)

import re
import requests
from lxml import etree
class Spider(object):
    """Scrape AcWing problem pages and report the problems marked as passed.

    For every page number it is given, the spider downloads the page, picks
    out the table rows flagged with the "passed" badge, prints each problem
    title and keeps a running total in ``self.count``.

    Presumably the rows reflect the account identified by the ``Cookie``
    header set in ``__init__`` -- confirm against the site.
    """

    # Seconds to wait for the server before giving up on a request. Without a
    # timeout, ``requests`` may block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Set up the URL template, the request headers and the counter."""
        # ``{}`` is filled with each number passed to ``run`` (presumably the
        # page index of the problem list).
        self.base_url = 'https://www.acwing.com/problem/{}/'
        # NOTE(review): the Cookie embeds a hard-coded CSRF token and session
        # id. Credentials in source are a security risk; consider loading
        # them from the environment or a config file instead.
        self.headers = {
            'Cookie': 'csrftoken=mixU7wxaV35yyyCDhqbXcIoW3z3Ms0NH31jbbqH; sessionid='
                      '344bo4nowvp9misa9suynjiwz2i5jcof; file_2922585_readed=""; file_2302034_readed=""',
            'Referer': 'https://www.acwing.com/about/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                          ' (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36',
        }
        # Number of passed problems printed so far by ``run``.
        self.count = 0

    def get_html(self, url):
        """Download *url* with the spider's headers and return the body text.

        Raises:
            requests.exceptions.RequestException: on connection errors or
                when the server does not answer within ``REQUEST_TIMEOUT``.
        """
        response = requests.get(
            url=url,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        return response.text

    def xpath_func(self, html):
        """Return the stripped titles of all problems marked as passed.

        Args:
            html: HTML source of one problem-list page.

        Returns:
            A list of title strings; empty when nothing matches.
        """
        # Select the link text of every table row that contains a span whose
        # title is the site's "passed this problem" badge.
        name_bds = '//tbody/tr[./td/span[@title="已通过这道题目"]]/td/a/text()'
        base_obj = etree.HTML(html)
        if base_obj is None:
            # The parser may yield no tree for an empty/unparseable body;
            # treat that as "no solved problems on this page".
            return []
        return [name.strip() for name in base_obj.xpath(name_bds)]

    def re_func(self, html, re_bds):
        """Return every match of the pattern *re_bds* in *html*.

        The pattern is compiled with ``re.S`` so ``.`` also matches newlines.
        """
        return re.compile(re_bds, re.S).findall(html)

    def parse_html(self, url):
        """Fetch *url* and return the passed problem titles found on it."""
        return self.xpath_func(self.get_html(url))

    def run(self, pages=range(1, 80)):
        """Ask for confirmation, then print passed problems and their total.

        Args:
            pages: Iterable of numbers substituted into ``base_url``.
                Defaults to 1..79, the range the script originally used.
        """
        warning = input('您马上就要爬取acwing了,看一下你的做题数,您的劳动成果将会在下面展示出来,确定要看吗?(Y/N)')
        if warning != 'Y':
            print('已经退出,你这个弱者')
            return

        print('爬虫系统已经启动...正在努力抓取,请稍等....')
        print('+---------------------------------+')
        print('|            name                 |')
        print('+---------------------------------+')
        for page in pages:
            for name in self.parse_html(self.base_url.format(page)):
                self.count += 1
                print('|   ' + name)
        print('+---------------------------------+')
        print('经过您的不懈努力,您一共做了' + str(self.count) + '道题,继续努力!!')

# Run the spider only when this file is executed as a script, not on import.
if __name__ == '__main__':
    spider = Spider()
    spider.run()

# Related topics: python (你可能感兴趣的:(python))