抓取 LeetCode 问题列表

1. 打印所有问题的链接地址


from urllib.request import urlopen
from bs4 import BeautifulSoup
import re

# Download the algorithms problem list. The context manager closes the HTTP
# response once BeautifulSoup has consumed the markup.
with urlopen('https://leetcode.com/problemset/algorithms/') as html:
    bsObj = BeautifulSoup(html, 'lxml')

# Print the href of every anchor whose link starts with "/problems/".
# The href filter only matches anchors that carry the attribute, so no
# separate membership check is needed before reading it.
for link in bsObj.find_all('a', href=re.compile("^/problems/.*")):
    print(link['href'])




2. 显示每个问题的通过率



from urllib.request import urlopen
from bs4 import BeautifulSoup
import re

# Fetch and parse the problem list once, at module load; get_problem() reads
# the resulting module-level ``bsObj``. The context manager closes the HTTP
# response as soon as the markup has been parsed.
with urlopen('https://leetcode.com/problemset/algorithms/') as html:
    bsObj = BeautifulSoup(html, 'lxml')

def getinfo(link):
    """Print the acceptance rate shown on a single problem page.

    Downloads ``link``, reads the accepted and submitted counters from the
    ``total-ac`` / ``total-submit`` spans, and prints their ratio as a
    percentage with two decimals, followed by a line containing a newline.

    If the page cannot be downloaded, or either counter is missing from the
    markup, a fallback message is printed instead of raising, so one bad
    page does not abort the caller's loop.

    Args:
        link: Absolute URL of the problem page.
    """
    # Imported locally so this function does not require a new file-level import.
    from urllib.error import URLError

    try:
        # Close the HTTP response as soon as the markup has been parsed.
        with urlopen(link) as response:
            soup = BeautifulSoup(response, 'lxml')
    except URLError:
        # URLError is the base class of HTTPError, so this also covers
        # 4xx/5xx responses.
        print('can not access, fuck')
        return

    try:
        # find() returns None when a span is absent, and .strong is None when
        # the span has no <strong> child; both surface as AttributeError.
        accepted = soup.find('span', {'class': 'total-ac text-info'}).strong.get_text()
        submitted = soup.find('span', {'class': 'total-submit text-info'}).strong.get_text()
    except AttributeError:
        print('can not access, fuck')
        return

    accepted_count = float(accepted)
    submitted_count = float(submitted)
    # A problem with no submissions yet is reported as 0.00% rather than
    # raising ZeroDivisionError.
    rate = accepted_count / submitted_count * 100 if submitted_count else 0.0
    print('Accepted rate is {:.2f}%'.format(rate))
    print('\n')


def get_problem():
    """Print each listed problem's slug followed by its acceptance rate.

    Walks every anchor in the module-level ``bsObj`` whose href starts with
    ``/problems/``, prints the third ``/``-separated component of the href
    (the problem slug), and passes the absolute problem URL to ``getinfo``.
    """
    for link in bsObj.find_all('a', href=re.compile("^/problems/.*")):
        # The href filter only matches anchors that carry the attribute.
        href = link['href']
        print(href.split('/')[2])
        # href already begins with "/", so the host is joined without a
        # trailing slash to avoid "https://leetcode.com//problems/...".
        getinfo('https://leetcode.com' + href)

# Start the per-problem crawl only when this file is executed as a script.
if __name__ == "__main__":
    get_problem()





你可能感兴趣的:(python,爬虫)