1. 打印所有问题的链接地址
import re
from urllib.request import urlopen

from bs4 import BeautifulSoup

# Script: print the href of every problem link found on the LeetCode
# algorithms problem-set page.

# Fetch the page inside a context manager so the HTTP response is closed as
# soon as BeautifulSoup has consumed it (the parser reads the file-like
# object eagerly in its constructor).
with urlopen('https://leetcode.com/problemset/algorithms/') as html:
    bsObj = BeautifulSoup(html, 'lxml')

# The href filter only matches <a> tags that carry an href attribute starting
# with "/problems/", so every result is guaranteed to have the attribute and
# no separate membership check is needed.
for link in bsObj.find_all('a', href=re.compile(r"^/problems/.*")):
    print(link.attrs['href'])
2. 显示每个问题的通过率(Accepted / Submissions)
import re
from urllib.parse import urljoin
from urllib.request import urlopen

from bs4 import BeautifulSoup

# Script: for every problem linked from the LeetCode algorithms problem-set
# page, print the problem slug followed by its acceptance rate.

# Fetch the index page inside a context manager so the HTTP response is
# closed as soon as BeautifulSoup has consumed it.
with urlopen('https://leetcode.com/problemset/algorithms/') as html:
    bsObj = BeautifulSoup(html, 'lxml')


def getinfo(link):
    """Fetch one problem page and print its acceptance rate.

    The rate is the accepted count divided by the submission count, printed
    as a percentage with two decimals and followed by a blank separator.
    If the counters cannot be located or evaluated, a fallback message is
    printed instead of raising.

    Args:
        link: Absolute URL of the problem page.
    """
    with urlopen(link) as response:
        soup = BeautifulSoup(response, 'lxml')

    try:
        accepted = soup.find(
            'span', {'class': 'total-ac text-info'}).strong.get_text()
        submitted = soup.find(
            'span', {'class': 'total-submit text-info'}).strong.get_text()
        rate = float(accepted) / float(submitted) * 100
    except (AttributeError, ValueError, ZeroDivisionError):
        # AttributeError: a counter <span> or its <strong> child is missing
        # (find() returned None). ValueError: the counter text is not a plain
        # number. ZeroDivisionError: the submission count is zero. None of
        # these should abort the crawl of the remaining problems.
        print('can not access, fuck')
    else:
        print('Accepted rate is {:.2f}%'.format(rate))
        print('\n')


def get_problem():
    """Print the slug and acceptance rate of every problem on the index page."""
    # The href filter only matches <a> tags whose href starts with
    # "/problems/", so the attribute is always present on each result.
    for link in bsObj.find_all('a', href=re.compile(r"^/problems/.*")):
        href = link.attrs['href']
        # href looks like "/problems/<slug>/..."; index 2 is the slug.
        print(href.split('/')[2])
        # urljoin avoids the double slash that plain concatenation produced
        # ("https://leetcode.com/" + "/problems/...").
        getinfo(urljoin('https://leetcode.com/', href))


if __name__ == '__main__':
    get_problem()