Python实现爬取大陆大学排名,嵩天老师课程示例举一反三
代码
import requests
from bs4 import BeautifulSoup
import bs4
def getHTMLText(url):
    """Download *url* and return the response body as text.

    The request is made with a 30-second timeout. Before decoding, the
    response encoding is replaced by ``apparent_encoding`` (the encoding
    detected from the body), which avoids mojibake when the server's
    declared charset is missing or wrong.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or ``""`` if the request fails (connection
        error, timeout, invalid URL, or a 4xx/5xx status reported by
        ``raise_for_status``).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Deliberately best-effort: callers treat "" as "nothing fetched".
        # Only request-related failures are swallowed, so Ctrl-C and
        # genuine programming errors still surface.
        return ""
def fillUnivList(ulist, html):
    """Append the mainland ('大陆') rows of the ranking table to *ulist*.

    The first ``<tbody>`` in *html* is scanned row by row. For every row
    whose third cell reads '大陆', a three-item list made of the text of
    cells 0, 1 and 3 is appended (presumably rank, school name and total
    score, going by the column headers printUnivList prints).

    Args:
        ulist: List that is extended in place.
        html: HTML source of the ranking page; may be empty.

    Returns:
        None. If *html* contains no ``<tbody>`` (for example an empty string
        after a failed download), *ulist* is left unchanged.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        # find() returns None when nothing matches; without this guard an
        # empty or unexpected page would fail with AttributeError.
        return
    for tr in tbody.children:
        # .children also yields the whitespace text nodes between rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')
        if len(tds) < 4:
            # Rows without the cells indexed below cannot be read; skip them.
            continue
        if tds[2].string == '大陆':
            ulist.append([tds[0].string, tds[1].string, tds[3].string])
def printUnivList(ulist, num):
    """Print a header line followed by at most *num* rows from *ulist*.

    Each printed row starts with a running counter beginning at 1, followed
    by the row's three fields. The school-name column is padded with
    chr(12288) (U+3000, the full-width ideographic space) so that CJK text
    is padded with characters of matching width.

    Args:
        ulist: Sequence of rows; each row provides three formattable
            strings at indexes 0, 1 and 2.
        num: Maximum number of rows to print. If *ulist* holds fewer rows,
            only the available ones are printed.
    """
    print(f'{"大陆排名":^10}\t{"中国排名":^10}\t{"学校名称":{chr(12288)}^6}\t{"总分":^16}')
    # zip() stops at the shorter input, so a short (or empty) list no longer
    # raises IndexError, and num <= 0 still prints no rows.
    for count, u in zip(range(1, num + 1), ulist):
        print(f"{count:^16}\t{u[0]:^6}\t{u[1]:{chr(12288)}^10}\t{u[2]:^10}")
def main():
    """Fetch the ranking page, collect the mainland rows and print 25 of them."""
    source_url = 'http://www.zuihaodaxue.com/Greater_China_Ranking2019_3.html'
    rows = []
    # fillUnivList extends `rows` in place with the parsed table entries.
    fillUnivList(rows, getHTMLText(source_url))
    printUnivList(rows, 25)
# Run the scraper only when executed as a script, so that importing this
# module does not trigger a network request and console output.
if __name__ == "__main__":
    main()
结果如下: