Web Scraping for Beginners: Scraping the China University Rankings and Saving Them to a CSV File

Link: 软科中国最好大学排名2016 (Best Chinese Universities Ranking 2016)

Code:

# -*- coding: utf-8 -*-
"""
Created on Mon May 27 21:10:59 2019

@author: Benny
"""

import csv
import os
import requests
from bs4 import BeautifulSoup

allUniv = []

def getHTMLText(url):
    # Download the page; return an empty string if the request fails
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = 'utf-8'
        return r.text
    except requests.RequestException:
        return ""

def fillUnivList(soup):
    # Every ranking entry is a <tr> whose <td> cells hold rank, name, province, scores, ...
    data = soup.find_all('tr')
    for tr in data:
        ltd = tr.find_all('td')
        if len(ltd) == 0:      # skip rows that contain no <td> cells (e.g. the header row)
            continue
        singleUniv = []
        for td in ltd:
            singleUniv.append(td.string)
        allUniv.append(singleUniv)

def writercsv(save_road, num, title):
    # Append to the CSV if it already exists; otherwise create it and write the header row first
    if os.path.isfile(save_road):
        with open(save_road, 'a', newline='') as f:
            csv_write = csv.writer(f, dialect='excel')
            for i in range(num):
                u = allUniv[i]
                csv_write.writerow(u)
    else:
        with open(save_road, 'w', newline='') as f:
            csv_write = csv.writer(f, dialect='excel')
            csv_write.writerow(title)
            for i in range(num):
                u = allUniv[i]
                csv_write.writerow(u)

title = ["排名", "学校名称", "省市", "总分", "生源质量", "培养结果", "科研规模", "科研质量", "顶尖成果", "顶尖人才", "科技服务", "产学研究合作", "成果转化"]
save_road = "C:\\Users\\Benny\\Desktop\\Python\\Python练习\\sqlit_test02.csv"

def main():
    url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2016.html'
    html = getHTMLText(url)
    soup = BeautifulSoup(html, "html.parser")
    fillUnivList(soup)
    writercsv(save_road, 10, title)

main()
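The original script also imports pandas but never uses it. If you prefer to let pandas handle the write step, a minimal sketch (assuming fillUnivList has already populated allUniv and that each scraped row has the same number of fields as title) could look like this:

import pandas as pd

# Sketch only: build a DataFrame from the scraped rows and write it in a single call.
# encoding='utf-8-sig' is used so Excel displays the Chinese headers correctly.
df = pd.DataFrame(allUniv[:10], columns=title)
df.to_csv(save_road, index=False, encoding='utf-8-sig')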

A screenshot of the resulting file is shown below. (Only the top-10 rows are saved here; you can save more by changing num, as in the example that follows.)
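For instance, to write out every university that was scraped instead of only the first ten, the call at the end of main() could pass the full list length (a small tweak, not part of the original script):

writercsv(save_road, len(allUniv), title)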


 

Reprinted from: https://www.cnblogs.com/shuxincheng/p/10933546.html
