Python 爬虫:爬取 CBA 所有球队的数据

获取CBA官网各个球队的相关信息。

import requests
from bs4 import BeautifulSoup
import csv
# Parallel per-team result lists, filled by fun(): one entry per scraped team.
# list1 keeps every raw table cell scraped so far; data collects the final
# combined rows that are written to the CSV.
name, defen, lanban, zugong = [], [], [], []        # name / points / rebounds / assists
gaimao, qiangduan, shiwu, fangui = [], [], [], []   # blocks / steals / turnovers / fouls
list1, data = [], []
def fun(url):
    """Fetch one team's page and append its stats to the module-level lists.

    url: a leisu.com basketball team page, e.g.
         https://www.leisu.com/data/lanqiu/team-10728
    Side effects: appends one entry to each of name / defen / lanban /
    zugong / gaimao / qiangduan / shiwu / fangui, and extends list1 with
    the raw scraped cells. Skips the team silently if the download fails.
    """
    def get_html_text(u):
        # Download the page; return None on any network/HTTP error.
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE'
            }
            r = requests.get(u, timeout=30, headers=headers)
            r.raise_for_status()              # turn 4xx/5xx into an exception
            r.encoding = r.apparent_encoding  # guess encoding from the body
            return r.text
        except Exception as e:
            print("产生的异常是", e)

    html = get_html_text(url)
    if html is None:
        # BUG FIX: the original fed None into BeautifulSoup and crashed
        # whenever a download failed; skip this team instead.
        return

    # BUG FIX: parse the page ONCE (the original re-built the soup for
    # every single table cell, eleven times per call).
    soup = BeautifulSoup(html, "html.parser")
    cells = soup.find_all("div", class_='bbgg')[0].table.select("td")
    # Every second <td> from index 4 to 22 holds a per-game statistic.
    stats = [cells[i].text for i in range(4, 24, 2)]
    list1.extend(stats)  # keep the historical global log for compatibility

    name.append(soup.find_all("div", class_='content-box')[0].select("span")[0].text)
    # BUG FIX: index the freshly scraped stats, not the ever-growing global
    # list1 — the original re-read list1[0..8] on every call, so every team
    # after the first was recorded with the FIRST team's numbers.
    defen.append(stats[0])
    lanban.append(stats[3])
    zugong.append(stats[4])
    gaimao.append(stats[5])
    qiangduan.append(stats[6])
    shiwu.append(stats[7])
    fangui.append(stats[8])
    
# Numeric IDs of the 19 CBA teams on leisu.com (discovered manually).
for i in [10728, 10273, 10722, 10724, 10727, 10721, 10730, 10729, 10361,
          11072, 11380, 10731, 10725, 10734, 10726, 10326, 10360, 10793, 10732]:
    url = 'https://www.leisu.com/data/lanqiu/team-' + str(i)
    # BUG FIX: the original built the URL but never used it, so every
    # result list stayed empty and the CSV had no data rows.
    fun(url)

# Combine the parallel per-team lists into one row per team.
for row in zip(name, defen, lanban, zugong, gaimao, qiangduan, shiwu, fangui):
    data.append(list(row))
def saveListCSV(fileName, aList):
    """Write per-team stats to a CSV file with a Chinese header row.

    fileName: output path for the CSV file.
    aList: sequence of 8-item rows in the order
           (name, points, rebounds, assists, blocks, steals, turnovers, fouls).
    I/O errors are reported to stdout rather than raised.
    """
    try:
        # newline='' stops the csv module writing blank lines on Windows;
        # BUG FIX: utf-8-sig pins the encoding (the original relied on the
        # platform default, which garbles or rejects the Chinese header on
        # many systems) and the BOM lets Excel auto-detect it.
        with open(fileName, 'w', newline='', encoding='utf-8-sig') as fp:
            writer = csv.writer(fp)
            writer.writerow(["球队名字", "场均得分", "场均篮板", "场均助攻",
                             "场均盖帽", "场均抢断", "场均失误", "场均犯规"])
            writer.writerows(aList)
        print('{0}保存成功!共{1}条记录'.format(fileName, len(aList)))
    except IOError as err:
        print(fileName, '文件创建错误:', err)
# BUG FIX: the original passed the undefined name `dt` (NameError at
# runtime); the collected rows live in `data`.
saveListCSV('CBA_data.csv', data)

图 1:脚本运行结果截图(原文配图)

你可能感兴趣的:(数据爬取,大数据,爬虫)