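# Rough one-off script: each run fetches player data only for the most recent
# squad list of a single team. Covering a whole league would need an extra loop
# over its teams, which was never written (laziness). It was used to scrape the
# top five European leagues, and the data was collected for Tableau visualization.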
import pymysql
from urllib.parse import urlencode
import requests
from lxml import etree
def _fetch_dom(page_url, user_agent, timeout=30):
    """Download *page_url* and return the root node of its parsed HTML.

    Args:
        page_url: Absolute URL of the page to download.
        user_agent: Value sent in the ``User-agent`` request header.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Raises:
        requests.RequestException: On a network failure, a timeout, or a
            4xx/5xx response (via ``raise_for_status``).
    """
    with requests.request('GET', page_url, headers={'User-agent': user_agent},
                          timeout=timeout) as res:
        # Do not try to scrape an error page as if it were real data.
        res.raise_for_status()
        return etree.HTML(res.text)


def _parse_player(dom, team):
    """Build one ``fm_copy`` row from the DOM of a player page.

    Every value is read from the ``player-info`` block. Each cell's text
    has a fixed number of leading characters dropped (presumably the
    field label -- confirm against the live page).

    Args:
        dom: Root node of the parsed player page.
        team: Value placed first in the returned row.

    Returns:
        An 11-tuple in the order expected by the insert statement:
        (team, name, nationality, birthdate, age, height, weight,
        number, location, preferred foot, status).

    Raises:
        IndexError: If the page lacks any of the expected cells.
    """
    info = "//div[@class='player-info']"
    col1 = dom.xpath(info + "//tr/td[1]/text()")
    col2 = dom.xpath(info + "//tr/td[2]/text()")
    col3 = dom.xpath(info + "//tr/td[3]/text()")
    links = dom.xpath(info + "//td//a/text()")

    name = col1[0][3:]            # name
    age = col1[1][3:]             # age
    number = col1[2][5:]          # shirt number
    status = col1[3][7:]          # estimated market value
    height = col2[1][3:]          # height
    birthdate = col3[0][5:]       # date of birth
    weight = col3[1][3:]          # weight
    preferred_foot = col3[2][-2:]  # preferred foot: last two characters only
    nationality = links[0]        # nationality: first link in the block
    location = links[-1]          # playing position: last link in the block

    return (team, name, nationality, birthdate, age, height, weight,
            number, location, preferred_foot, status)


# NOTE(review): database credentials are hard-coded here; consider loading
# them from the environment or a config file instead.
conn = pymysql.connect(host='localhost', port=3306, user='root', password='123456789', db='bra')
try:
    cur = conn.cursor()
    url = "https://data.13322.com/team-362/2.html"
    ua = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
    # First value of every inserted row; change it together with ``url``
    # when scraping a different team.
    team = '甘冈'
    sqli = "insert into fm_copy values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"

    # The team page lists links to the individual player pages. Its response
    # is closed inside _fetch_dom before any player page is requested.
    team_dom = _fetch_dom(url, ua)
    wz1 = team_dom.xpath("//div[@class='team_zj']//td[@align='left']//a[@target='_blank']/@href")

    for url1 in wz1:
        try:
            row = _parse_player(_fetch_dom(url1, ua), team)
        except (requests.RequestException, IndexError) as exc:
            # One unreachable or oddly laid-out player page should not
            # abort the rest of the squad; report it and move on.
            print('skipping {}: {!r}'.format(url1, exc))
            continue
        cur.execute(sqli, row)
        # Commit per player so rows already scraped survive a later failure.
        conn.commit()
finally:
    # Always release the database connection, even when a request, the
    # parsing, or an insert raises.
    conn.close()