Python爬虫实战：爬取一比分网的球员数据并存入MySQL数据库

#代码比较粗糙,一次只能获取一个球队最近大名单的球员数据;要获取整个联赛的数据还需要再写一层迭代(偷懒没写)。当时爬取了欧洲五大顶级联赛的数据,用于Tableau可视化。

from urllib.parse import urlencode
from urllib.parse import urljoin

import pymysql
import requests
from lxml import etree
# Scrape the squad list of one team and insert one row per player into the
# MySQL table `fm_copy`.
#
# Flow: open the DB connection, download the team page, collect the player
# page links, then download/parse/insert each player in turn. The connection
# is always closed, even when a request or a parse step fails.

url = "https://data.13322.com/team-362/2.html"  # team page to scrape
ua = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
TEAM_NAME = '甘冈'  # written verbatim into the first column of every row
REQUEST_TIMEOUT = 10  # seconds; requests waits forever when no timeout is given
PLAYER_LINKS_XPATH = "//div[@class='team_zj']//td[@align='left']//a[@target='_blank']/@href"
sqli = "insert into fm_copy values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"


def _fetch_dom(page_url):
    """Download *page_url* and return the root node of the parsed HTML.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status (via ``raise_for_status``).
    """
    with requests.request('GET', page_url, headers={'User-agent': ua},
                          timeout=REQUEST_TIMEOUT) as res:
        # Fail here with a clear HTTP error instead of parsing an error page
        # and hitting an unrelated IndexError later.
        res.raise_for_status()
        return etree.HTML(res.text)


def _parse_player(dom):
    """Extract one player's fields from the DOM of a player page.

    Returns:
        A 10-tuple ``(name, nationality, birthdate, age, height, weight,
        number, location, idiomatic_feet, status)``, i.e. the insert row
        without the leading team name.

    Raises:
        IndexError: when an expected cell of the ``player-info`` table is
            missing from the page.
    """
    info = "//div[@class='player-info']"
    col1 = dom.xpath(info + "//tr/td[1]/text()")
    col2 = dom.xpath(info + "//tr/td[2]/text()")
    col3 = dom.xpath(info + "//tr/td[3]/text()")
    links = dom.xpath(info + "//td//a/text()")

    # Each cell text is sliced at a fixed offset; presumably this drops a
    # leading label (e.g. "name:") in front of the value -- TODO confirm
    # against the live page, the offsets are kept exactly as they were.
    name = col1[0][3:]             # name
    age = col1[1][3:]              # age
    number = col1[2][5:]           # shirt number
    status = col1[3][7:]           # estimated market value
    height = col2[1][3:]           # height
    birthdate = col3[0][5:]        # date of birth
    weight = col3[1][3:]           # weight
    idiomatic_feet = col3[2][-2:]  # preferred foot (last two characters)
    nationality = links[0]         # nationality (first link text)
    location = links[-1]           # playing position (last link text)
    return (name, nationality, birthdate, age, height, weight,
            number, location, idiomatic_feet, status)


conn = pymysql.connect(host='localhost', port=3306, user='root', password='123456789', db='bra')
try:
    cur = conn.cursor()

    # Resolve every href against the team URL so relative links work too;
    # absolute links pass through urljoin unchanged.
    wz1 = [urljoin(url, href) for href in _fetch_dom(url).xpath(PLAYER_LINKS_XPATH)]

    for url1 in wz1:
        try:
            row = (TEAM_NAME,) + _parse_player(_fetch_dom(url1))
        except (requests.RequestException, IndexError) as exc:
            # One unreachable or oddly formatted player page must not abort
            # the whole run; report it and move on to the next player.
            print("skipped {}: {!r}".format(url1, exc))
            continue
        # A single row is inserted, so execute() is the right call.
        cur.execute(sqli, row)
        # Commit per player so rows already scraped survive a later failure.
        conn.commit()
finally:
    conn.close()

你可能感兴趣的:(python)