python爬取数据网站多网页数据

文章目录

    • 使用库
      • requests
      • selenium
    • 代码示例
      • 爬取LPL的比赛链接
      • 爬取比赛数据内容
      • 爬取各英雄信息

使用库

requests

requests 是用 Python 语言编写、基于 urllib3 的 HTTP 库,可满足 HTTP 测试需求。
安装:pip install requests
函数功能

  1. 发起HTTP请求,获得url对应的网页内容
import requests
# Fetch the page. requests applies no timeout by default, so an explicit one
# keeps the script from hanging forever when the server never answers.
response = requests.get('https://www.baidu.com/', timeout=10)
print(type(response))        # class of the returned Response object
print(response.status_code)  # HTTP status code of the reply
print(type(response.text))   # type of the decoded body
print(response.text)         # the decoded body itself
print(response.cookies)      # cookies the server sent back

其他的一些请求方式

import requests
# Every HTTP verb has a helper that is called the same way as requests.get().
# A timeout is passed to each call because requests never times out on its own.
requests.post('http://httpbin.org/post', timeout=10)
requests.put('http://httpbin.org/put', timeout=10)
requests.delete('http://httpbin.org/delete', timeout=10)
requests.head('http://httpbin.org/get', timeout=10)
requests.options('http://httpbin.org/get', timeout=10)

selenium

Selenium是一个自动化测试工具,利用它可以驱动浏览器执行特定的动作,如点击、下拉等操作,同时还可以获取浏览器当前呈现的页面的源代码,做到可见即可爬。对于一些JavaScript动态渲染的页面来说,这种抓取方式十分有效。

使用 Google Chrome 浏览器的驱动程序(ChromeDriver),其版本需与本机安装的 Chrome 版本匹配。
ChromeDriver 下载地址:http://chromedriver.storage.googleapis.com/index.html

按照图示位置,复制爬取内容的xpath路径
python爬取数据网站多网页数据_第1张图片

代码示例

爬取LPL的比赛链接

import requests
import os
import csv
import pandas as pd
from bs4 import BeautifulSoup

# Collect the link and the "A VS B" title of every finished match listed on
# one wanplus event page, then save them as <event name>.csv under `root`.
url = 'https://www.wanplus.com/event/820.html'
root = "D://spidersheet//"

# Explicit timeout: requests would otherwise wait indefinitely.
r = requests.get(url, timeout=10)
r.raise_for_status()
r.encoding = r.apparent_encoding
soup = BeautifulSoup(r.text, "html.parser")
# print(soup.prettify())

# Create the output folder (and any missing parent) when needed, then always
# carry on. The scraping used to live in the `else` branch of the existence
# check, so the very first run created the folder and exported nothing.
os.makedirs(root, exist_ok=True)

# The last breadcrumb link holds the event name; it becomes the file name.
matchname = soup.find('div', class_='bread_nav').find_all('a')[-1].text

team = []
link = []
for match in soup.find('div', class_='new-match end-match').find_all('li'):
	spans = match.find_all('span')
	# hrefs on the page are site-relative, so prefix the host right away.
	link.append('https://www.wanplus.com' + match.find('a').get('href'))
	team.append(spans[0].string + ' VS ' + spans[1].string)

dataframe = pd.DataFrame({'Team': team, 'Link': link})
print(dataframe)
# utf_8_sig writes a BOM so that Excel detects the encoding of the CSV.
dataframe.to_csv(root + matchname + ".csv", index=False, sep=',', encoding='utf_8_sig')

爬取比赛数据内容

import requests
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
import time
from openpyxl import Workbook 
import os
import numpy as np
from selenium.webdriver.common.by import By
import selenium.webdriver.support.ui as ui
import pandas as pd
import re

# Read the per-player statistics
def player(browser):
	"""Scrape the player stat rows of the game page currently open in *browser*.

	Every element with class ``player`` contributes two entries to each
	column: the first is read from the forward offsets 0/4/7/10/13/16 of the
	element's non-empty text lines, the second from -4/6/9/12/15/18.
	NOTE(review): presumably the two entries are the opposing players of one
	lane (team 1 first) - confirm against the page layout.

	Args:
		browser: Selenium WebDriver positioned on a match page.

	Returns:
		A tuple ``(dataframe, playername)``: the stats table and the list of
		player names in scraping order.
	"""
	columns = ('PlayerName', 'PlayerKDA', 'PlayerGolden',
		'CreepScore', 'PlayerDamage', 'PlayerDamageTolerated')
	# Position of each column's value inside one row's text lines.
	first_offsets = (0, 4, 7, 10, 13, 16)
	second_offsets = (-4, 6, 9, 12, 15, 18)
	stats = {column: [] for column in columns}

	# find_elements(By..., ...) works on Selenium 3 and 4; the older
	# find_elements_by_class_name helper was removed in Selenium 4.3.
	for row in browser.find_elements(By.CLASS_NAME, 'player'):
		# Drop blank lines in one pass instead of repeated list.remove calls.
		fields = [line for line in row.text.split('\n') if line != '']
		for offsets in (first_offsets, second_offsets):
			for column, offset in zip(columns, offsets):
				stats[column].append(fields[offset])

	dataframe = pd.DataFrame(stats)
	print(dataframe)
	return (dataframe, stats['PlayerName'])


# Per-player percentage data
def _outline_percentages(browser):
	"""Return the second space-separated token of every chart label on screen."""
	return [label.text.split(' ')[1]
		for label in browser.find_elements(By.CLASS_NAME, 'highcharts-text-outline')]


def percentage(browser, playername):
	"""Scrape the three percentage charts (damage, damage taken, gold).

	Args:
		browser: Selenium WebDriver positioned on a match page.
		playername: list of at least ten names as returned by ``player``.

	Returns:
		DataFrame with columns PlayerName, DamagePercent,
		DamageToleratedPercent and GoldenPercent.
	"""
	# Reorder the names to even positions first, then odd positions.
	# The original author's note describes the chart order as team 1
	# (Top, Jun, Mid, Bot, Sup) followed by team 2 in the same lane order.
	teamposition = [playername[i] for i in (0, 2, 4, 6, 8, 1, 3, 5, 7, 9)]

	# Fixed one-second pauses are kept from the original; presumably they
	# give the chart time to redraw after each tab switch.
	time.sleep(1)
	damagepercent = _outline_percentages(browser)

	# find_element(By..., ...) works on Selenium 3 and 4; the older
	# find_element_by_xpath helper was removed in Selenium 4.3.
	browser.find_element(By.XPATH, '//*[@id="percentage"]/ul/li[2]/a').click()
	time.sleep(1)
	damagetoleratedpercent = _outline_percentages(browser)

	browser.find_element(By.XPATH, '//*[@id="percentage"]/ul/li[3]/a').click()
	time.sleep(1)
	goldenpercent = _outline_percentages(browser)

	dataframe = pd.DataFrame({
		'PlayerName': teamposition,
		'DamagePercent': damagepercent,
		'DamageToleratedPercent': damagetoleratedpercent,
		'GoldenPercent': goldenpercent,
	})
	print(dataframe)
	return(dataframe)
# Per-player average data
def average(browser, playername):
	"""Scrape the four "average" tabs of the line chart for every player.

	Args:
		browser: Selenium WebDriver positioned on a match page.
		playername: player names, used unchanged as the PlayerName column.

	Returns:
		DataFrame with columns PlayerName, AverageDamage,
		AverageDamageTolerated, AverageKillDamage and
		AverageKilledDamageTolerated.
	"""
	# The class of the first <em> shows which colour is on the left side of
	# the page; it decides which pair of cell classes carries the values.
	# find_element(By..., ...) replaces the find_element_by_* helpers that
	# were removed in Selenium 4.3.
	leftcolor = browser.find_element(
		By.XPATH, '//*[@id="player-stats"]/ul/li[2]/em[1]').get_attribute("class")
	if leftcolor == 'blue':
		selector = "[class='left blue'],[class='right red']"
	else:
		selector = "[class='left red'],[class='right blue']"

	columns = ('AverageDamage', 'AverageDamageTolerated',
		'AverageKillDamage', 'AverageKilledDamageTolerated')
	data = {'PlayerName': playername}
	for tab, column in enumerate(columns, start=1):
		# The first tab is already showing; the others must be clicked open.
		if tab > 1:
			browser.find_element(
				By.XPATH, '//*[@id="line-tab"]/ul/li[%d]/a' % tab).click()
		data[column] = [cell.text
			for cell in browser.find_elements(By.CSS_SELECTOR, selector)]

	dataframe = pd.DataFrame(data)

	print(dataframe)
	return(dataframe)


# Shared reader for the team curves (gold and creep score)
def _team_curve(browser, tab_index, time_column):
	"""Hover over the points of one area-chart tab and collect its tooltips.

	Args:
		browser: Selenium WebDriver positioned on a match page.
		tab_index: 1-based index of the chart tab to open.
		time_column: name of the time column in the returned DataFrame.

	Returns:
		DataFrame with the time column plus one column per team; the team
		column names are taken from the tooltip text.

	Raises:
		ValueError: if no tooltip was sampled, so no team labels are known.
	"""
	# find_element(By..., ...) replaces the find_element_by_* helpers that
	# were removed in Selenium 4.3.
	browser.find_element(
		By.XPATH,
		'//*[@id="area-chart-wrapper"]/div/div[1]/ul/li[%d]/a' % tab_index).click()
	points = browser.find_elements(By.CLASS_NAME, 'highcharts-point')
	# NOTE(review): the constants 30 and 11 come from the original code;
	# presumably they skip points that belong to other charts - confirm.
	linetime = (len(points) - 30) / 2

	times = []
	team1 = []
	team2 = []
	labels = None
	for t, point in enumerate(points[11:], start=1):
		# Hovering a point makes the chart show that point's tooltip.
		ActionChains(browser).move_to_element(point).perform()
		if t > linetime - 1:
			text = browser.find_element(By.CLASS_NAME, "highcharts-tooltip").text.replace(' ', '')
			parts = re.split(':|●', text)
			times.append(parts[0])
			team1.append(parts[3])
			team2.append(parts[5])
			# The team names sit in front of their values in the tooltip.
			labels = (parts[2], parts[4])
		if t > linetime * 2 - 2:
			break

	# Before, the labels came from a loop-local variable and an empty sample
	# surfaced as an UnboundLocalError; fail with a readable message instead.
	if labels is None:
		raise ValueError('no tooltip could be sampled for ' + time_column)

	dataframe = pd.DataFrame({time_column: times, labels[0]: team1, labels[1]: team2})

	print(dataframe)
	return(dataframe)


# Gold curve
def goldenline(browser):
	"""Return the per-team gold curve from the second tab of the area chart."""
	return _team_curve(browser, 2, 'Goldentime')


# Creep-score curve
def creepscore(browser):
	"""Return the per-team creep-score curve from the third tab of the area chart."""
	return _team_curve(browser, 3, 'CreepScoretime')
def wanplusspider(url):
	"""Scrape every game of every match page in *url* and export CSV files.

	Uses the module-level ``browser`` WebDriver. For each game a folder
	``<root>/<Team1> VS <Team2>/Game<N>/`` is created and filled with
	Playerinformation.csv, Percentage.csv, Average.csv and, when the curves
	can be read, TeamGolden.csv and TeamCreepScore.csv.

	Args:
		url: iterable of match page URLs.
	"""
	# Common XPath prefix of the match header (title, game tabs, team names).
	header = '/html/body/div/div[3]/div[2]/div[1]'
	for u in url:
		# Load the match page
		browser.get(u)
		# One <a> per game played in this match.
		# find_elements(By..., ...) replaces the find_elements_by_* helpers
		# that were removed in Selenium 4.3.
		game = browser.find_elements(By.CLASS_NAME, 'game')[0].find_elements(By.TAG_NAME, "a")

		for game_no in range(1, len(game) + 1):
			# Move back to the page title before clicking the game tab.
			top = browser.find_element(By.XPATH, header + '/h1/a')
			ActionChains(browser).move_to_element(top).perform()
			browser.find_element(By.XPATH, header + '/div/a[' + str(game_no) + ']').click()
			time.sleep(1)
			# Team names of the current match
			Team1 = browser.find_element(By.XPATH, header + '/ul/li[1]/a/span').text
			Team2 = browser.find_element(By.XPATH, header + '/ul/li[3]/a/span').text
			# Output folder for this game. makedirs also creates the match
			# folder and any missing parent, where os.mkdir would raise.
			# NOTE(review): the "..." prefix is kept from the original and
			# looks like a placeholder for the real output directory.
			root = "...//spidersheet//" + Team1 + " VS " + Team2 + "//Game" + str(game_no) + "//"
			os.makedirs(root, exist_ok=True)

			dataframe0, playername = player(browser)
			dataframe0.to_csv(root + "Playerinformation.csv", index=False, sep=',', encoding='utf_8_sig')
			dataframe1 = percentage(browser, playername)
			dataframe1.to_csv(root + "Percentage.csv", index=False, sep=',', encoding='utf_8_sig')
			dataframe2 = average(browser, playername)
			dataframe2.to_csv(root + "Average.csv", index=False, sep=',', encoding='utf_8_sig')

			# The curves are best effort: a failure skips them for this game
			# but is reported instead of being swallowed silently.
			try:
				dataframe3 = goldenline(browser)
				dataframe3.to_csv(root + "TeamGolden.csv", index=False, sep=',', encoding='utf_8_sig')
				dataframe4 = creepscore(browser)
				dataframe4.to_csv(root + "TeamCreepScore.csv", index=False, sep=',', encoding='utf_8_sig')
			except Exception as exc:
				print('Skipping team curves for ' + root + ': ' + repr(exc))





if __name__ == '__main__':
	# Load the match links exported by the link scraper. Passing the path to
	# pandas avoids leaving a file handle open, and utf_8_sig matches the
	# encoding the CSV was written with rather than the locale default.
	data = pd.read_csv('D:/spidersheet/LPL赛事.csv', encoding='utf_8_sig')
	url = data['Link']
	# Configure the browser
	option = webdriver.ChromeOptions()
	option.add_argument('disable-infobars')
	# `options=` is accepted by Selenium 3.8+ and 4; the older
	# `chrome_options=` keyword was removed in Selenium 4.
	# `browser` stays a module-level name because wanplusspider reads it.
	browser = webdriver.Chrome(options=option)
	try:
		# Maximise the window
		browser.maximize_window()

		# Implicit wait: element lookups retry for up to 10 seconds
		browser.implicitly_wait(10)
		# Run the scraper
		wanplusspider(url)
	finally:
		# Always close the browser, even when scraping fails half-way
		browser.quit()

爬取各英雄信息

import time
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
import csv
import jieba

# Scrape every hero page on wanplus.com and write one CSV row per hero.
#
# Changes compared with the original listing:
#   * health_add / attack_speed_add were never initialised (NameError);
#     the growth values are now collected per hero instead of in global lists.
#   * The exec()-generated variables are replaced by a plain row list.
#   * The CSV header/row writing was commented out and broken across lines;
#     it is restored, with the header generated from the column groups.
#   * find_element(By..., ...) replaces the find_element_by_* helpers that
#     were removed in Selenium 4.3.
from selenium.webdriver.common.by import By

# Output file (path kept from the original listing - adjust to your machine).
CSV_PATH = "C://Users/74531/Desktop/lol.csv"

INFO_PANEL = '//*[@id="info"]/div[3]/div[1]/div[2]'
STATS_GRID = INFO_PANEL + '/div[2]/div[2]'
PLUS_BUTTON = INFO_PANEL + '/div[2]/div[3]/div[3]/i'
RATING_LIST = '//*[@id="info"]/div[4]/div[1]/div[2]/ul'
STATE_BOX = '//*[@id="state-box"]'

# (column name, location of the stat cell inside the stats grid).
# span[1] of a cell is the value at the shown level, span[2] the growth text.
LEVEL_STATS = [
    ("health", "/div[1]/div[1]"),
    ("health_recover", "/div[1]/div[2]"),
    ("magic", "/div[1]/div[3]"),
    ("magic_recover", "/div[1]/div[4]"),
    ("move_speed", "/div[2]/div[1]"),
    ("attack_extent", "/div[2]/div[2]"),
    ("attack", "/div[2]/div[3]"),
    ("attack_speed", "/div[2]/div[4]"),
    ("armor", "/div[3]/div[1]"),
    ("magic_resistance", "/div[3]/div[2]"),
]
# Stats for which the growth value is exported, in column order.
GROWTH_STATS = ["health", "health_recover", "magic", "magic_recover",
                "attack", "attack_speed", "armor", "magic_resistance"]
MAX_LEVEL = 18


def _build_header():
    """Return the CSV header, in the same column order as _scrape_hero()."""
    header = ["hero"]
    for level in range(1, MAX_LEVEL + 1):
        header += ["%s_%d" % (name, level) for name, _ in LEVEL_STATS]
    header += [name + "_add" for name in GROWTH_STATS]
    header += ["physical_attack", "magic_attack", "defence_ability",
               "operate_difficulty", "KDA_average", "win_rate",
               "entry_rate", "ban_rate"]
    for i in range(1, 11):
        header += ["equipment_%d" % i, "link_%d" % i, "e_winrate_%d" % i]
    for i in range(1, 9):
        header += ["skill_%d" % i, "s_winrate_%d" % i]
    for i in range(1, 11):
        header += ["rival_%d" % i, "rv_winrate_%d" % i]
    for i in range(1, 11):
        header += ["besthero_%d" % i, "bh_winrate_%d" % i]
    return header


def _text(driver, xpath):
    """Return the visible text of the first element matching *xpath*."""
    return driver.find_element(By.XPATH, xpath).text


def _table_cells(driver, panel, max_rows, name_cell, with_link=False):
    """Read up to *max_rows* rows of one statistics table.

    Each row yields its name, optionally the href of its link, and its win
    rate. Missing rows are padded with empty strings so that every hero
    produces the same number of columns.
    """
    table = '%s/div[2]/div[2]/div[%d]/table/tbody' % (STATE_BOX, panel)
    found = len(driver.find_element(By.XPATH, table).find_elements(By.TAG_NAME, "tr"))
    width = 3 if with_link else 2
    cells = []
    for i in range(1, max_rows + 1):
        if i > found:
            cells += [''] * width
            continue
        row = '%s/tr[%d]' % (table, i)
        cells.append(_text(driver, row + name_cell))
        if with_link:
            cells.append(driver.find_element(By.XPATH, row + '/td[1]/a').get_attribute("href"))
        cells.append(_text(driver, row + '/td[3]/span'))
    return cells


def _scrape_hero(driver, url):
    """Open one hero page and return its data as a single CSV row (list)."""
    driver.get(url)

    # Hero name
    row = [_text(driver, INFO_PANEL + '/div[1]/span[1]')]

    # Growth value of each stat: third token of the jieba-segmented text.
    # Read before the level loop, in the same order as the original script.
    cell_of = dict(LEVEL_STATS)
    growth = [jieba.lcut(_text(driver, STATS_GRID + cell_of[name] + '/span[2]'))[2]
              for name in GROWTH_STATS]

    # Base stats at every level: read the grid, then click "+" to level up.
    for _level in range(1, MAX_LEVEL + 1):
        row += [_text(driver, STATS_GRID + cell + '/span[1]') for _, cell in LEVEL_STATS]
        driver.find_element(By.XPATH, PLUS_BUTTON).click()
    row += growth

    # Hero ratings: the sixth character of each list item's text.
    row += [_text(driver, '%s/li[%d]' % (RATING_LIST, k))[5] for k in range(1, 5)]

    # Professional-play statistics: average KDA, win / pick / ban rate.
    row.append(_text(driver, STATE_BOX + '/div[1]/div[2]/span'))
    row += [_text(driver, '%s/div[1]/div[%d]/span[2]' % (STATE_BOX, k)) for k in range(3, 6)]

    # Equipment table (shown by default): name, link, win rate; at most 10.
    row += _table_cells(driver, 1, 10, '/td[1]/a/span', with_link=True)

    # The other tables sit behind tabs that must be clicked open first:
    # skills (at most 8), rivals (at most 10), common partners (at most 10).
    for panel, max_rows, name_cell in ((2, 8, '/td[1]/span'),
                                       (3, 10, '/td[1]/a/span'),
                                       (4, 10, '/td[1]/a/span')):
        driver.find_element(By.XPATH, '%s/div[2]/div[1]/ul/li[%d]' % (STATE_BOX, panel)).click()
        row += _table_cells(driver, panel, max_rows, name_cell)
    return row


# Open the Chrome browser
driver = webdriver.Chrome()
# Build the list of hero page URLs
urls = ["https://www.wanplus.com/lol/hero/{}".format(str(i)) for i in range(1, 150)]

try:
    # Create lol.csv, write the header, then one row per hero page.
    with open(CSV_PATH, "wt", newline="", encoding="utf-8") as fp:
        writer = csv.writer(fp)
        writer.writerow(_build_header())
        for url in urls:
            writer.writerow(_scrape_hero(driver, url))
    # Short pause before shutting down (kept from the original script).
    time.sleep(1)
finally:
    # Quit the browser even if a page failed to scrape
    driver.quit()

你可能感兴趣的:(python,爬虫,技巧,python)