python-爬取东方财富网期货市场大商所数据
注意:因为是通过谷歌浏览器(Chrome)爬取,所以要先下载好chromedriver
然后用到的模块有标准库的os、time,以及第三方的openpyxl和selenium(其中的webdriver)
这里是将爬取下来的数据先存为txt,然后再转化为excel文件
def dashangsuo(page_count=13):
    """Scrape the paginated futures quote table into a dated .txt and .xlsx.

    Opens Chrome through Selenium, loads the Eastmoney futures list page,
    appends the text of every table page to ``<output>/<YYYY-MM-DD>.txt``,
    then splits each saved line on single spaces and stores the pieces as
    one row of ``<output_xlsx>/<YYYY-MM-DD>.xlsx``.

    Args:
        page_count: Number of table pages to read. Defaults to 13, which is
            the number of iterations the original hard-coded loop ran.

    Raises:
        Selenium, file-system and openpyxl errors are not caught here and
        propagate to the caller; the browser is still closed in that case.
    """
    import os
    import time

    import openpyxl
    from selenium import webdriver
    from selenium.webdriver.common.by import By

    base_url = "http://quote.eastmoney.com/center/gridlist2.html#futures_114"

    # Raw strings: sequences such as "\p", "\e" and "\o" are invalid escapes
    # in a normal literal and only keep their backslash by accident.
    txt_dir = os.path.abspath(r"D:\python代码\eastmoneytry\eco_dashangsuo\output")
    xlsx_dir = os.path.abspath(r"D:\python代码\eastmoneytry\eco_dashangsuo\output_xlsx")
    # exist_ok avoids the separate "exists?" check and its race window.
    os.makedirs(txt_dir, exist_ok=True)
    os.makedirs(xlsx_dir, exist_ok=True)

    # Both output files are named after today's local date.
    current_time = time.strftime("%Y-%m-%d", time.localtime())
    txt_path = os.path.join(txt_dir, current_time + ".txt")
    xlsx_path = os.path.join(xlsx_dir, current_time + ".xlsx")

    driver = webdriver.Chrome()
    try:
        driver.get(base_url)
        # The page may respond slowly, so allow 5 seconds before reading it.
        time.sleep(5)
        # NOTE: append mode is kept from the original, so running twice on the
        # same day adds the new pages after the ones already in the file.
        with open(txt_path, "a", encoding="utf-8") as fp:
            for page in range(page_count):
                table = driver.find_element(By.ID, "table_wrapper-table")
                # One newline after each page keeps pages on separate lines.
                fp.write(table.text + "\n")
                if page < page_count - 1:
                    # Second link of the paginator is the "next page" button;
                    # it is not clicked after the final page has been read.
                    driver.find_element(
                        By.XPATH, "//*[@id='main-table_paginate']/a[2]"
                    ).click()
                    # Give the next page time to render before reading it.
                    time.sleep(2)
    finally:
        # Always release the browser and chromedriver process.
        driver.quit()

    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.title = "期货市场大商所"
    with open(txt_path, "r", encoding="utf-8") as fp:
        for line in fp:
            # Drop the line ending so it does not end up inside the last cell,
            # and skip lines that carry no data at all.
            line = line.rstrip("\n")
            if line:
                sheet.append(line.split(" "))
    wb.save(xlsx_path)
# Run the scraper immediately. There is no `__main__` guard, so this call also
# executes when the file is imported as a module.
dashangsuo()