Python:用 selenium 网页爬虫读取列表文件

用 selenium 编写网页爬虫,从列表文件 flist.txt 中读取基金代码。

先在页面上输入起止日期进行查询,再抓取天天基金网上对应的基金净值。

fund3.py

# -*- coding: utf-8 -*-
import os, sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import unittest, time, re
from datetime import date

class Fund(unittest.TestCase):
    """Scrape historical fund net values from fund.eastmoney.com with Selenium.

    Fund codes are read from ``flist.txt`` in the current working directory
    (one per line; only lines that are exactly six characters long after
    stripping are kept).  For every code the history page is opened, the
    date range ``start_date``..``end_date`` is submitted, and the first four
    cells of each result row are appended to ``<YYYYMMDD>.txt``, a file named
    after today's date.
    """

    # Date range typed into the page's start/end date inputs.  Kept as class
    # attributes so the window can be changed without editing test_fund.
    start_date = "2017-06-29"
    end_date = "2017-07-07"

    def setUp(self):
        """Load the fund list, start Chrome and open today's output file.

        Raises whatever ``open`` raises when ``flist.txt`` is missing or the
        output file cannot be created.
        """
        today = date.today().strftime("%Y%m%d")
        self.base_url = "http://fund.eastmoney.com"
        self.verificationErrors = []
        self.accept_next_alert = True

        # Read the input before launching the browser: unittest does not call
        # tearDown when setUp raises, so a missing flist.txt must not leave a
        # Chrome process behind.
        with open("flist.txt", 'r') as src:
            stripped = (line.strip() for line in src)
            self.flist = [code for code in stripped if len(code) == 6]

        ch_driver = os.path.abspath(r"D:\selenium\chromedriver.exe")
        # NOTE(review): presumably the Python bindings locate chromedriver via
        # PATH rather than through this variable -- confirm before relying on it.
        os.environ["webdriver.chrome.driver"] = ch_driver
        self.driver = webdriver.Chrome()
        try:
            self.driver.implicitly_wait(30)
            self.fp = open(today + '.txt', 'w')
        except Exception:
            # Same reason as above: tearDown will not run, so quit here.
            self.driver.quit()
            raise
        print(today + '.txt')

    def test_fund(self):
        """Query each fund's history page and append its rows to the output file.

        Every output line has the form ``code cell1 cell2 cell3 cell4 `` (each
        field is followed by a single space).  A failure while reading one
        fund's table is printed together with the fund code and the loop
        continues with the next fund.
        """
        driver = self.driver
        out = self.fp
        for code in self.flist:
            driver.get(self.base_url + "/f10/jjjz_%s.html" % code)

            start_box = driver.find_element(By.ID, "lsjzSDate")
            start_box.clear()
            start_box.send_keys(self.start_date)

            end_box = driver.find_element(By.ID, "lsjzEDate")
            end_box.clear()
            end_box.send_keys(self.end_date)

            driver.find_element(By.CSS_SELECTOR, "input.search").click()
            # Fixed pause; presumably gives the page time to refresh the table.
            time.sleep(1)
            try:
                div = driver.find_element(By.ID, "jztable")
                tables = div.find_elements(By.TAG_NAME, "table")
                bodies = tables[0].find_elements(By.TAG_NAME, "tbody")
                for row in bodies[0].find_elements(By.TAG_NAME, 'tr'):
                    cells = row.find_elements(By.TAG_NAME, 'td')
                    fields = [code] + [td.text for td in cells[0:4]]
                    # Build the whole line first so a failure while reading a
                    # cell cannot leave a partial line in the file.
                    out.write(''.join(field + ' ' for field in fields) + '\n')
            except Exception as msg:
                # Best effort: report which fund failed and carry on.
                print("%s: %s" % (code, msg))

    def is_element_present(self, how, what):
        """Return True if an element matching locator (how, what) can be found."""
        try:
            self.driver.find_element(by=how, value=what)
        except NoSuchElementException:
            return False
        return True

    def is_alert_present(self):
        """Return True if switching to an alert does not raise."""
        try:
            self.driver.switch_to.alert
        except NoAlertPresentException:
            return False
        return True

    def close_alert_and_get_its_text(self):
        """Accept or dismiss the current alert and return its text.

        The alert is accepted when ``self.accept_next_alert`` is true and
        dismissed otherwise; the flag is reset to True afterwards in all cases.
        """
        try:
            alert = self.driver.switch_to.alert
            alert_text = alert.text
            if self.accept_next_alert:
                alert.accept()
            else:
                alert.dismiss()
            return alert_text
        finally:
            self.accept_next_alert = True

    def tearDown(self):
        """Close the output file, quit the browser and check recorded errors."""
        try:
            self.fp.close()
        finally:
            # Quit the browser even if closing the file fails.
            self.driver.quit()
        self.assertEqual([], self.verificationErrors)

# Run the test case through unittest's command-line runner, but only when
# this file is executed as a script (not when it is imported).
if "__main__" == __name__:
    unittest.main()

flist.txt 的格式:每行一个基金代码(6 位数字)。



你可能感兴趣的:(python,测试)