爬取东方财富网龙虎榜(wechat:15353378609)

爬取东方财富网龙虎榜(wechat:15353378609)_第1张图片

from selenium import webdriver
from lxml import etree
import json
import os
import time




def write_to_file(content,p_name,out_dir='../20190708期货数据'):
    """Append ``content`` as one JSON line to today's file for ``p_name``.

    The target file is ``<out_dir>/<YYYYMMDD><p_name>.txt`` where the date is
    the local date at call time. The file is opened in append mode, so repeated
    calls add one line each.

    Args:
        content: Any JSON-serialisable value; written with ``ensure_ascii=False``
            so non-ASCII text stays readable in the file.
        p_name: Suffix of the file name (placed right after the date).
        out_dir: Directory that receives the file. It must already exist.

    Raises:
        OSError: If the file cannot be opened (e.g. ``out_dir`` is missing).
        TypeError: If ``content`` is not JSON-serialisable.
    """
    new_time = time.strftime('%Y%m%d')
    target = out_dir + '/' + new_time + p_name + '.txt'

    # The ``with`` statement closes the file; no explicit close() is needed.
    with open(target, 'a', encoding='utf-8') as f:
        f.write(json.dumps(content, ensure_ascii=False) + '\n')



def get_html(url,s_name,p_Name):
    """Open ``url``, submit an exchange/variety search and return the page HTML.

    Uses the module-level ``driver`` (a Selenium WebDriver) that the script
    entry point creates before calling ``Main``.

    Args:
        url: Address of the page to load.
        s_name: Text sent to the element with id ``futures_exchange``.
        p_Name: Text sent to the element with id ``futures_variety``.

    Returns:
        ``driver.page_source`` taken one second after the element with id
        ``btnSearch`` is clicked, or ``None`` if any step raised.
    """
    # Imported locally so the function does not depend on a file-level import.
    # ``find_element(By.ID, ...)`` replaces ``find_element_by_id``, which was
    # removed from Selenium 4.
    from selenium.webdriver.common.by import By

    try:
        driver.get(url=url)
        driver.find_element(By.ID, 'futures_exchange').send_keys(s_name)
        driver.find_element(By.ID, 'futures_variety').send_keys(p_Name)

        # To query a specific date, set the date input before searching, e.g.:
        # driver.execute_script("arguments[0].value=arguments[1]",
        #                       driver.find_element(By.ID, "inputDate"),
        #                       "2019-07-08")

        search_button = driver.find_element(By.ID, 'btnSearch')

        # Fixed pauses give the page time to react before and after the click.
        time.sleep(1)
        search_button.click()
        time.sleep(1)
        return driver.page_source

    except Exception as exc:
        # Best-effort: report the failure (with its cause) and let the caller
        # decide what to do with a missing page. ``Exception`` rather than a
        # bare ``except`` so Ctrl-C / SystemExit still stop the script.
        print('没能打开浏览器', exc)
        return None

# XPath from the ``page`` div down to the ``mainContent`` div; every query
# below starts with this path.
_MAIN_CONTENT = (
    "/div[@class='main']/div[@class='framecontent']/div[@class='sitebody']"
    "/div[@class='maincont']/div[@class='IFcontentBox']/div[@class='content']"
    "/div[2]/div[@id='mainContent']"
)
_ABS_ROOT = "/html/body/div[@id='page']" + _MAIN_CONTENT
_ANY_ROOT = "//div[@id='page']" + _MAIN_CONTENT

# One entry per leaderboard, in output order:
# (root used for the title query, section path below mainContent,
#  <li> step used for the column headers, id of the <ul> holding the rows)
_BOARDS = (
    # trading-volume ranking (its title query is anchored with "//")
    (_ANY_ROOT, "/div[@class='IFcb1'][2]", "li[@class='IFbb']", 'ulCjl'),
    # long-position ranking
    (_ABS_ROOT, "/div[@class='IFcb2'][3]", "li[@class='IFbb']", 'ulDtcc'),
    # short-position ranking
    (_ABS_ROOT, "/div[@class='IFcb2'][4]", "li", 'ulKtcc'),
    # net-long ranking
    (_ABS_ROOT, "/div[@id='otherList']/div[@class='IFcb1'][1]", "li", 'ulJdt'),
    # long-increase ranking
    (_ABS_ROOT, "/div[@id='otherList']/div[@class='IFcb2'][1]", "li[@class='IFbb']", 'ulDtzc'),
    # long-decrease ranking
    (_ABS_ROOT, "/div[@id='otherList']/div[@class='IFcb2'][2]", "li", 'ulDtjc'),
    # net-short ranking
    # NOTE(review): these selectors are identical to the long-decrease entry
    # above, so this board currently repeats that data. Looks like a
    # copy-paste slip; the correct section/ul id must be confirmed against the
    # live page before changing it.
    (_ABS_ROOT, "/div[@id='otherList']/div[@class='IFcb2'][2]", "li", 'ulDtjc'),
    # short-increase ranking
    (_ABS_ROOT, "/div[@id='otherList']/div[@class='IFcb2'][3]", "li", 'ulKtzc'),
    # short-decrease ranking
    (_ABS_ROOT, "/div[@id='otherList']/div[@class='IFcb2'][4]", "li", 'ulKtjc'),
)


def _extract_board(et, title_root, section, header_li, ul_id, width=4):
    """Return ``[title, headers, row, row, ...]`` for one leaderboard section."""
    lists = _ABS_ROOT + section + "/div[@class='IFUlDiv']"
    board = [
        et.xpath(title_root + section + "/div[@class='IFtit']/text()"),
        et.xpath(lists + "/ul[1]/" + header_li + "/span/text()"),
    ]
    # The row <ul> yields a flat list of text nodes; cut it into rows of
    # ``width`` cells (the last row may be shorter).
    cells = et.xpath(lists + "/ul[@id='" + ul_id + "']/li//text()")
    board.extend(cells[i:i + width] for i in range(0, len(cells), width))
    return board


def parse_one_page(html):
    """Extract the nine leaderboards from a result page.

    Args:
        html: Page source as a string.

    Returns:
        A list of nine boards in the order of ``_BOARDS``. Each board is a list
        whose first item is the list of title text nodes, whose second item is
        the list of column-header text nodes, and whose remaining items are
        rows of up to four text nodes each.

    Raises:
        ValueError: If ``html`` is ``None`` or empty, i.e. there is no page to
            parse.
    """
    if not html:
        # Fail with a clear message instead of an opaque parser error.
        raise ValueError('no HTML to parse')

    et = etree.HTML(html)
    return [_extract_board(et, *spec) for spec in _BOARDS]



def Main():
    """Scrape every configured futures variety and write its boards to disk.

    For each exchange and each variety listed below, the page is fetched with
    ``get_html``, parsed with ``parse_one_page`` and every item of every board
    is passed to ``write_to_file`` together with the variety name. A failure
    for one variety is reported and the loop continues with the next one.
    """
    # Exchange name -> variety names to query on that exchange.
    Date = {
        '上海期货交易所':['沪银','沪铝','沪金','沥青','沪铜','燃油','热卷','镍','沪铅','螺纹钢','橡胶','锡','纸浆','沪锌'],
        '大连商品期货交易所':['豆一','豆二','玉米','玉米淀粉','乙二醇','铁矿石','焦炭','鸡蛋','焦煤','塑料','豆粕','棕榈','聚丙烯','PVC','豆油'],
        '郑州商品交易所':['郑煤','PTA','白糖','锰硅','硅铁','菜籽','菜粕','普麦','菜油','甲醇','晚籼','粳稻','玻璃','棉纱','红枣','郑棉','苹果']
    }

    # The same page serves every query, so build the URL once.
    url = 'http://data.eastmoney.com/futures/dl/data.html'

    for key, value in Date.items():
        for p in value:
            html = get_html(url, key, p)
            print('--- 正在解析网站 ---')

            if html is None:
                # get_html already reported why the page could not be loaded.
                print('爬虫出错了', p)
                continue

            try:
                for board in parse_one_page(html):
                    for item in board:
                        write_to_file(item, p)
            except Exception as exc:
                # Keep going with the next variety, but say what went wrong.
                print('爬虫出错了', p, exc)
            else:
                print(p,"---写入成功---")

if __name__ == '__main__':

    # Local wall-clock time (HH:MM:SS) at which the scrape starts.
    run_at = "16:31:00"
    # Working directory that write_to_file appends to by default.
    out_dir = '../20190708期货数据'
    previous = None

    while True:

        n = time.strftime('%H:%M:%S')

        print(n)

        # Fire on the exact second, or when the clock stepped over it between
        # two polls (sleep(1) plus loop overhead can skip a second). Zero-padded
        # HH:MM:SS strings compare correctly as text.
        due = n == run_at or (previous is not None and previous < run_at < n)

        if not due:
            previous = n
            time.sleep(1)
            continue

        # Raw string: '\c' is not a valid escape sequence in a normal literal.
        path = r'D:\chromedriver\chromedriver.exe'

        # NOTE(review): recent Selenium 4 releases no longer accept the driver
        # path positionally and expect a Service object instead — confirm the
        # installed Selenium version before upgrading.
        driver = webdriver.Chrome(path)

        try:
            new_time = time.strftime('%Y%m%d')

            # Create the same directory that is written to and renamed below;
            # tolerate a leftover directory from an interrupted earlier run.
            os.makedirs(out_dir, exist_ok=True)

            Main()

            os.rename(out_dir, os.path.join(r'D:\Code\Spider\Date\期货数据', new_time + '20190708期货数据'))
        finally:
            # quit() ends the whole browser session, even when the scrape fails.
            driver.quit()

        break

你可能感兴趣的:(爬取东方财富网龙虎榜(wechat:15353378609))