"""Scrape the eastmoney.com futures leaderboards and store them as JSON lines.

At a fixed time of day the script opens the eastmoney futures page in Chrome,
queries every (exchange, product) pair listed in ``Main``, extracts nine
leaderboard tables from the rendered HTML and appends them, one JSON value per
line, to a per-product text file. Afterwards the output directory is moved to
an archive location.
"""
import json
import os
import time

from lxml import etree
from selenium import webdriver

# Working directory that write_to_file() appends into.
DATA_DIR = '../20190708期货数据'
# Prefix of the archive path the working directory is renamed to after a run.
ARCHIVE_PREFIX = r'D:\Code\Spider\Date\期货数据\\'
# Raw string: the original non-raw literal relied on the invalid escape ``\c``.
CHROMEDRIVER_PATH = r'D:\chromedriver\chromedriver.exe'
# Local wall-clock time ('HH:MM:SS') at which the scrape starts.
RUN_AT = '16:31:00'
PAGE_URL = 'http://data.eastmoney.com/futures/dl/data.html'

# XPath from <body> (exclusive) down to the container holding all boards.
_MAIN_CONTENT = (
    "/div[@id='page']/div[@class='main']/div[@class='framecontent']"
    "/div[@class='sitebody']/div[@class='maincont']"
    "/div[@class='IFcontentBox']/div[@class='content']/div[2]"
    "/div[@id='mainContent']"
)

# One entry per leaderboard, in output order:
#   (root used for the title query, board container relative to mainContent,
#    <li> step selecting the column-header row, id of the <ul> with the data)
_BOARDS = (
    # Trading-volume leaderboard
    ("/", "/div[@class='IFcb1'][2]", "li[@class='IFbb']", "ulCjl"),
    # Long-position leaderboard
    ("/html/body", "/div[@class='IFcb2'][3]", "li[@class='IFbb']", "ulDtcc"),
    # Short-position leaderboard
    ("/html/body", "/div[@class='IFcb2'][4]", "li", "ulKtcc"),
    # Net-long leaderboard
    ("/html/body", "/div[@id='otherList']/div[@class='IFcb1'][1]", "li",
     "ulJdt"),
    # Long-position-increase leaderboard
    ("/html/body", "/div[@id='otherList']/div[@class='IFcb2'][1]",
     "li[@class='IFbb']", "ulDtzc"),
    # Long-position-decrease leaderboard
    ("/html/body", "/div[@id='otherList']/div[@class='IFcb2'][2]", "li",
     "ulDtjc"),
    # Net-short leaderboard
    # NOTE(review): these selectors are identical to the long-position-decrease
    # entry above, so this slot currently duplicates that board. It looks like
    # a copy-paste slip; the real container / <ul> id must be confirmed against
    # the live page before changing it.
    ("/html/body", "/div[@id='otherList']/div[@class='IFcb2'][2]", "li",
     "ulDtjc"),
    # Short-position-increase leaderboard
    ("/html/body", "/div[@id='otherList']/div[@class='IFcb2'][3]", "li",
     "ulKtzc"),
    # Short-position-decrease leaderboard
    ("/html/body", "/div[@id='otherList']/div[@class='IFcb2'][4]", "li",
     "ulKtjc"),
)


def write_to_file(content, p_name):
    """Append *content* as one JSON line to today's file for product *p_name*.

    The file is ``<DATA_DIR>/<YYYYMMDD><p_name>.txt``; the directory must
    already exist. Non-ASCII text is written verbatim (UTF-8), not escaped.
    """
    new_time = time.strftime('%Y%m%d')
    target = DATA_DIR + '/' + new_time + p_name + '.txt'
    # The context manager closes the file; no explicit close() is needed.
    with open(target, 'a', encoding='utf-8') as f:
        f.write(json.dumps(content, ensure_ascii=False) + '\n')


def get_html(url, s_name, p_Name):
    """Load *url*, search for exchange *s_name* / product *p_Name*, return HTML.

    Uses the module-level ``driver`` created in the ``__main__`` section.

    Returns:
        The page source after the search button was clicked, or ``None`` when
        any browser interaction failed.
    """
    try:
        driver.get(url=url)
        driver.find_element_by_id('futures_exchange').send_keys(s_name)
        driver.find_element_by_id('futures_variety').send_keys(p_Name)
        # To query a specific date instead of the page default, set the
        # "inputDate" field before searching, e.g.:
        #   driver.execute_script("arguments[0].value=arguments[1]",
        #                         driver.find_element_by_id("inputDate"),
        #                         "2019-07-08")
        search_button = driver.find_element_by_id('btnSearch')
        time.sleep(1)
        search_button.click()
        # Give the page a moment to render the result tables.
        time.sleep(1)
        return driver.page_source
    except Exception as exc:
        # Narrowed from a bare ``except:`` so Ctrl-C / SystemExit still
        # propagate; the cause is printed instead of being discarded.
        print('没能打开浏览器', exc)
        return None


def _parse_board(et, title_root, section, header_li, list_id):
    """Return ``[title, header, row, row, ...]`` for one leaderboard."""
    board = "/html/body" + _MAIN_CONTENT + section
    title = et.xpath(
        title_root + _MAIN_CONTENT + section + "/div[@class='IFtit']/text()")
    header = et.xpath(
        board + "/div[@class='IFUlDiv']/ul[1]/" + header_li + "/span/text()")
    cells = et.xpath(
        board + "/div[@class='IFUlDiv']/ul[@id='" + list_id + "']/li//text()")
    # The data list is a flat run of text nodes; regroup it four per row
    # (the final row may be shorter if the count is not a multiple of four).
    rows = [cells[i:i + 4] for i in range(0, len(cells), 4)]
    return [title, header] + rows


def parse_one_page(html):
    """Extract the nine leaderboards from the rendered page *html*.

    Returns:
        A list with one entry per board (order of ``_BOARDS``). Each entry is
        ``[title_texts, header_texts, row, row, ...]`` where every element is
        a list of strings and each ``row`` holds up to four cells.
    """
    et = etree.HTML(html)
    return [_parse_board(et, *spec) for spec in _BOARDS]


def Main():
    """Scrape every configured exchange/product and write the results to disk.

    A failure for one product is reported and does not stop the others.
    """
    exchanges = {
        '上海期货交易所': ['沪银', '沪铝', '沪金', '沥青', '沪铜', '燃油', '热卷',
                    '镍', '沪铅', '螺纹钢', '橡胶', '锡', '纸浆', '沪锌'],
        '大连商品期货交易所': ['豆一', '豆二', '玉米', '玉米淀粉', '乙二醇', '铁矿石',
                      '焦炭', '鸡蛋', '焦煤', '塑料', '豆粕', '棕榈', '聚丙烯',
                      'PVC', '豆油'],
        '郑州商品交易所': ['郑煤', 'PTA', '白糖', '锰硅', '硅铁', '菜籽', '菜粕',
                    '普麦', '菜油', '甲醇', '晚籼', '粳稻', '玻璃', '棉纱',
                    '红枣', '郑棉', '苹果'],
    }
    for exchange, products in exchanges.items():
        for p in products:
            html = get_html(PAGE_URL, exchange, p)
            print('--- 正在解析网站 ---')
            try:
                # html is None when get_html() failed; parsing then raises
                # and is reported by the handler below.
                for board in parse_one_page(html):
                    for line in board:
                        write_to_file(line, p)
                print(p, "---写入成功---")
            except Exception as exc:
                print('爬虫出错了', exc)


def _wait_until(clock_time):
    """Block until the local wall clock reaches *clock_time* ('HH:MM:SS').

    Prints the current time about once per second while waiting. Unlike an
    exact string comparison, this also fires when the polling loop drifts and
    never samples the target second itself.
    """
    previous = time.strftime('%H:%M:%S')
    print(previous)
    if previous == clock_time:
        return
    while True:
        time.sleep(1)
        current = time.strftime('%H:%M:%S')
        print(current)
        if previous <= current:
            crossed = previous < clock_time <= current
        else:
            # The clock wrapped past midnight between the two samples.
            crossed = clock_time > previous or clock_time <= current
        if crossed:
            return
        previous = current


if __name__ == '__main__':
    _wait_until(RUN_AT)
    # Module-level on purpose: get_html() reads this global.
    driver = webdriver.Chrome(CHROMEDRIVER_PATH)
    try:
        # Create the directory write_to_file() actually uses; tolerate a
        # leftover directory from an interrupted earlier run.
        os.makedirs(DATA_DIR, exist_ok=True)
        # Always defined now; previously it was only set when the directory
        # had to be created, which made the rename below raise NameError.
        new_time = time.strftime('%Y%m%d')
        Main()
        os.rename(DATA_DIR, ARCHIVE_PREFIX + new_time + '20190708期货数据')
    finally:
        # Release the browser and the chromedriver process even on failure.
        driver.quit()