selenium获取动态网页信息(某东)-具体配置信息

 

需要安装的包:

selenium 
关于浏览器驱动的安装与配置,请参见:
《selenium 之驱动环境配置 chrome、firefox、IE》
 1 # encoding:utf-8
 2 # Author:"richie"
 3 # Date:8/16/2017
 4 
 5 import re,json
 6 from selenium import webdriver
 7 
 8 def spider(url):
 9     html = get_file(url)
10     com = re.compile(r'
  • .*?
    .*?(?P.)(?P.*?)' 11 r'.*?
    .*?(?P.*?)' 12 r'.*?
    .*?.*?(?P.*?)', re.S) 13 for item in com.finditer(html): 14 yield { 15 "name": item.group("name"), 16 "currency": item.group("currency"), 17 "price": item.group("price"), 18 "comment_num": item.group('comment_num'), 19 } 20 21 22 def get_file(url): 23 try: 24 driver = webdriver.Chrome() 25 driver.get(url) 26 driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") 27 source = driver.page_source 28 driver.quit() 29 return source 30 except BaseException as e: 31 print(e) 32 return None 33 34 if __name__ == '__main__': 35 for i in range(1,2): 36 page_url = "https://list.jd.com/list.html?cat=9987,653,655&ev=exprice_M1800L2500&page="+str(i)+"&sort=sort_rank_asc&trans=1&JL=6_0_0" 37 ret = spider(page_url) 38 f = open("jingdong.txt", "a",encoding='utf-8') 39 for obj in ret: 40 data = json.dumps(obj, ensure_ascii=False) 41 f.write(data + "\n") 42 print("ok")
  •  

    转载于:https://www.cnblogs.com/richiewlq/p/7390248.html

    你可能感兴趣的:(python,json)