# -*- coding:utf-8 -*-
"""Scrape loan-project detail fields from a JS-rendered inquiry page.

Selenium Chrome renders the page (content is built client-side),
BeautifulSoup pairs up the title/value spans inside the
"unit_loan_prj_detail" table, and each title/value pair is appended as a
one-entry JSON object to "<name>.json" next to this script.
"""
import requests  # NOTE(review): unused here, kept in case other code relies on it
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import json
import sys

# Directory the script was launched from; JSON output is written here.
curpath = sys.path[0]
print(curpath)


def getData(url):
    """Fetch url[0] in Chrome and dump every detail field to disk.

    url: two-element sequence [page_url, output_basename].
    Side effects: appends JSON objects via jsonDump(); the WebDriver is
    always closed, even when navigation or parsing raises.
    """
    driver = webdriver.Chrome()
    try:
        driver.set_page_load_timeout(40)
        driver.get(url[0])
        # Sleep AFTER navigation so the client-side rendering has time
        # to populate the detail table (original slept before get()).
        time.sleep(3)
        soup = BeautifulSoup(driver.page_source, 'lxml')
        table = soup.find('div', class_="unit_loan_prj_detail")
        titles = [th.get_text()
                  for th in table.find_all('span', class_="prolist_info_title")]
        details = table.find_all('span', class_="prolist_info_detail")
        # Title spans and detail spans are parallel lists; pair them up
        # instead of tracking a manual index.
        for title, detail in zip(titles, details):
            value = detail.get_text()
            if value:
                dic = {title: value}
            else:
                # Empty cell: fall back to nested spans (get_text() returns
                # "" — never None — so the original `is not None` check made
                # this branch unreachable).
                dic = {title: td.get_text() for td in detail.find_all('span')}
            jsonDump(dic, url[1])
    finally:
        # Always release the browser; the original leaked it.
        driver.quit()


def jsonDump(_json, name):
    """Append _json and a ",\\n" separator to curpath/<name>.json."""
    # One open() instead of two: write the object and its separator in
    # a single append.
    with open(curpath + '/' + name + '.json', 'a') as outfile:
        json.dump(_json, outfile, ensure_ascii=False)
        outfile.write(',\n')


if __name__ == '__main__':
    url = ['http://www.powerec.net/gdwz-web/html/xjxx/inquiry_detail.html?inq_h_id=ZGFmNTM2ZjctOWFlYi00ZDEyLWEyZjItNDFjNjAxYmY4MTZj',
           'test']
    getData(url)