&&&&&
# -*- coding: utf-8 -*-
"""Download result pages from a company search endpoint into an .xls workbook.

The script prompts for a start and an end page number, POSTs the search form
once per page, collects the text of every table cell, and writes all rows to
a single sheet underneath a fixed 17-column header row.

The code runs on both Python 2 and Python 3: ``print(...)`` is only ever
called with a single argument, which is valid in both dialects.
"""
import sys
import time

import requests
from bs4 import BeautifulSoup
from xlwt import Workbook

# Python 2 only: make UTF-8 the implicit str<->unicode codec, as the original
# script did.  ``reload`` is a builtin there; on Python 3 this branch never
# executes, so the missing name is harmless.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf8')

SEARCH_URL = 'http://162.105.134.150/searchCompy'

# Without a timeout a stalled connection would block the download forever.
REQUEST_TIMEOUT_SECONDS = 30

# Total tries per page (the first request plus one retry) and the pause
# between them.
FETCH_ATTEMPTS = 2
RETRY_DELAY_SECONDS = 3

# Header cells written to row 0 of the sheet, in column order.
COLUMN_HEADERS = (
    u'序号',
    u'法人单位名称',
    u'法人',
    u'省(自治区、直辖市)',
    u'街道',
    u'年份',
    u'组织机构代码',
    u'主要业务活动',
    u'行业',
    u'登记注册类型',
    u'企业控股情况',
    u'隶属关系',
    u'企业营业状态',
    u'机构类型',
    u'营业收入(元)',
    u'企业规模',
    u'轻重工业',
)


def beida(page):
    """Fetch one result page and return its table rows.

    Args:
        page: Page number sent as the ``page.currentPage`` form field.

    Returns:
        A list with one entry per ``<tr>`` after the first one inside the
        ``div.m-cont`` element.  Each entry is a list holding the text of
        the row's ``<td>`` cells with all whitespace removed.  The list is
        empty when the container holds no rows beyond the first.

    Raises:
        ValueError: If the response contains no ``div.m-cont`` element.
        requests.RequestException: On network failure or timeout.
    """
    form = {
        'eventId': '',
        'loginName': '',
        'keyWords': '',
        'page.currentPage': page,
        'qc.coName': '',
        'qc.year': '0',
        'qc.lp': '',
        'qc.province': '',
        'qc.co39': '0',
        'qc.co42': '0',
        'qc.co_data_15': '0',
        'qc.co35': '0',
        'qc.co_data_12': '0',
        'qc.co_data_16': '0',
        'qc.co34': '0',
        'qc.active': '',
    }
    response = requests.post(
        SEARCH_URL, data=form, timeout=REQUEST_TIMEOUT_SECONDS)
    soup = BeautifulSoup(response.text, 'lxml')

    container = soup.find('div', class_='m-cont')
    if container is None:
        # Fail with a clear message instead of an AttributeError on None;
        # the caller's retry logic still sees an exception.
        raise ValueError(
            'page %s: no <div class="m-cont"> in the response' % page)

    # The first <tr> is dropped (presumably the table's header row).
    table_rows = container.find_all('tr')[1:]
    return [
        [''.join(cell.text.split()) for cell in row.find_all('td')]
        for row in table_rows
    ]


def _fetch_page_with_retry(page):
    """Return ``beida(page)``, retrying after a pause when a try fails.

    Only ``Exception`` subclasses are retried, so Ctrl-C (KeyboardInterrupt)
    aborts the download immediately.  The error from the final attempt is
    propagated to the caller.
    """
    for attempt in range(1, FETCH_ATTEMPTS + 1):
        try:
            return beida(page)
        except Exception:
            if attempt == FETCH_ATTEMPTS:
                raise
            time.sleep(RETRY_DELAY_SECONDS)


def _write_rows(sheet, rows):
    """Write ``rows`` to ``sheet`` starting at row 1, below the header row.

    Rows shorter than the header are written as far as they go rather than
    raising IndexError; cells beyond the last header column are ignored.
    """
    width = len(COLUMN_HEADERS)
    for row_index, row in enumerate(rows, 1):
        for col_index, value in enumerate(row[:width]):
            sheet.write(row_index, col_index, value)


def saveToExecl(start, end):
    """Download pages ``start`` .. ``end - 1`` and save them as one .xls file.

    Args:
        start: First page number to download (anything ``int()`` accepts).
        end: Page number at which to stop; this page itself is NOT
            downloaded because the range is half-open.

    The workbook is saved in the current working directory under a name
    built from the fixed Chinese prefix plus ``<start>-<end>.xls``.

    Raises:
        ValueError: If ``start`` or ``end`` cannot be converted to ``int``.
    """
    # Validate the page numbers before doing any work.
    first_page = int(start)
    stop_page = int(end)

    book = Workbook(encoding='utf-8')  # set the workbook's text encoding
    sheet1 = book.add_sheet('Sheet 1')  # the single sheet receiving the data
    for col_index, title in enumerate(COLUMN_HEADERS):
        sheet1.write(0, col_index, title)

    print("The number of pages being downloaded now....")
    rows = []
    for page in range(first_page, stop_page):
        rows += _fetch_page_with_retry(page)
        print(page)  # progress indicator: one line per finished page

    # Walk the collected rows and write them into the sheet.
    _write_rows(sheet1, rows)

    file_name = u'中国工业企业数据库%s-%s.xls' % (start, end)
    book.save(file_name)


def main():
    """Prompt for the page range on stdin and run the download."""
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3

    print("*********************Chinese industrial enterprise database download program*********************")
    start = read_line("please input start page number: ")
    end = read_line("please input end page number: ")
    saveToExecl(start, end)


if __name__ == "__main__":
    main()
&&&&&