python

这是一个生成简单 xls 文件的示例：
# Minimal xlwt example: create a workbook, write two cells, save it as .xls.
import xlwt

# Plain ASCII quotes are required; the typographic quotes in the original
# listing are not valid Python string delimiters.
book = xlwt.Workbook(encoding='utf-8')
sheet = book.add_sheet('sheet1')
sheet.write(0, 0, 'pathon')  # arguments: row index, column index, value
sheet.write(1, 1, 'love')
book.save('d:/test.xls')

import requests
import xlwt
from lxml import etree
# One row per scraped listing; filled by get_info() and read by the export
# code under the __main__ guard below.
all_info_list = []


def get_info(url):
    """Scrape one listing page and append its rows to ``all_info_list``.

    Every ``<li>`` under ``<ul class="sellListContent">`` yields one row
    ``[title, price, address, room, area, orient, style, lift]``.

    Args:
        url: Address of the listing page to download.

    Returns:
        None. Rows are appended to the module-level ``all_info_list``.

    Raises:
        IndexError: If an XPath query matches nothing for a listing, or
            the ' | '-separated description has fewer than five fields.
    """
    res = requests.get(url)
    html = etree.HTML(res.text)

    infos = html.xpath('//ul[@class="sellListContent"]/li')
    for info in infos:
        title = info.xpath('div[1]/div[1]/a/text()')[0]
        price = info.xpath('div[1]/div[6]/div[1]/span/text()')[0]
        address = info.xpath('div[1]/div[2]/div/a/text()')[0]

        # The description is one ' | '-separated string; query and split
        # it once instead of repeating the XPath for every field.
        fields = info.xpath('div[1]/div[2]/div/text()')[0].split(' | ')
        room = fields[1]
        area = fields[2]
        orient = fields[3]
        style = fields[4]
        # The lift entry is only taken when there are exactly six fields;
        # otherwise fall back to '无电梯' ("no lift").
        lift = fields[5] if len(fields) == 6 else '无电梯'

        all_info_list.append(
            [title, price, address, room, area, orient, style, lift]
        )


# Link embedded in the original listing:
# https://blog.csdn.net/huyishero/article/details/79119911
if __name__ == '__main__':
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('sheet1')

    # Row 0 holds the column titles.
    header = ['title', 'price', 'address', 'room', 'area', 'orient',
              'style', 'lift']
    for col, name in enumerate(header):
        sheet.write(0, col, name)

    # Result pages 1 through 10.
    urls = ['https://cs.lianjia.com/ershoufang/pg{}/'.format(str(i))
            for i in range(1, 11)]
    # Populate all_info_list.
    for url in urls:
        get_info(url)

    # Data rows start at row 1, directly below the header.
    for row, record in enumerate(all_info_list, start=1):
        for col, data in enumerate(record):
            sheet.write(row, col, data)

    book.save('d:/test.xls')

你可能感兴趣的:(学生)