Python爬取链家地铁房数据

#coding=gbk
#因为涉及到中文,utf-8会报错
### 环境:Python 3.6###
import requests
import re
import pandas as pd
import csv
from bs4 import BeautifulSoup
def generate_allurl(user_in_nub):
    """Lazily yield Lianjia subway-housing listing page URLs.

    Page numbers start at 1 and stop *before* ``int(user_in_nub)``, so the
    argument acts as an exclusive upper bound: passing ``n`` produces the
    URLs for pages ``1 .. n - 1`` (nothing at all when ``n <= 1``).

    Args:
        user_in_nub: Exclusive upper page bound; any value accepted by
            ``int()`` (e.g. an int or a numeric string).

    Yields:
        str: The listing URL for each page number, in ascending order.
    """
    page_template = 'https://bj.lianjia.com/ditiefang/li647/pg{}/'
    stop_page = int(user_in_nub)
    # map() keeps the production lazy; each URL is built only when requested.
    yield from map(page_template.format, range(1, stop_page))
def main():
    #user_in_nub = input('输入生成页数:')
    df = []
    for i in generate_allurl(35): #总共34页
        print("页码"+i)
        #get_allurl(i)
        res = requests.get(i)
        if res.status_code == 200:
            soup = BeautifulSoup(res.text, 'lxml') #获取html的文本
            re_set = re.compile('.*?


你可能感兴趣的:(采集数据)