1.3

#coder:samko date:5.20 10:05# Scraping a single detail page:

```
from bs4 import BeautifulSoup
import requests

c = ['female', 'male']
url = 'http://bj.xiaozhu.com/fangzi/1779571235.html'
page = requests.get(url)
soup = BeautifulSoup(page.text, 'lxml')

title = soup.select('h4 > em')
address = soup.select('span.pr5')
img = soup.select('img[id="curBigImage"]')
dailyrent = soup.select('div.day_l > span')
landlordimg = soup.select('div.member_pic > a > img')
landlordname = soup.select('h6 > a[class="lorder_name"]')
landlordgender = soup.select('div.w_240 > h6 > span')
print(img)  # debug: check that the cover image tag was matched

for i, j, k, l, m, n, o in zip(title, address, img, dailyrent,
                               landlordgender, landlordimg, landlordname):
    def gender():
        # the gender span carries the class 'member_girl_ico' for female hosts
        if 'member_girl_ico' in m.get('class', []):
            return c[0]
        else:
            return c[1]
    data = {
        'title': i.get_text(),
        'address': j.get_text(),
        'img': k.get('src'),
        'rent': l.get_text() + '元',
        'lordimg': n.get('src'),
        'lordname': o.get_text(),
        'gender': gender(),
    }
    print(data)
```
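A note on the gender check: `soup.select` returns a list of `Tag` objects, and a tag's multi-valued `class` attribute comes back from BeautifulSoup as a list of class names, so testing `'member_girl_ico' in m` against the `Tag` itself never inspects the class. Checking against `m.get('class', [])` does, which is what the version above uses. A minimal sketch, assuming (as the original selector implies) that the gender `<span>` carries `member_girl_ico` for female hosts:

```
from bs4 import BeautifulSoup

# Hypothetical snippet standing in for the real page markup.
html = '<div class="w_240"><h6><span class="member_girl_ico"></span></h6></div>'
soup = BeautifulSoup(html, 'lxml')

span = soup.select('div.w_240 > h6 > span')[0]
print(span.get('class'))                            # ['member_girl_ico']: a list, not a string
print('member_girl_ico' in span.get('class', []))   # True
```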
Scraping multiple detail pages: how to collect the links in batches.

```
from bs4 import BeautifulSoup
import requests, re, urllib.request  # re and urllib.request are only used in the sketch below

links = []
#url = 'http://bj.xiaozhu.com'

def get_page(PageNumbers):
    # 24 links per listing page; the argument here is a page number
    for page in range(2, PageNumbers):
        full_url = 'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(str(page))
        # collect the links straight from the listing page, no need to open each small page here
        wb_data = requests.get(full_url)
        soup = BeautifulSoup(wb_data.text, 'lxml')
        # just find the <a> tags whose class is resule_img_a
        for link in soup.select('a.resule_img_a'):
            links.append(link['href'])  # the detail pages can then be analysed from these links!

if __name__ == '__main__':
    get_page(3)
    print(links)
```

There is another approach: crawl all of the concrete detail-page URLs directly with a regular expression. Only a sketch is given here; the page would be fetched with urllib.request, and the remaining steps are the same as for analysing a single detail page.

```
'''
def get_pages():
    r = r'^http://bj.xiaozhu.com/fangzi/\d{9,10}\.html$'
    lalala = re.compile(r)
    lalala.findall(page)  # fetch `page` with urllib.request; not worked out in detail here
'''
```
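The two scripts combine naturally: collect the links first, then run the single-detail-page parsing over each one. A minimal sketch of that idea; `parse_detail` and `get_links` are hypothetical helpers (not from the original note) that reuse the selectors above, and the selectors are assumed to still match the site's markup:

```
from bs4 import BeautifulSoup
import requests

def parse_detail(url):
    # Hypothetical helper: the same selectors as the single-page script above,
    # reduced to a few fields for brevity.
    soup = BeautifulSoup(requests.get(url).text, 'lxml')
    return {
        'title': soup.select('h4 > em')[0].get_text(),
        'address': soup.select('span.pr5')[0].get_text(),
        'rent': soup.select('div.day_l > span')[0].get_text() + '元',
    }

def get_links(page_numbers):
    # Same idea as get_page above, but returns the links instead of
    # appending to a module-level list.
    links = []
    for page in range(2, page_numbers):
        full_url = 'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(page)
        soup = BeautifulSoup(requests.get(full_url).text, 'lxml')
        for link in soup.select('a.resule_img_a'):
            links.append(link['href'])
    return links

if __name__ == '__main__':
    for href in get_links(3):
        print(parse_detail(href))
```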
The markdown here really isn't easy to use; it's not as good as Jupyter.
