python.requests实战58写字楼

1.先看效果


python.requests实战58写字楼_第1张图片
image.png

2.思路
应对反爬虫:伪装请求头中的 User-Agent,让请求看起来来自普通浏览器

3.上源代码






import re
import requests 
from bs4 import BeautifulSoup
class Guiyang(object):
    """Scraper for the listing page at ``http://gy.58.com/zhaozu/``.

    Constructing an instance only prepares the request settings; no network
    traffic happens until :meth:`get_data` is called.

    Attributes set by ``__init__``:
        page: ``range(1, 10)``, kept for backward compatibility.  It is not
            used to build the request.
        url: Listing-page URL without a query string.
        headers: Browser-like request headers (User-Agent, Host,
            Upgrade-Insecure-Requests).
        link_url: List of listing hrefs collected by the most recent
            :meth:`get_data` call; empty until that method has run.
    """

    def __init__(self):
        # NOTE(review): the original formatted this range object straight into
        # the URL, producing "ClickID=range(1, 10)".  Whether ClickID is meant
        # to select a page is not established by this file, so the attribute
        # is kept but no longer leaks into the URL.
        self.page = range(1, 10)
        # Query values are supplied through ``params`` in get_data(), so the
        # base URL carries no query string of its own.
        self.url = 'http://gy.58.com/zhaozu/'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
            'Host': 'gy.58.com',
            'Upgrade-Insecure-Requests': '1',
        }
        # Filled in by get_data(); the constructor itself performs no I/O, so
        # creating an instance and then calling get_data() fetches only once.
        self.link_url = []

    @staticmethod
    def _strip_chars(text, chars):
        """Return ``text`` with every character listed in ``chars`` removed."""
        for ch in chars:
            text = text.replace(ch, '')
        return text

    def get_data(self):
        """Fetch the listing page and print one line per complete listing.

        Each printed line has the form ``unit_price,location,title``.  A list
        item that lacks a link, title, location or unit-price element is
        skipped instead of aborting the whole run.

        Returns:
            list: The ``href`` of every listing that was printed.  The same
            list is stored on ``self.link_url``.

        Raises:
            requests.RequestException: On connection failure, timeout, or a
                non-2xx HTTP status.
        """
        # Same key/value pairs the original sent; their meaning is not
        # documented in this file.
        params = {
            'PGTID': '0d00000d-0000-0ee8-d8e7-f5dce12e009e',
            'ClickID': '2',
        }

        # ``params`` puts the values in the query string; ``data`` would send
        # them as a request body, which is not how a GET carries arguments.
        response = requests.get(
            url=self.url,
            headers=self.headers,
            params=params,
            timeout=10,
        )
        response.raise_for_status()

        # Parse once and reuse the tree.
        soup = BeautifulSoup(response.text, 'lxml')
        listing = soup.find('ul', class_='house-list-wrap')

        links = []
        if listing is None:
            # Page layout differs from what is expected (or the request was
            # answered with a different page): nothing to report.
            self.link_url = links
            return links

        for item in listing.find_all('li'):
            anchor = item.find('a')
            title = item.find('span', attrs={'class': 'title_des'})
            base_info = item.find('p', class_='baseinfo')
            unit_price = item.find('p', class_='unit')

            if anchor is None or title is None or base_info is None or unit_price is None:
                continue
            href = anchor.get('href')
            if href is None:
                continue

            name = title.get_text()
            location = self._strip_chars(base_info.get_text(), '\n')
            pricetoday = self._strip_chars(unit_price.get_text(), ' \n\r')

            print('{},{},{}'.format(pricetoday, location, name))
            links.append(href)

        self.link_url = links
        return links




# Script entry: create the scraper, then explicitly fetch and print the listings.
# NOTE(review): if Guiyang.__init__ itself calls get_data(), the call below
# repeats the fetch and the printed output -- confirm which is intended.
c = Guiyang()
c.get_data()

你可能感兴趣的:(python.requests实战58写字楼)