爬取抽屉

import requests
import re

class chouti:
    def __init__(self):
        """Initialise the crawler's request identity, HTTP session and page counter.

        Sets the following instance attributes:
            agent: desktop Chrome User-Agent string.
            host: host name of the target site.
            headers: dict holding the ``Host`` and ``User-Agent`` values above.
            session: a ``requests`` session object used for HTTP calls.
            pageIndex: page counter, starting at 1.
        """
        browser_ua = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36"
        site_host = "dig.chouti.com"

        self.agent = browser_ua
        self.host = site_host
        # Header dict is only assembled here; NOTE(review): confirm where it is
        # actually attached to outgoing requests.
        self.headers = {"Host": site_host, "User-Agent": browser_ua}
        self.session = requests.session()
        self.pageIndex = 1

    def getPage(self,pageIndex):
        url = "http://dig.chouti.com/all/hot/recent/" + str(pageIndex)
        response = self.session.get(url)
        pattern = re.compile('
.*?
.*?.*?
.*?.*?.*?

你可能感兴趣的:(爬取抽屉)