import requests
class tiebaSpider:
    """Fetch listing pages of one tieba.baidu.com forum and save them as HTML.

    Each page is requested from ``https://tieba.baidu.com/f`` with the forum
    name in the ``kw`` query parameter and a page offset in ``pn``. Every
    downloaded page is written to the current working directory as
    ``<name>-第<page>页.html``.

    Attributes:
        name: Forum name exactly as passed in; also used in output file names.
        page_count: Number of pages that ``run`` downloads.
        headers: HTTP headers sent with every request (a browser User-Agent).
        url: URL template with a single ``{}`` placeholder for the ``pn`` offset.
    """

    # Difference between the ``pn`` values of two consecutive pages.
    PAGE_STEP = 50
    # Seconds to wait for the server before a request is abandoned.
    REQUEST_TIMEOUT = 10

    def __init__(self, tieba_name: str, page_count: int = 10) -> None:
        """Prepare the request headers and the URL template for one forum.

        Args:
            tieba_name: Name of the forum to crawl.
            page_count: How many pages to download, starting at offset 0.
        """
        from urllib.parse import quote

        self.name = tieba_name
        self.page_count = page_count
        self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                                      "Chrome/80.0.3987.132 Safari/537.36"}
        # Percent-encode the name so that characters such as "&", "#", "{" or
        # "}" can neither alter the query string nor break str.format() below.
        self.url = (
            "https://tieba.baidu.com/f?kw="
            + quote(tieba_name, safe="")
            + "&ie=utf-8&pn={}"
        )

    def get_url_list(self) -> list[str]:
        """Return the URLs of all pages to download, in page order."""
        return [
            self.url.format(page_index * self.PAGE_STEP)
            for page_index in range(self.page_count)
        ]

    def get_response(self, url: str) -> str:
        """Download ``url`` and return the response body decoded as UTF-8.

        The URL is printed before the request as a simple progress indicator.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                exceeds ``REQUEST_TIMEOUT`` seconds.
            UnicodeDecodeError: If the body is not valid UTF-8.
        """
        print(url)
        # Without a timeout a stalled connection would block the crawl forever.
        response = requests.get(
            url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        return response.content.decode()

    def save_html(self, page_num: int, html: str) -> None:
        """Write ``html`` to ``<name>-第<page_num>页.html`` in the working directory."""
        file_path = "{}-第{}页.html".format(self.name, page_num)
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(html)

    def run(self) -> None:
        """Download every page in order and save each one to its own file."""
        # enumerate() yields the 1-based page number directly, instead of
        # searching the URL list for the current URL on every iteration.
        for page_num, url in enumerate(self.get_url_list(), start=1):
            html = self.get_response(url)
            self.save_html(page_num, html)
if __name__ == "__main__":
    # Script entry point: crawl the "lol" forum and save its pages to disk.
    tiebaSpider("lol").run()