Python爬取百度贴吧

1.对百度贴吧的任意帖子进行抓取

2.指定是否只抓取楼主发帖内容

3.将抓取到的内容分析并保存到文件

import re
import bs4
from bs4 import BeautifulSoup
import requests

class TiebaSpider(object):

    def __init__(self,see_lz):
        self.see_lz=see_lz

    def getHTMLText(self, url, pageNumber):
        """Download one page and return its body decoded as UTF-8.

        Args:
            url: URL prefix; ``str(pageNumber)`` is appended to it to build
                the address that is requested.
            pageNumber: Page index appended to ``url``.

        Returns:
            The response text, or the sentinel string ``'ERROR'`` when the
            request fails (connection problem, timeout, invalid URL, or an
            HTTP error status reported by ``raise_for_status``).
        """
        # Send a browser-like User-Agent with the request.
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0'}
        try:
            response = requests.get(url + str(pageNumber), timeout=30, headers=headers)
            response.raise_for_status()
        except requests.RequestException:
            # Only request-level failures map to the sentinel; KeyboardInterrupt,
            # SystemExit and programming errors are no longer swallowed.
            return 'ERROR'
        # Force UTF-8 instead of relying on the encoding guessed from headers.
        response.encoding = 'utf-8'
        return response.text

    def getTitle(self,html):
        try:
            title=re.search(r'

你可能感兴趣的:(python爬取百度贴吧)