Python 爬取贴吧帖子标题

代码比较简单，不做过多解释。
下面以“抑郁症”吧为例：

from urllib.request import urlopen
import codecs
from bs4 import BeautifulSoup
# Listing URL of the forum being crawled; the thread offset is appended as
# the value of the trailing ``pn`` query parameter.
TIEBA_URL = "http://tieba.baidu.com/f?kw=%E6%8A%91%E9%83%81%E7%97%87&ie=utf-8&pn="
# The ``pn`` offset advances by this many entries from one page to the next.
PAGE_SIZE = 50
# Crawling stops before this offset is reached (exclusive upper bound).
MAX_OFFSET = 100000
# Output file name, kept exactly as in the original script.
OUTPUT_PATH = "douban.txt"


def page_url(offset):
    """Return the listing URL for the page that starts at *offset*."""
    return TIEBA_URL + str(offset)


def page_label(offset):
    """Return the progress label for the page that starts at *offset*.

    Pages are numbered from 1. Floor division keeps the number an integer,
    so the label reads "第1页" rather than "第1.0页".
    """
    return "第" + str(offset // PAGE_SIZE + 1) + "页"


def crawl(out, max_offset=MAX_OFFSET):
    """Fetch every listing page and write the title links' text to *out*.

    Args:
        out: Writable text stream; receives one line per link, formatted as
            the link text followed by "+++" and a newline.
        max_offset: Exclusive upper bound for the ``pn`` offset.

    Raises:
        urllib.error.URLError: If a page cannot be fetched.
        KeyError: If a matched anchor has no ``href`` attribute.
    """
    for offset in range(0, max_offset, PAGE_SIZE):
        print(page_label(offset))
        # The parser reads the whole response inside the ``with`` block, so
        # the connection is released before the links are processed.
        with urlopen(page_url(offset)) as response:
            soup = BeautifulSoup(response, "html.parser")
        # Anchors with class "j_th_tit" are the links whose text is saved.
        for link in soup.find_all("a", {"class": "j_th_tit"}):
            print(link.attrs["href"] + "  " + link.text)
            out.write(link.text + "+++" + "\n")


if __name__ == "__main__":
    # The context manager closes the file even if a request fails midway.
    with codecs.open(OUTPUT_PATH, "w", "utf-8") as f:
        crawl(f)

你可能感兴趣的:(python学习之路)