视频地址:
http://edu.51cto.com/lesson/id-12393.html
下载博客文章实例
源码:
# Download all articles linked from a Sina blog article-list page.
#
# The script fetches the index page, scans it for `<a title=... href="...html">`
# anchors, and saves every linked article into OUTPUT_DIR, pausing between
# requests. It runs on both Python 2 and Python 3.
import os
import time
import urllib
from contextlib import closing

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    urlopen = urllib.urlopen  # Python 2

ARTICLE_LIST_URL = 'http://blog.sina.com.cn/s/articlelist_3973495073_0_1.html'
OUTPUT_DIR = 'hanhan'
MAX_ARTICLES = 50


def extract_article_urls(page, limit=MAX_ARTICLES):
    """Return up to *limit* article URLs found in the HTML text *page*.

    An article link is recognised as the first ``href=`` following an
    ``<a title=`` marker; the URL runs from just after the opening quote of
    the attribute up to and including the next ``.html``.
    """
    urls = []
    title = page.find('<a title=')
    while title != -1 and len(urls) < limit:
        href = page.find('href=', title)
        if href == -1:
            break
        end = page.find('.html', href)
        if end == -1:
            break
        # +6 skips `href="` (attribute name, '=' and the opening quote);
        # +5 keeps the '.html' suffix in the extracted URL.
        urls.append(page[href + 6:end + 5])
        # Resume the scan after the URL just consumed.
        title = page.find('<a title=', end)
    return urls


def article_filename(url):
    """Return the last path segment of *url*, used as the local file name."""
    return url.rsplit('/', 1)[-1]


def fetch(url):
    """Return the raw body of *url*, closing the connection afterwards."""
    with closing(urlopen(url)) as response:
        return response.read()


def download_articles(urls, dest_dir=OUTPUT_DIR, delay=1):
    """Save every URL in *urls* into *dest_dir*, sleeping *delay* seconds
    after each download.

    The directory is created when missing. Files are written in binary mode
    so the downloaded bytes are stored unmodified.
    """
    if not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)
    for url in urls:
        content = fetch(url)
        with open(os.path.join(dest_dir, article_filename(url)), 'wb') as out:
            out.write(content)
        print('downloading ' + url)
        time.sleep(delay)


def main():
    """Fetch the article list, print the discovered URLs and download them."""
    page = fetch(ARTICLE_LIST_URL)
    if not isinstance(page, str):
        # Python 3 returns bytes; decode so the text search works on str.
        page = page.decode('utf-8', 'replace')
    urls = extract_article_urls(page)
    for url in urls:
        print(url)
    print('find end!')
    # Only the URLs actually found are downloaded (never empty placeholders).
    download_articles(urls)
    print('download articles finished!')


if __name__ == '__main__':
    main()