import os
import re


def extract_img_urls(html_text):
    """Return every src URL found in ``img src="..."`` tags of *html_text*.

    The non-greedy ``(.*?)`` captures exactly one URL per tag; ``re.S``
    lets the pattern match even if a tag is broken across lines.
    """
    return re.findall(r'img src="(.*?)"', html_text, re.S)


def main():
    """Fetch the Baidu Tieba front page and save every <img> to pic/<i>.jpg."""
    # Third-party dependency; imported lazily so the pure helper above
    # remains importable in environments without requests installed.
    import requests

    # Browser-style User-Agent: some pages reject the default requests UA.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36'}
    html = requests.get('http://tieba.baidu.com/', headers=headers, timeout=10)
    print(html.text)
    print("That is all")

    # Original code assumed the pic folder already existed and used a
    # Windows-only backslash path; create it portably instead.
    os.makedirs('pic', exist_ok=True)
    for i, url in enumerate(extract_img_urls(html.text)):
        print(url)
        pic = requests.get(url, timeout=10)
        # Context manager guarantees the handle is closed even if write raises.
        with open(os.path.join('pic', str(i) + '.jpg'), 'wb') as f:
            f.write(pic.content)
        print(str(i) + '.jpg has done!')  # original printed ",jpg" — typo fixed


if __name__ == '__main__':
    main()
# 说明:
# (1)本例爬取百度贴吧页面的图片,保存到pic文件夹中
# (2)有些网页有反爬虫机制,需要添加headers,User-Agent可在网页源码中找到,注意格式
# (3)利用requests.get()获取网页源码