先用 Firebug 分析请求图片的链接,拿到 URL(以下是抓到的请求头):
GET /channel/listjson?fr=channel&tag1=%E7%BE%8E%E5%A5%B3&tag2=%E5%B0%8F%E6%B8%85%E6%96%B0&sorttype=0&pn=30&rn=60&ie=utf8&oe=utf-8&1380172568359 HTTP/1.1
Host: image.baidu.com
User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0
Accept: */*
Accept-Language: zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3
Accept-Encoding: gzip, deflate
X-Requested-With: XMLHttpRequest
Referer: http://image.baidu.com/channel
Cookie: BAIDUID=67B863A46926A8538FAA24A48EBD753D:FG=1; SSUDBTSP=1373611768; SSUDB=3V4YW5LZ3dGU202UTBTcFB5VmtCTkdRTDdqaHZVSFhEYzkwS2kwTXY2fjRNd2RTQVFBQUFBJCQAAAAAAAAAAAEAAADCdwQJaHVhODkxMTE4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPim31H4pt9RZ; MCITY=-%3A; BDUSS=3V4YW5LZ3dGU202UTBTcFB5VmtCTkdRTDdqaHZVSFhEYzkwS2kwTXY2fjRNd2RTQVFBQUFBJCQAAAAAAAAAAAEAAADCdwQJaHVhODkxMTE4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPim31H4pt9RZ; BAIDU_WISE_UID=68CEB3BE25F99C3F7250C9D764F65A02; userid=k21u44gs5; Hm_lvt_737dbb498415dd39d8abf5bc2404b290=1380162231,1380170431; H_PS_PSSID=; Hm_lpvt_737dbb498415dd39d8abf5bc2404b290=1380172544; the_nav_width=1479
Connection: keep-alive
#!/usr/bin/python
#coding=utf-8
import urllib,json,socket
import random,os
import sys,datetime
# Baidu image-channel scraper.
#
# Pages through the channel's JSON listing, downloads every image named by an
# entry's 'obj_url' into PIC_DIR, and periodically moves the downloaded files
# into zip archives under BASE_DIR with the external `zip` command.
#
# The syntax below is valid on both Python 2.6+ and Python 3.

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

BASE_DIR = '/root/test/'
PIC_DIR = BASE_DIR + 'pic/'
PAGE_COUNT = 10
# NOTE(review): the offset advances by 30 while each request asks for rn=60
# results, so consecutive pages appear to overlap -- confirm this is intended.
PAGE_STEP = 30
# An older endpoint, http://image.baidu.com/i?tn=listjson&word=liulan&...,
# was used before this channel listing URL.
LIST_URL = ('http://image.baidu.com/channel/listjson?fr=channel'
            '&tag1=%E7%BE%8E%E5%A5%B3&tag2=%E5%B0%8F%E6%B8%85%E6%96%B0'
            '&sorttype=0&pn={offset}&rn=60&ie=utf8&oe=utf-8&{nonce}')
IMAGE_EXTENSIONS = ('.png', '.jpg')


def image_filename(url, directory):
    """Return the local path under *directory* for the image at *url*.

    The name is the last path component of the URL with any query string or
    fragment removed. If it does not already end in .png/.jpg (compared
    case-insensitively), '.jpg' is appended.
    """
    path = url.split('?', 1)[0].split('#', 1)[0]
    name = os.path.basename(path)
    if os.path.splitext(name)[1].lower() not in IMAGE_EXTENSIONS:
        name += '.jpg'
    return os.path.join(directory, name)


def fetch(url):
    """Return the raw body of *url*, always closing the response.

    Network failures, including socket timeouts, surface as IOError.
    """
    response = urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def archive_pictures(zip_name):
    """Move everything in PIC_DIR into BASE_DIR/<zip_name>.

    `zip -m` deletes the files it archives. The command goes through the
    shell because it relies on the `*` glob.
    """
    os.system('zip -6qrm ' + BASE_DIR + zip_name + ' ' + PIC_DIR + '*')


def save_images(entries, page, saved):
    """Download each entry's 'obj_url' into PIC_DIR.

    *saved* is the running 1-based image counter used in progress messages;
    the updated counter is returned. As in the original flat script, the
    first download or write failure abandons the rest of the page.
    """
    for entry in entries:
        if not entry or not entry.get('obj_url'):
            continue
        image_url = entry['obj_url']
        try:
            data = fetch(image_url)
        except IOError:
            print('img %s_%s is false2' % (page, saved))
            break
        try:
            # Binary mode: the payload is image bytes, not text.
            with open(image_filename(image_url, PIC_DIR), 'wb') as out:
                out.write(data)
        except IOError:
            print('img %s_%s is false3' % (page, saved))
            break
        print('img %s_%s is ok' % (page, saved))
        saved += 1
    return saved


def main():
    """Scrape up to PAGE_COUNT listing pages and archive the results."""
    started = datetime.datetime.now()
    socket.setdefaulttimeout(10)
    if not os.path.isdir(PIC_DIR):
        # makedirs also creates BASE_DIR when it is missing.
        os.makedirs(PIC_DIR)

    page = 0
    saved = 1
    while page < PAGE_COUNT:
        if page % 2 == 0:
            # Every second page, move what has been downloaded so far into
            # its own archive.
            zip_name = 'baiduzip_' + str(page) + '.zip'
            print('make a zip file')
            archive_pictures(zip_name)
            print(zip_name + ' file is ok!')

        # The random suffix mimics the cache-busting token the site sends.
        url = LIST_URL.format(offset=PAGE_STEP * page,
                              nonce=str(random.random()))
        print(url)
        try:
            raw = fetch(url)
        except IOError:
            print('img %s_%s is false1' % (page, saved))
            break
        try:
            listing = json.loads(raw)
        except ValueError:
            print('page %s returned invalid JSON' % page)
            break

        entries = listing.get('data') if isinstance(listing, dict) else None
        if not entries:
            # An empty listing means there is nothing more to page through.
            break
        saved = save_images(entries, page, saved)
        page += 1

    finished = datetime.datetime.now()
    print((finished - started).seconds)
    archive_pictures('pic_' + str(page) + '.zip')


if __name__ == '__main__':
    main()
    sys.exit()