#! /usr/bin/python
import urllib, os,os.path
import logging
import datetime
# File logging: everything goes to a file under /root/logs whose name carries
# the date on which the script was started.
log_format = '%(asctime)s[%(levelname)s]: %(message)s'
log_level = logging.INFO
_started_on = datetime.datetime.now().strftime('%Y-%m-%d')
log_file = '/root/logs/sys_%s.log' % _started_on
logging.basicConfig(format=log_format, level=log_level, filename=log_file)
# Root logger, used by the functions below.
log = logging.getLogger()
def url_open(url):
    """Fetch *url* and return the response body.

    Args:
        url: Address to request.

    Returns:
        The body exactly as read from the connection (not decoded).
    """
    # Function-scope imports keep this block self-contained. urlopen moved
    # from ``urllib`` to ``urllib.request`` in Python 3, so try both.
    from contextlib import closing
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    # closing() guarantees the connection is released even if read() raises;
    # it works whether or not the response object is a context manager.
    with closing(urlopen(url)) as response:
        html = response.read()
    log.info('html..')
    return html
def find_imgs(url, max_imgs=6):
    """Collect the addresses of ``.jpg`` images referenced by the page at *url*.

    The page is fetched with ``url_open``, decoded as GB2312 and scanned with
    plain string searches: every ``img src=`` that is followed by ``.jpg``
    within the next 40 characters yields one address.

    Args:
        url: Address of the page to scan. It is also the base against which
            the image paths found in the page are resolved.
        max_imgs: Upper bound on the number of addresses returned. The
            default of 6 matches the previously hard-coded cap.

    Returns:
        A list of image URLs in the order they appear in the page; empty when
        nothing matched.
    """
    # urljoin lives in ``urlparse`` on Python 2 and ``urllib.parse`` on 3.
    try:
        from urllib.parse import urljoin
    except ImportError:
        from urlparse import urljoin

    marker = 'img src='
    # The path starts after the marker plus the opening quote character.
    skip = len(marker) + 1

    html = url_open(url).decode('GB2312')
    img_addrs = []
    log.info('find_imgs..')
    a = html.find(marker)
    while a != -1 and len(img_addrs) < max_imgs:
        print('w...')
        # Only accept a '.jpg' that ends within 40 characters of the marker,
        # so a tag without one is not paired with a later, unrelated '.jpg'.
        b = html.find('.jpg', a, a + 40)
        if b != -1:
            imgurl = html[a + skip:b + 4]
            # Resolve against the page address instead of blindly prepending
            # a fixed host: absolute and page-relative paths stay valid.
            img_addrs.append(urljoin(url, imgurl))
            print(imgurl)
        else:
            # No nearby '.jpg': resume the search just past this marker.
            b = a + skip
        a = html.find(marker, b)
    return img_addrs
def save_imgs(folder, img_addrs):
    """Download every image in *img_addrs* into the current working directory.

    Each file is named after the last path segment of its URL.

    Args:
        folder: Not used by this function; files are opened by bare file
            name, so they land in whatever directory is current.
        img_addrs: Iterable of image URLs to download.
    """
    for each in img_addrs:
        filename = each.split('/')[-1]
        # Fetch before opening the file: if the download fails, no empty or
        # truncated file is left behind under that name.
        img = url_open(each)
        with open(filename, 'wb') as f:
            f.write(img)
def download_mm(folder='xx', pages=5):
    """Download images from the people.com.cn front page into /root/<folder>.

    The target directory is created when missing and made the current
    working directory, because ``save_imgs`` writes relative to it.

    Args:
        folder: Name of the directory under ``/root`` that receives the files.
        pages: Not used by this function; kept so existing callers that pass
            it keep working.
    """
    target = '/root/' + folder
    # Check and create the same directory we are about to enter; the check
    # used to be hard-coded to '/root/xx' regardless of *folder*.
    if not os.path.exists(target):
        os.makedirs(target)
    os.chdir(target)
    log.info('init..')
    url = 'http://www.people.com.cn/'
    img_addrs = find_imgs(url)
    # Indexing an empty list would raise IndexError when the page yields no
    # images, so only log the first address when there is one.
    if img_addrs:
        log.info('imgs=' + img_addrs[0])
    else:
        log.warning('no images found at ' + url)
    save_imgs(folder, img_addrs)
if __name__ == '__main__':
    # Script entry point: run the download with its default arguments.
    download_mm()