记一次爬虫实践(二):下载图片及打包

登录之后,我们开始下载图片。
下面贴出完整的程序。
程序中用到的框架是 Flask,可参考 Flask 官方文档。

# encoding=UTF-8
import zipfile

import flask
import requests
import os
import urllib2
import time
from flask import Flask, jsonify

# Flask application object; the view below is registered on it via @app.route.
app = Flask(__name__)


@app.route('/msgId/<int:msgId>')
def show_msgId(msgId):
    """Fetch all images of one message, zip them and redirect to the archive.

    The message id is taken from the URL (``/msgId/<id>``); the ``int``
    converter makes Flask answer 404 for non-numeric ids instead of failing
    inside the view.

    Args:
        msgId: id of the message whose images should be packaged.

    Returns:
        A redirect response pointing at the generated zip file on the image
        server.  Aborts with 404 when no image could be downloaded.
    """
    print('test: %s' % (msgId,))

    # int() is kept so a direct call with a numeric string still works.
    images = spider_img(int(msgId))
    if not images:
        # Nothing was downloaded, so there is nothing worth archiving.
        flask.abort(404)

    try:
        zip_file = zip_images(images)
    finally:
        # The individual images are only needed until the archive is built;
        # remove them even when zipping fails so they do not pile up.
        for img in images:
            os.remove(img)

    # Redirect to the address of the image archive on the server.
    return flask.redirect('http://10.2.114.78/fximgs/{0}'.format(zip_file))


def spider_img(msgId):
    """Log in, look up the image URLs of a message and download them.

    Uses the module-level ``session`` object for both the login request and
    the message lookup, then hands the image URLs to ``download``.

    Args:
        msgId: integer message id (it is formatted into the URL with ``%d``).

    Returns:
        The list returned by ``download`` for the message's images, or an
        empty list when the API does not report the status message ``OK``.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
        'referer': 'https://sso.weidian.com/login/index.php?redirect=https://h5.weidian.com/m/message/detail.html?msgId=103931&spider_token=b78f&spider=fxmessage.messages.message-item-0.1',
        'origin': 'https://sso.weidian.com'
    }

    # NOTE(review): credentials are hard-coded in the source (masked here);
    # consider loading them from configuration or the environment instead.
    login_data = {
        'countryCode': '86',
        'phone': '187xxxx2867',
        'password': 'xxxxxx',
        'version': '1'
    }

    print('start login')

    # Presumably the login is done on the shared session so that its cookies
    # are reused by the API request below -- confirm against the service.
    resp = session.post('https://sso.weidian.com/user/login', data=login_data, headers=headers)

    print(resp.text)

    url = '''https://vap.gw.weidian.com/h5/fxmsg/message.infoCard/1.0?request={"msgId":"%d"}''' % msgId
    res = session.get(url)
    print(res)
    result = res.json()

    if result['status']['message'] != 'OK':
        print('接口异常,请稍后再试')
        # Return an empty list rather than falling through to an unbound
        # local, so callers always receive a list.
        return []

    imgUrl = result['result']['imgs']

    print(imgUrl)

    return download(imgUrl)

# Pack the downloaded images into a zip archive
def zip_images(image_name):
    """Write the given files into a new zip archive in the working directory.

    The archive is named ``<unix-timestamp>.zip`` using the current time in
    whole seconds.

    Args:
        image_name: iterable of paths of the files to add to the archive.

    Returns:
        The file name of the archive that was created.
    """
    print('creating archive')

    zip_file = str(int(time.time())) + '.zip'

    # The context manager closes the archive on every exit path.
    with zipfile.ZipFile(zip_file, mode='w') as zf:
        try:
            for img in image_name:
                zf.write(img)
        finally:
            print('closing')

    return zip_file

# Download the images
def download(listurl):
    """Download every URL in *listurl* into a local ``.jpeg`` file.

    Files are written to the working directory and named from a counter that
    starts at the current Unix time (whole seconds) and advances once per
    successful download.  A URL that fails is reported with a
    ``download error`` message and skipped.

    Args:
        listurl: iterable of image URLs.

    Returns:
        The names of the files that were written, in download order.
    """
    image_path = []
    i = int(time.time())
    for url in listurl:
        image_name = str(i) + '.jpeg'

        try:
            # Fetch before touching the disk so that a failed request does
            # not leave an empty file behind.
            req = urllib2.urlopen(url)
            try:
                buf = req.read()
            finally:
                req.close()

            # Binary, truncating mode: image bytes must be stored verbatim
            # and must never be appended to an existing file.
            with open(image_name, 'wb') as f:
                f.write(buf)
        except Exception:
            # Best effort: one bad URL must not abort the whole batch.
            print('download error')
            continue

        i += 1
        image_path.append(image_name)

    return image_path


# HTTP session shared by spider_img.  It is created at import time so that it
# also exists when the module is loaded by a WSGI server or `flask run`
# instead of being executed as a script.
session = requests.session()

if __name__ == '__main__':
    # Start Flask's built-in server on all interfaces, port 8888.
    app.run(host='0.0.0.0', port=8888)

你可能感兴趣的:(记一次爬虫实践(二):下载图片及打包)