【Python】批量下载新浪微博某用户的头像相册


import ChromeCookies
import requests
import re,time,os


# Weibo user ID whose album is downloaded, e.g. "1955032717"
USER_NAMBER = '1800591743'

# Directory the downloaded images are written to (Windows-style separators)
targetDir = '\\'.join(('result', '18-WeiboAnalbum.py', USER_NAMBER))

# Build the local save path for an image
def destFile(path,name=''):
    """Return the local file path under ``targetDir`` for an image.

    The target directory is created on demand, including any missing
    parent directories.

    Args:
        path: URL (or path) of the image. When ``name`` is empty, the text
            after the last ``/`` is used as the file name; a ``path``
            without any ``/`` is used as the file name unchanged.
        name: Optional explicit file name that overrides the one derived
            from ``path``.

    Returns:
        ``targetDir`` joined with the chosen file name.
    """
    # targetDir is several levels deep, so os.mkdir would fail when the
    # parents are missing; exist_ok also removes the check-then-create race.
    os.makedirs(targetDir, exist_ok=True)
    if name == '':
        name = path.rsplit('/', 1)[-1]
    return os.path.join(targetDir, name)

# Download one image and save it to disk
def saveImage(imgUrl,name=''):
    """Download ``imgUrl`` and write it to the path given by ``destFile``.

    Failures are reported on stdout and do not raise, so one bad image
    does not abort a batch download.

    Args:
        imgUrl: URL of the image to download.
        name: Optional file name; when empty, ``destFile`` derives it from
            the URL.

    Returns:
        None.
    """
    try:
        # No stream=True: the whole body is read via .content anyway.
        response = requests.get(imgUrl, timeout=30)
        # Do not save an HTTP error page as if it were an image.
        response.raise_for_status()
    except requests.RequestException:
        print('保存图片失败!%s' % imgUrl)
        return

    imgPath = destFile(imgUrl,name)
    try:
        # The with-statement closes the file; no explicit close is needed.
        with open(imgPath, "wb") as jpg:
            jpg.write(response.content)
    except IOError:
        print('保存图片失败!%s' % imgUrl)
        return
    print('保存图片成功!%s' % imgPath)

if __name__=='__main__':
    # Script entry point: read the Weibo cookies stored by Chrome, ask the
    # photo API how many pictures the user has, then walk the listing page
    # by page and download every picture found.
    DOMAIN_NAME = '.weibo.com'
    PAGE_SIZE = 20  # pictures per listing page, as the original paging math assumed
    cookies = ChromeCookies.get_chrome_cookies(DOMAIN_NAME)
    album_url = 'http://photo.weibo.com/photos/get_latest?uid='+USER_NAMBER
    response = requests.get(album_url, cookies=cookies)
    # Round-trip through GBK drops characters that GBK cannot represent
    # (presumably so the text is printable on a GBK console -- confirm).
    html_doc = response.text.encode('gbk','ignore').decode('gbk')

    total_match = re.search(r'"total":(.*?),', html_doc)
    if total_match is None:
        # Without this guard a response lacking "total" crashes with an
        # opaque AttributeError on None.
        raise SystemExit('未能从响应中解析出图片总数("total")')
    imgnum = total_match.group(1)
    print(imgnum)

    # Ceiling division: avoids requesting an extra, empty page when the
    # total is an exact multiple of the page size.
    page_count = (int(imgnum) + PAGE_SIZE - 1) // PAGE_SIZE
    for page in range(1, page_count + 1):
        get_url = album_url + '&page='+str(page)
        response = requests.get(get_url, cookies=cookies)

        html_doc = response.text.encode('gbk','ignore').decode('gbk')
        for pic_match in re.finditer(r'"pic_name":"(.*?)"', html_doc,re.S):
            picture = pic_match.group(1)
            pictureurl = 'http://ww3.sinaimg.cn/mw690/'+picture
            saveImage(pictureurl)


其中,ChromeCookies 是一个独立的模块(ChromeCookies.py),用于读取 Chrome 浏览器所保存的 Cookie,代码如下:

import subprocess
import sqlite3
import win32crypt
import re,os
import requests

def get_chrome_cookies(url):
    """Return the cookies Chrome has stored for one host as a dict.

    The Chrome cookie database is copied to a temporary file in the
    current directory, read with sqlite3, and the copy is removed again,
    even when reading or decrypting fails.

    Args:
        url: Value compared for exact equality against the ``host_key``
            column, e.g. ``'.weibo.com'``.

    Returns:
        Dict mapping cookie name to its decrypted value (str).

    Raises:
        Exception: If the Chrome cookie database file does not exist.
        KeyError: If the ``LOCALAPPDATA`` environment variable is not set.
    """
    # Function-scope import so this block does not depend on the
    # module-level import list.
    import shutil

    # os.path.join avoids the invalid "\p" escape of the former literal.
    DIST_COOKIE_FILENAME = os.path.join('.', 'python-chrome-cookies')
    SOUR_COOKIE_FILENAME = os.path.join(os.environ['LOCALAPPDATA'],r'Google\Chrome\User Data\Default\Cookies')
    if not os.path.exists(SOUR_COOKIE_FILENAME):
        raise Exception('Cookies 文件不存在...')

    # Query a copy rather than the live database; shutil raises on failure
    # instead of silently continuing like the former shell `copy`.
    shutil.copyfile(SOUR_COOKIE_FILENAME, DIST_COOKIE_FILENAME)
    ret_dict = {}
    try:
        conn = sqlite3.connect(DIST_COOKIE_FILENAME)
        try:
            rows = conn.execute("SELECT host_key, name, path, value, encrypted_value FROM cookies")
            for host_key, name, _path, _value, encrypted_value in rows:
                if host_key != url:
                    continue
                # NOTE(review): this decrypts with Windows DPAPI only; newer
                # Chrome versions may store values in a different encrypted
                # format -- verify against the targeted Chrome version.
                ret = win32crypt.CryptUnprotectData(encrypted_value, None, None, None, 0)
                ret_dict[name] = ret[1].decode()
        finally:
            conn.close()
    finally:
        # Always remove the temporary copy, including on errors.
        os.remove(DIST_COOKIE_FILENAME)
    return ret_dict


效果图:

【Python】批量下载新浪微博某用户的头像相册_第1张图片



GitHub地址:https://github.com/Jueee/05-WebCrawlers/blob/master/18-WeiboAnalbum.py

你可能感兴趣的:(新浪微博,python,爬虫,下载,相册)