"""Download every photo from one Weibo user's photo album.

The script reads the Weibo login cookies stored by Chrome (through the
local ``ChromeCookies`` module), queries the album listing endpoint page by
page and saves each referenced picture into ``targetDir``.
"""
import os
import re
import time

import requests

import ChromeCookies

USER_NAMBER = '1800591743'  # Weibo user ID, e.g. "1955032717"
targetDir = 'result\\18-WeiboAnalbum.py\\' + USER_NAMBER  # directory the files are saved to

# The page-count arithmetic of this script assumes the listing endpoint
# returns 20 pictures per page.
PAGE_SIZE = 20

# Matches every picture file name in a listing response.
_PIC_NAME_RE = re.compile(r'"pic_name":"(.*?)"', re.S)


def destFile(path, name=''):
    """Return the local path a download should be written to.

    Creates ``targetDir`` (including missing parent directories) on demand.

    Args:
        path: URL of the resource; its last ``/``-separated component is
            used as the file name when ``name`` is empty.
        name: Optional explicit file name that overrides the one taken
            from ``path``.

    Returns:
        ``targetDir`` joined with the chosen file name.
    """
    # targetDir is nested several levels deep, so a plain os.mkdir would
    # fail whenever the parent directories are missing.
    os.makedirs(targetDir, exist_ok=True)
    if name == '':
        # rsplit tolerates a path without any '/', unlike rindex.
        name = path.rsplit('/', 1)[-1]
    return os.path.join(targetDir, name)


def saveImage(imgUrl, name=''):
    """Download ``imgUrl`` and store it under ``targetDir``.

    Failures (network errors, HTTP error statuses, file-system errors) are
    reported on stdout and do not propagate, so one bad picture does not
    abort the rest of the album.

    Args:
        imgUrl: URL of the image to download.
        name: Optional file name; defaults to the last component of the URL.
    """
    try:
        response = requests.get(imgUrl, stream=True)
        # Do not store an HTTP error page as if it were an image.
        response.raise_for_status()
        image = response.content
        imgPath = destFile(imgUrl, name)
        # The with-statement closes the file, even when the write fails.
        with open(imgPath, 'wb') as jpg:
            jpg.write(image)
    except (IOError, requests.RequestException):
        print('保存图片失败!%s' % imgUrl)
        return
    print('保存图片成功!%s' % imgPath)


def main():
    """Fetch the album listing and download every picture it references."""
    DOMAIN_NAME = '.weibo.com'
    cookies = ChromeCookies.get_chrome_cookies(DOMAIN_NAME)
    album_url = 'http://photo.weibo.com/photos/get_latest?uid=' + USER_NAMBER

    response = requests.get(album_url, cookies=cookies)
    # Round-trip through GBK to drop characters that encoding cannot
    # represent (presumably so the text can be printed on a GBK console).
    html_doc = response.text.encode('gbk', 'ignore').decode('gbk')

    total_match = re.search(r'"total":(.*?),', html_doc)
    if total_match is None:
        raise SystemExit('Could not find the picture total in the album '
                         'response; are the Weibo cookies valid?')
    imgnum = total_match.group(1)
    print(imgnum)

    # Ceiling division: avoids requesting an extra, empty page when the
    # total is an exact multiple of PAGE_SIZE.
    page_count = (int(imgnum) + PAGE_SIZE - 1) // PAGE_SIZE
    for page in range(1, page_count + 1):
        get_url = album_url + '&page=' + str(page)
        response = requests.get(get_url, cookies=cookies)
        html_doc = response.text.encode('gbk', 'ignore').decode('gbk')
        for match in _PIC_NAME_RE.finditer(html_doc):
            picture = match.group(1)
            pictureurl = 'http://ww3.sinaimg.cn/mw690/' + picture
            saveImage(pictureurl)


if __name__ == '__main__':
    main()
其中,ChromeCookies 模块用于获取 Chrome 浏览器所保存的 Cookie,代码如下:
"""Read the cookies Chrome has stored for a given host (Windows only)."""
import os
import re
import shutil
import sqlite3
import subprocess

import requests
import win32crypt


def get_chrome_cookies(url):
    """Return Chrome's stored cookies for one host as a ``{name: value}`` dict.

    The cookie database is copied next to the current working directory,
    queried, and the copy is removed again before returning.

    NOTE(review): this reads ``Default\\Cookies`` and decrypts values with
    ``win32crypt.CryptUnprotectData`` only; newer Chrome releases may keep
    the database elsewhere or use a different encryption scheme — verify
    against the targeted Chrome version.

    Args:
        url: Value compared against the ``host_key`` column, e.g.
            ``'.weibo.com'``. Despite the parameter name it is a host key,
            not a full URL.

    Returns:
        dict mapping cookie name to its decoded value.

    Raises:
        FileNotFoundError: if Chrome's cookie database does not exist.
    """
    DIST_COOKIE_FILENAME = os.path.join(os.curdir, 'python-chrome-cookies')
    SOUR_COOKIE_FILENAME = os.path.join(
        os.environ['LOCALAPPDATA'],
        r'Google\Chrome\User Data\Default\Cookies')
    if not os.path.exists(SOUR_COOKIE_FILENAME):
        raise FileNotFoundError('Cookies 文件不存在...')

    ret_dict = {}
    try:
        # Work on a private copy of the database (presumably because Chrome
        # may hold the original open). shutil raises on failure instead of
        # silently continuing like the shell `copy` command did.
        shutil.copyfile(SOUR_COOKIE_FILENAME, DIST_COOKIE_FILENAME)
        conn = sqlite3.connect(DIST_COOKIE_FILENAME)
        try:
            rows = conn.execute(
                "SELECT name, value, encrypted_value FROM cookies "
                "WHERE host_key = ?",
                (url,))
            for name, value, encrypted_value in rows:
                if encrypted_value:
                    # CryptUnprotectData returns (description, data).
                    ret = win32crypt.CryptUnprotectData(
                        encrypted_value, None, None, None, 0)
                    ret_dict[name] = ret[1].decode()
                else:
                    # Fall back to the plaintext column when there is no
                    # encrypted blob to decrypt.
                    ret_dict[name] = value
        finally:
            conn.close()
    finally:
        # Always remove the temporary copy, even if the query or the
        # decryption failed.
        if os.path.exists(DIST_COOKIE_FILENAME):
            os.remove(DIST_COOKIE_FILENAME)
    return ret_dict
效果图:
GitHub地址:https://github.com/Jueee/05-WebCrawlers/blob/master/18-WeiboAnalbum.py