import ChromeCookies
import requests
import re,time,os
# Weibo user ID whose album is downloaded, e.g. "1955032717".
USER_NAMBER = '1800591743'
# Directory that receives the downloaded files (one sub-folder per user ID).
targetDir = 'result\\18-WeiboAnalbum.py\\' + USER_NAMBER
def destFile(path,name=''):
    """Return the local path under ``targetDir`` where a download is stored.

    ``targetDir`` is created on demand, including any missing parent
    directories.

    Args:
        path: Source URL or path. When ``name`` is empty, the text after the
            last ``/`` is used as the file name (the whole string if it
            contains no ``/``).
        name: Optional explicit file name that overrides the one derived
            from ``path``.

    Returns:
        ``targetDir`` joined with the chosen file name.
    """
    # targetDir is several levels deep; os.mkdir fails when a parent folder
    # is missing, so create the whole chain and tolerate an existing one.
    os.makedirs(targetDir, exist_ok=True)
    if name:
        return os.path.join(targetDir, name)
    # rpartition never raises: without a '/' it yields the full string,
    # whereas rindex aborted with ValueError.
    return os.path.join(targetDir, path.rpartition('/')[2])
def saveImage(imgUrl,name=''):
    """Download one picture and write it into the target directory.

    Args:
        imgUrl: URL of the picture to fetch.
        name: Optional file name; when empty, ``destFile`` derives the name
            from ``imgUrl``.

    Returns:
        None. The outcome (success or failure) is reported on stdout.
    """
    # The body is read in one go below, so a streamed response is not needed.
    response = requests.get(imgUrl)
    if not response.ok:
        # Do not store an HTTP error page as if it were a picture.
        print('保存图片失败!%s' % imgUrl)
        return
    imgPath = destFile(imgUrl,name)
    try:
        # The with-statement closes the file on every path, so no explicit
        # close (and no finally clause touching a possibly unbound name).
        with open(imgPath, "wb") as jpg:
            jpg.write(response.content)
    except IOError:
        # Report the failure; this branch used to print the success text.
        print('保存图片失败!%s' % imgUrl)
        return
    print('保存图片成功!%s' % imgPath)
if __name__=='__main__':
    # Script entry point: list the user's album page by page and download
    # every picture that the listing mentions.
    DOMAIN_NAME = '.weibo.com'
    cookies = ChromeCookies.get_chrome_cookies(DOMAIN_NAME)
    album_url = 'http://photo.weibo.com/photos/get_latest?uid='+USER_NAMBER
    response = requests.get(album_url, cookies=cookies)
    # Round-trip through GBK to drop characters that codec cannot represent
    # (presumably so the text is safe to print on a GBK console - confirm).
    html_doc = response.text.encode('gbk','ignore').decode('gbk')
    total_match = re.search(r'"total":(.*?),', html_doc)
    if total_match is None:
        # Without this guard the script died with an opaque AttributeError.
        raise SystemExit('No "total" field found in the album response; '
                         'check that the Weibo cookies are valid.')
    imgnum = total_match.group(1)
    print(imgnum)
    # Compile once instead of re-resolving the pattern for every page.
    pic_pattern = re.compile(r'"pic_name":"(.*?)"', re.S)
    # Pages are 1-based; the paging arithmetic assumes 20 pictures per page
    # and adds one page so a trailing partial page is included.
    for page in range(1, int(imgnum)//20 + 2):
        get_url = album_url + '&page='+str(page)
        response = requests.get(get_url, cookies=cookies)
        html_doc = response.text.encode('gbk','ignore').decode('gbk')
        for match in pic_pattern.finditer(html_doc):
            picture = match.group(1)
            pictureurl = 'http://ww3.sinaimg.cn/mw690/'+picture
            saveImage(pictureurl)
其中,ChromeCookies 模块用于获取 Chrome 浏览器所保存的 Cookie,其代码如下:
import subprocess
import sqlite3
import win32crypt
import re,os
import requests
def get_chrome_cookies(url):
    """Return the Chrome cookies stored for one host as a name -> value dict.

    The Cookies SQLite file of the default Chrome profile is copied into the
    working directory, queried, and the copy is removed again afterwards.

    Args:
        url: Value compared for equality with the ``host_key`` column,
            e.g. ``'.weibo.com'``.

    Returns:
        dict mapping each cookie name to its value as text.

    Raises:
        KeyError: if the ``LOCALAPPDATA`` environment variable is not set.
        FileNotFoundError: if the Chrome Cookies file does not exist.
    """
    # Imported locally so this function does not depend on an edit to the
    # file-level import block.
    import shutil

    # Raw string: '\p' is not a valid escape sequence in a normal literal.
    DIST_COOKIE_FILENAME = r'.\python-chrome-cookies'
    SOUR_COOKIE_FILENAME = os.path.join(os.environ['LOCALAPPDATA'],r'Google\Chrome\User Data\Default\Cookies')
    if not os.path.exists(SOUR_COOKIE_FILENAME):
        raise FileNotFoundError('Cookies 文件不存在...')
    # Query a private copy rather than the live database
    # (presumably because a running Chrome keeps the original locked - confirm).
    shutil.copyfile(SOUR_COOKIE_FILENAME, DIST_COOKIE_FILENAME)
    ret_dict = {}
    try:
        conn = sqlite3.connect(DIST_COOKIE_FILENAME)
        try:
            # Let SQLite filter by host with a bound parameter instead of
            # scanning every row in Python.
            rows = conn.execute(
                "SELECT name, value, encrypted_value FROM cookies WHERE host_key = ?",
                (url,))
            for name, value, encrypted_value in rows:
                if encrypted_value:
                    # CryptUnprotectData returns a pair; index 1 is the
                    # decrypted payload as bytes.
                    # NOTE(review): newer Chrome releases reportedly use a
                    # different cookie encryption scheme - confirm this call
                    # still succeeds on the targeted Chrome version.
                    value = win32crypt.CryptUnprotectData(encrypted_value, None, None, None, 0)[1].decode()
                # Rows without an encrypted blob keep the plain "value" column.
                ret_dict[name] = value
        finally:
            # Close before deleting: an open handle would block the removal.
            conn.close()
    finally:
        # Always remove the temporary copy, even when the query or the
        # decryption failed.
        os.remove(DIST_COOKIE_FILENAME)
    return ret_dict
效果图:
GitHub地址:https://github.com/Jueee/05-WebCrawlers/blob/master/18-WeiboAnalbum.py