# Scrape images for the movie 碟中谍 (Mission: Impossible) from Douban.
import requests
from bs4 import BeautifulSoup
# In Python 3, urlretrieve lives in the urllib.request submodule (not top-level urllib), so import urllib.request and call urllib.request.urlretrieve to download.
import urllib.request
import time
import os
# Celebrities listing page of Douban movie subject 26336252.
url = "https://movie.douban.com/subject/26336252/celebrities"
# The page request is sent with a desktop-browser User-Agent header.
head = {"User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"}
# The timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() aborts on an HTTP error instead of parsing an error page.
res = requests.get(url, headers=head, timeout=10)
res.raise_for_status()
res.encoding = "utf-8"
soup = BeautifulSoup(res.text, "lxml")
# Debugging aid: dump res.text to a local file to inspect the raw HTML offline.

# Every element carrying the "celebrity" class is treated as one person entry.
plists = soup.select(".celebrity")
print(len(plists))

# Collected (name, image URL) pairs, in page order.
dataList = []
for plist in plists:
    link = plist.select_one("a")      # first <a>: its "title" holds the name
    avatar = plist.select_one("div")  # first <div>: its "style" holds the image
    if link is None or avatar is None:
        continue
    title = link.get("title")
    style = avatar.get("style")
    if not title or not style:
        continue
    # Take whatever sits inside "url(...)" rather than slicing at fixed
    # offsets, so extra whitespace, quotes or a trailing ";" in the style
    # attribute do not corrupt the URL. The original slice [22:-1] presumably
    # matched a style of exactly "background-image: url(<address>)".
    image_url = style.partition("url(")[2].partition(")")[0].strip().strip("'\"")
    if not image_url:
        continue
    print(title + "------" + image_url)
    dataList.append((title.strip(), image_url))
# Create the target folder, then download every collected image into it.
# Backslashes are written as "\\" because "\p" is not a valid escape sequence;
# the resulting path is unchanged.
picDir = "e:\\pythonFile\\碟中谍图片\\"
# exist_ok=True replaces the separate existence check and is safe if the
# folder already exists.
os.makedirs(picDir, exist_ok=True)

# Characters that Windows forbids in file names are replaced by "_".
_unsafe = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})

for index, (name, image_url) in enumerate(dataList, start=1):
    # NOTE(review): files are always saved with a ".png" suffix regardless of
    # the remote image format - confirm this is intended.
    target = picDir + name.translate(_unsafe) + ".png"
    try:
        urllib.request.urlretrieve(image_url, target)
    except (OSError, ValueError) as err:
        # OSError covers network errors (URLError is a subclass) and file
        # write errors; ValueError is raised for a malformed URL. One failed
        # image should not abort the remaining downloads.
        print("第" + str(index) + "个下载失败: " + str(err))
    else:
        print("下载第" + str(index) + "个")
    # Short pause between consecutive requests.
    time.sleep(0.01)