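# The site is fairly simple, so parsing it with regular expressions (re) is enough.
# The images are watermarked; to remove the watermark, strip everything that
# follows the first ".jpg" in the image URL.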
import re
import requests
import os
# Fetch one listing page and map each matched image name to its image address.
#
# Parameters:
#   url -- address of the page to download.
# Returns:
#   dict pairing the i-th match of the name pattern with the i-th match of the
#   address pattern.
#
# NOTE(review): this copy of the file has lost its indentation, and both regex
# pattern literals below are truncated to a lone opening quote, so the block
# does not parse as written. The original patterns cannot be recovered from
# here -- restore them from the upstream source before running.
def ImageURL(url):
img_dic={
}
# Download the page body and decode it as UTF-8 text.
content=requests.get(url).content.decode("utf-8")
# NOTE(review): pattern literal missing -- presumably captured the image name; confirm.
img_name = re.findall(", content)
# NOTE(review): pattern literal missing -- presumably captured the image URL; confirm.
img_address = re.findall(",content)
# Pair matches by position. A repeated name overwrites the earlier entry, and
# fewer address matches than name matches would raise IndexError.
for i in range(len(img_name)):
img_dic[img_name[i]]=img_address[i]
return img_dic
# Fetch one listing page and return every match of a link pattern in it
# (per the original header comment: the links to the floor-plan pages).
#
# Parameters:
#   url -- address of the page to download.
# Returns:
#   list of matches produced by re.findall over the decoded page text.
#
# NOTE(review): indentation was lost in this copy and the regex pattern literal
# below is truncated to a lone opening quote; the original pattern cannot be
# recovered from here -- restore it from the upstream source before running.
def ImageURL2(url):
# Download the page body and decode it as UTF-8 text.
content = requests.get(url).content.decode("utf-8")
# NOTE(review): pattern literal missing -- presumably captured the relative
# link later passed to ImagGet2; confirm.
img_url = re.findall(",content)
return img_url
# Download a listing photo.
def ImagGet(item, dir_path, img_url):
    """Download one image and save it as a ``.jpg`` file.

    Args:
        item: Name used for the output file (without extension) and shown in
            the progress messages.
        dir_path: Directory the file is written to; it is joined to the file
            name with a backslash.
        img_url: Address the image bytes are fetched from.

    Returns:
        None.
    """
    print("-------------", "正在下载:", item, "-------------")
    # Fetch before opening the target file, so a failed request does not
    # leave an empty or truncated .jpg behind.
    data = requests.get(img_url).content
    with open(dir_path + "\\" + item + ".jpg", "wb") as pic:
        pic.write(data)
    print("-------------", "下载完毕:", item, "-------------")
# Download the floor-plan images linked from one detail page.
#
# Parameters:
#   img_url -- link that is appended to "https://hui.fang.ke.com//" to form
#              the address of the page to download.
#   path    -- directory the images are written to (joined with a backslash).
#
# NOTE(review): indentation was lost in this copy, so the nesting described
# below is inferred. The three regex pattern literals are truncated to a lone
# opening quote and cannot be recovered from here -- restore them from the
# upstream source before running.
def ImagGet2(img_url,path):
url="https://hui.fang.ke.com//"+img_url
content=requests.get(url).text
# NOTE(review): pattern literal missing -- presumably captured image names; confirm.
img_name = re.findall(", content)
# NOTE(review): pattern literal missing -- presumably captured image URLs; confirm.
img_address=re.findall(",content)
# NOTE(review): pattern literal missing -- "xiaoguotu" suggests rendering
# ("effect picture") image URLs; confirm.
img_address_xiaoguotu=re.findall(",content)
# First pass: save each matched address under the name found at the same
# position. Fewer name matches than address matches would raise IndexError.
for i in range(0,len(img_address)):
with open(path+"\\"+img_name[i]+".jpg","wb") as f:
print("-------------","正在下载:",img_name[i],"-------------")
f.write(requests.get(img_address[i]).content)
print("-------------", "下载完毕:",img_name[i], "-------------")
# Second pass (presumably a sibling of the loop above, not nested in it):
# save each image from the third match list, numbered by its position j.
for j in range(len(img_address_xiaoguotu)):
# Without any name match the file is called "<j>.jpg"; otherwise the first
# name is used as a prefix.
if len(img_name)<1:
path_img=path+"\\"+str(j)+".jpg"
else:
path_img=path+"\\"+img_name[0]+str(j)+".jpg"
# NOTE(review): the progress messages index img_name[0] unconditionally; if
# this `with` follows the if/else (as it appears to), an empty img_name
# raises IndexError here -- confirm and guard.
with open(path_img,"wb") as pic:
print("-------------", "正在下载:", img_name[0], "-------------")
pic.write(requests.get(img_address_xiaoguotu[j]).content)
print("-------------", "下载完毕:", img_name[0], "-------------")
# Create a directory.
def CreatDir(dir_path):
    """Create ``dir_path`` (and any missing parents) if it does not exist.

    Surrounding whitespace and trailing backslashes are stripped from
    ``dir_path`` before it is used.

    Args:
        dir_path: Directory to create.

    Returns:
        None.
    """
    path = dir_path.strip().rstrip("\\")
    # exist_ok=True closes the gap between the existence check and the
    # creation: if something else creates the directory in between, makedirs
    # no longer raises FileExistsError. The check itself is kept so that a
    # path which already exists as a regular file is still left alone.
    if not os.path.exists(path):
        os.makedirs(path, exist_ok=True)
# Directory the downloaded images are saved to. The trailing backslash is
# stripped because the download helpers add their own separator.
path = "E:\\pic\\"
dir_path = path.rstrip("\\")
# Make sure the target directory exists; otherwise the first write fails.
CreatDir(dir_path)
# Walk listing pages 1..100.
for i in range(1, 101):
    url = "https://hui.fang.ke.com/loupan/pg" + str(i)
    # Resources found on this page.
    img_dic = ImageURL(url)
    img_url = ImageURL2(url)
    for index, item in enumerate(img_dic):
        ImagGet(item, dir_path, img_dic[item])
        # Names and detail links are paired by position; skip the detail
        # page when the page yielded fewer links than names.
        if index < len(img_url):
            ImagGet2(img_url[index], dir_path)
print("-------------", "下载结束", "-------------")