# 效果图请自行脑补,此处不做展示(狗头保命)
from lxml import etree
import requests
import os
if __name__ == '__main__':
    # Scrape the listing pages of the "4kmeinv" category and store every
    # thumbnail found in the ./picLibs directory, one file per <li> entry,
    # named after the image's alt text.
    site_root = "http://pic.netbian.com"
    url = "http://pic.netbian.com/4kmeinv/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/"
                      "85.0.4183.83 Safari/537.36"
    }
    # Seconds to wait on each HTTP call; without a timeout a stalled
    # connection would block the whole crawl.
    request_timeout = 10

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("./picLibs", exist_ok=True)

    # Page 1 is served at the category URL itself; the remaining pages are
    # index_2.html through index_171.html.
    page_urls = [url] + [
        f"http://pic.netbian.com/4kmeinv/index_{x}.html" for x in range(2, 172)
    ]

    for page_url in page_urls:
        response = requests.get(url=page_url, headers=headers, timeout=request_timeout)
        # Let requests guess the charset from the body before decoding.
        response.encoding = response.apparent_encoding
        tree = etree.HTML(response.text)
        for li in tree.xpath("//div[@class='slist']/ul/li"):
            # The src is appended to the site root for every page. Appending
            # it to the page URL (".../index_N.html" + src) does not produce
            # an image address.
            img_src = site_root + li.xpath("./a/img/@src")[0]
            img_name = li.xpath("./a/img/@alt")[0] + ".jpg"
            img_data = requests.get(url=img_src, headers=headers, timeout=request_timeout).content
            img_path = "picLibs/" + img_name
            with open(img_path, "wb") as fp:
                fp.write(img_data)
            print(img_name + "下载成功")
# 优化后(可能并没有优化)
from lxml import etree
import requests
import os
def get(url, headers, timeout=10):
    """Fetch *url* with an HTTP GET and return the body decoded as text.

    Args:
        url: Address of the page to download.
        headers: Mapping of HTTP request headers sent with the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block indefinitely on a stalled
            connection.

    Returns:
        The response body as a string, decoded with the encoding that
        requests detects from the content (``apparent_encoding``).

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` (connection failure, timeout, ...).
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Override the header-declared charset with the one detected from the
    # body so that the text is decoded correctly.
    response.encoding = response.apparent_encoding
    return response.text
def parse(url, headers):
    """Download a listing page and return its image ``<li>`` elements.

    The page at *url* is fetched through ``get`` using *headers*, parsed as
    HTML, and queried for every ``<li>`` under ``div.slist > ul``.

    Returns:
        The list of matching lxml elements (empty when nothing matches).
    """
    html_text = get(url=url, headers=headers)
    document = etree.HTML(html_text)
    return document.xpath("//div[@class='slist']/ul/li")
def save(url):
    """Download every image listed in the module-level ``li_list``.

    For each ``<li>`` element the image ``src`` is resolved against *url*,
    the image is downloaded, and the bytes are written to the
    ``小姐姐图片/`` directory under the image's alt text plus ``.jpg``.

    The function reads two module-level names that the caller must set
    before calling it: ``li_list`` (elements of the current page) and
    ``headers`` (HTTP request headers).

    Args:
        url: URL of the listing page that ``li_list`` was parsed from; used
            as the base for resolving each image's ``src``.

    Raises:
        IndexError: If an ``<li>`` has no ``a/img`` with ``src`` or ``alt``.
        requests.exceptions.RequestException: If an image download fails.
        OSError: If the target file cannot be written.
    """
    from urllib.parse import urljoin

    for li in li_list:
        # Resolve src against the page URL instead of concatenating: plain
        # concatenation would append a root-relative src such as "/uploads/..."
        # after ".../index_N.html" and produce an invalid address.
        img_src = urljoin(url, li.xpath("./a/img/@src")[0])
        img_name = li.xpath("./a/img/@alt")[0] + ".jpg"
        # A timeout keeps one stalled download from blocking the whole run.
        img_data = requests.get(url=img_src, headers=headers, timeout=10).content
        img_path = "小姐姐图片/" + img_name
        with open(img_path, "wb") as fp:
            fp.write(img_data)
        print(img_name + "下载成功")
if __name__ == '__main__':
    # Crawl every listing page of the "4kmeinv" category and hand each page's
    # image entries to save().
    url = "http://pic.netbian.com/4kmeinv/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/"
                      "85.0.4183.83 Safari/537.36"
    }

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("./小姐姐图片", exist_ok=True)

    # Page 1 is served at the category URL itself; the remaining pages are
    # index_2.html through index_171.html.
    page_urls = [url] + [
        f"http://pic.netbian.com/4kmeinv/index_{x}.html" for x in range(2, 172)
    ]

    for url in page_urls:
        # save() reads the module-level li_list and headers, so li_list must
        # be rebound to the current page's entries before each call.
        li_list = parse(url=url, headers=headers)
        save(url)