# Comic-downloading script. The target site is itself a pirate mirror and has
# essentially no anti-scraping measures; the one catch is that image requests
# need a Referer header carrying the page number. Everything else is a plain
# download.
# Flow: collect all chapter URLs from the index page, iterate over them, and
# download each chapter's images using multiple threads.
import requests, os, re, threading
class YiRenZhiXia():
    """Downloader for the comic "一人之下" from a pirate mirror site.

    Workflow: scrape every chapter URL from the index page, then for each
    chapter fetch its image-URL list and download the images, one thread per
    image.  The image host validates the Referer header: it must be the
    chapter viewer URL, with a ``?p=<page>`` suffix for pages past the first.
    """

    # Root directory every chapter folder is created under.
    SAVE_ROOT = 'E:\\一人之下漫画'

    def __init__(self):
        # Headers used for the HTML (index / chapter page) requests.
        self.headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Referer': 'https://www.lbsulu.com/mh/yirenzhixia/160246.html',
            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Mobile Safari/537.36',
        }
        self.comic_chapter_list_url = 'https://m.bnmanhua.com/comic/15.html'  # chapter index page
        self.chapter_prefix = 'https://m.bnmanhua.com'  # chapter URLs scraped from the index are relative
        self.img_prefix = 'https://img.yaoyaoliao.com/'  # image URLs scraped from a chapter are relative

    def get_chapter_url(self):
        """Return [(relative_chapter_url, chapter_title), ...] scraped from the index page."""
        response = requests.get(self.comic_chapter_list_url, headers=self.headers).text
        # FIX: the previous pattern '(.*?) ' had a single capture group, so the
        # caller's two-value unpacking could never have worked.  This
        # reconstruction assumes the index lists chapters as
        # <a href="/comic/<id>/<id>.html">title</a> anchors --
        # TODO(review): confirm against the live page's markup.
        chapter_url_list = re.findall(r'<a href="(/comic/\d+/\d+\.html)"[^>]*>(.*?)</a>', response)
        return chapter_url_list

    def get_img_url(self, url):
        """Return the list of (relative, JSON-escaped, quoted) image URLs for one chapter."""
        # Consistency fix: use the shared prefix attribute instead of a
        # duplicated local constant.
        response = requests.get(self.chapter_prefix + url, headers=self.headers).text
        # The chapter page embeds its image list as a JS literal: var z_img='[...]';
        img_url_list = re.findall(r" var z_img='\[(.*?)\]';", response)
        # Split the bracket contents into the individual quoted URL strings.
        return img_url_list[0].split(',')

    def save(self, i, url, referer, title):
        """Download one image to <SAVE_ROOT>/<title>/<i+1>.<title>.jpg.

        i       -- zero-based page index within the chapter (file name uses i + 1)
        url     -- absolute image URL
        referer -- chapter viewer URL; the image host rejects requests without it
        title   -- chapter title, used as the sub-directory name
        """
        # FIX: the previous header names/values contained stray spaces
        # ('accept - encoding', 'q = 0.9'), sending malformed headers.
        headers = {
            'accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.9',
            'referer': referer,
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
        }
        response = requests.get(url, headers=headers).content  # download the image bytes
        chapter_dir = os.path.join(self.SAVE_ROOT, title)
        # FIX: makedirs(exist_ok=True) replaces the isdir/mkdir pair -- race-free
        # and also creates the root directory on first use.
        os.makedirs(chapter_dir, exist_ok=True)
        with open(os.path.join(chapter_dir, str(i + 1) + '.' + title + '.jpg'), 'wb') as f:
            f.write(response)
        print('正在下载:' + title + '-' + str(i + 1))

    def run(self):
        """Download every chapter: chapters sequentially, images within a chapter in parallel."""
        # FIX: os.mkdir crashed with FileExistsError on any re-run.
        os.makedirs(self.SAVE_ROOT, exist_ok=True)
        for chapter_url, chapter_title in self.get_chapter_url():
            print(
                '-------------------------------------------开始章节:' + chapter_title + '-------------------------------------------')
            img_url_list = self.get_img_url(chapter_url)
            threads = []
            for i, raw in enumerate(img_url_list):
                # Page 1 uses the bare viewer URL; later pages add ?p=<n>.
                # FIX: the previous code appended the bare number, producing
                # '...html2' instead of the site's '...html?p=2' viewer format.
                if i == 0:
                    referer = self.chapter_prefix + chapter_url
                else:
                    referer = self.chapter_prefix + chapter_url + '?p=' + str(i + 1)
                # Entries look like '"images\/...jpg"': take the quoted part
                # and strip the JSON backslash escapes.
                img_url = self.img_prefix + raw.split('"')[1].replace('\\', '')
                t = threading.Thread(target=self.save, args=(i, img_url, referer, chapter_title))
                threads.append(t)
            for t in threads:
                t.start()
            for t in threads:
                t.join()
if __name__ == '__main__':
    # Script entry point: build the downloader and start the full run.
    YiRenZhiXia().run()