小学python教材电子版_【python爬虫】中小学人教版教材下载 (调用IDM)

根据楼主的 Python 代码修改而来,没有打包成可执行文件,代码如下(新手作品,勿喷):

#!/usr/bin/env python3
# encoding: utf-8
'''
@author: zengyun
@software: tool
@application:
@file: down.py
@time: 2020/2/21 15:46
@desc:
'''
import requests,bs4

from tqdm import tqdm

# Fetch a page and parse it
def get_soup(url, timeout=30):
    """Download *url* and return it parsed as a BeautifulSoup document.

    Args:
        url: Address of the HTML page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        A ``bs4.BeautifulSoup`` object built with the ``lxml`` parser.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    r = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page and silently finding no
    # matching elements further down the pipeline.
    r.raise_for_status()
    r.encoding = 'utf-8'  # decode the document as UTF-8
    return bs4.BeautifulSoup(r.text, 'lxml')

# Create a directory
def mkdir(path):
    """Create the directory *path* (including parents) if it does not exist.

    Surrounding whitespace and trailing backslashes are removed from *path*
    before it is checked. A status message is printed in either case.

    Args:
        path: Directory path to create.

    Returns:
        True if the directory was created, False if the path already existed.
    """
    # Function-local import, as in the original script.
    import os

    path = path.strip()
    path = path.rstrip("\\")

    if os.path.exists(path):
        print(path + '目录已存在')
        return False

    # exist_ok guards against the path appearing between the check and here.
    os.makedirs(path, exist_ok=True)
    print(path + '创建成功')
    return True


# Get the desktop path
def get_desktop():
    """Return the current user's Desktop folder as stored in the registry.

    Reads the "Desktop" value of the ``Shell Folders`` key under
    ``HKEY_CURRENT_USER``. Windows only, because it relies on ``winreg``.
    """
    # The file never imports winreg at module level, so import it here;
    # keeping it local also keeps the rest of the module importable elsewhere.
    import winreg

    # The key handle is a context manager, so it is closed after the read.
    with winreg.OpenKey(
        winreg.HKEY_CURRENT_USER,
        r'Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders',
    ) as key:
        return winreg.QueryValueEx(key, "Desktop")[0]


# Download a file
def get_pdf(savename, filename, url):
    """Stream *url* to the file *savename*, showing a progress bar.

    Args:
        savename: Full path of the file to write.
        filename: Label shown next to the progress bar.
        url: Address of the file to download.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            an error page is never saved under a ``.pdf`` name.
        requests.RequestException: On connection problems.
    """
    # stream=True downloads only the headers up front and keeps the
    # connection open; the body is then pulled chunk by chunk below.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()

        # Total size in bytes, matching unit='B'. The header is optional, in
        # which case tqdm simply shows a running count without a total.
        length = response.headers.get('Content-Length')
        content_size = int(length) if length is not None else None

        with open(savename, 'wb') as f, tqdm(
            total=content_size,
            initial=0,
            unit='B',
            unit_scale=True,
            desc=filename,
        ) as pbar:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    # Advance by what was actually received; the last chunk
                    # is usually shorter than 1024 bytes.
                    pbar.update(len(chunk))


url = 'https://bp.pep.com.cn/jc/index.html'
discipline0 = '人教版中小学电子教案'  # name of the top-level download folder
discipline1 = []  # first-level (category) names; not populated by this script
discipline2 = {}  # second-level name -> page URL
discipline = get_soup(url=url).find_all('div', "list_sjzl_jcdzs2020")

# Create the top-level folder. The desktop path is read from the registry
# once and reused for every path built below.
root_dir = get_desktop() + "\\" + discipline0
mkdir(path=root_dir)

for i in discipline:
    discipline_class = i.find('div', "container_title_jcdzs2020")
    # mkdir() strips whitespace from the end of the path it creates, so strip
    # the name here too; otherwise nested paths built from the raw text could
    # point at a folder that was never created.
    discipline_class_name = discipline_class.get_text().strip()

    # Create the first-level folder.
    class_dir = root_dir + "\\" + discipline_class_name
    mkdir(path=class_dir)

    for a in i.find_all('a'):
        value = 'https://bp.pep.com.cn/jc' + a['href'][1:]
        key = a.get_text().strip()
        discipline2[key] = value  # record name -> URL

        # Create the second-level folder.
        subject_dir = class_dir + "\\" + key
        mkdir(path=subject_dir)

        textbook = get_soup(url=value).find_all('ul', "clearfix")
        # Distinct loop name: the original reused `i` here, shadowing the
        # outer loop variable.
        for book_list in textbook:
            for j in book_list.find_all('li', 'fl js_cp'):
                title = j.find('a')['title']
                save_name = subject_dir + '\\' + title + ".pdf"
                h_url = j.find('a', "btn_type_dl")['href']
                d_url = value + h_url[2:]
                try:
                    get_pdf(savename=save_name, filename=title, url=d_url)
                except requests.RequestException as err:
                    # One failed download should not abort the whole crawl.
                    print(title + '下载失败: ' + str(err))

你可能感兴趣的:(小学python教材电子版)