'''Crawl the Aming Linux tutorial and save each page as a local PDF file.'''
import re
import os
import pdfkit
import requests
# Index page of the tutorial; page links on it are resolved relative to it.
BASE_URL = "http://www.apelearn.com/study_v2/"

# Directory (created under the current working directory) receiving the PDFs.
OUTPUT_DIR = "aminglinux"

# Captures relative ``.html`` targets of ``href`` attributes.  Absolute URLs
# (anything containing ``:``) and query strings are skipped because the
# target is later appended to BASE_URL; a trailing ``#fragment`` is tolerated
# but not captured.
# NOTE(review): the previous pattern was ``.*``, which matched every line of
# the page. This pattern is inferred from how the matches are used (joined to
# the base URL, ``.html`` renamed to ``.pdf``) -- confirm against the live
# page markup.
PAGE_LINK_RE = re.compile(r'''href=["']([^"'#?:]+\.html)(?:#[^"']*)?["']''')


def extract_page_links(html):
    """Return the relative ``.html`` links found in *html*.

    The result is de-duplicated and sorted so pages are processed in a
    deterministic order. An empty list is returned when nothing matches.
    """
    return sorted(set(PAGE_LINK_RE.findall(html)))


def pdf_name_for(link):
    """Return the PDF file name for *link* by swapping only its extension.

    Unlike ``link.replace("html", "pdf")``, an "html" that appears elsewhere
    in the name is left alone (``html_intro.html`` -> ``html_intro.pdf``).
    """
    return os.path.splitext(link)[0] + ".pdf"


def main():
    """Download the tutorial index and render every linked page to a PDF.

    Creates OUTPUT_DIR if needed and changes into it, fetches BASE_URL,
    prints the fetched HTML, then converts each linked page with ``pdfkit``.
    A page that fails to convert is reported and skipped so that one bad
    page does not abort the whole run.

    Raises:
        requests.RequestException: if the index page cannot be fetched or
            the server answers with an HTTP error status.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    os.chdir(OUTPUT_DIR)

    with requests.Session() as session:
        # Without a timeout a stalled server would hang the script forever.
        response = session.get(BASE_URL, timeout=30)
        response.raise_for_status()
        text = response.text
    print(text)

    for link in extract_page_links(text):
        url = "{0}{1}".format(BASE_URL, link)
        pdf_file_name = pdf_name_for(link)
        print(pdf_file_name)
        try:
            pdfkit.from_url(url, pdf_file_name)
        except Exception as err:
            # Best effort: report the failure and move on to the next page.
            # Exception (not a bare except) keeps Ctrl-C working.
            print("failed to convert {0}: {1}".format(url, err))


if __name__ == "__main__":
    main()