# 后出现当前 <div class="catgroup downloads"> 中间的所有内容
# -*-coding:utf-8 -*-
import urllib.request as urllib2
import os
def _local_filename(url):
    """Return the file name under which ``url`` should be stored locally."""
    # Ignore any query string / fragment and a trailing slash, then keep only
    # the last path segment of the URL.
    path = url.split("?", 1)[0].split("#", 1)[0].rstrip("/")
    stem, ext = os.path.splitext(path.rsplit("/", 1)[-1])
    # Keep the real extension (lower-cased) so non-PDF downloads such as STEP
    # models are not mislabelled; fall back to ".pdf" when there is none.
    return stem + (ext.lower() or ".pdf")


def downpdf(pdflist, dest_dir=None):
    """Download every URL in ``pdflist`` and save each one to disk.

    Each file is named after the last path segment of its URL and keeps that
    segment's extension in lower case, so ``.../model.STEP`` is stored as
    ``model.step`` and ``.../spec_v1.0.pdf`` as ``spec_v1.0.pdf``.  URLs whose
    last segment has no extension are stored with a ``.pdf`` suffix.

    The URL is printed before each download, a running counter after it, and
    ``download finished`` once the whole list has been processed.

    Args:
        pdflist: Iterable of URL strings accepted by ``urllib.request.urlopen``.
        dest_dir: Directory to write into; it must already exist.  When
            omitted, the hard-coded DownloadCenter folder is used.

    Raises:
        Any exception raised by ``urlopen`` or ``open`` (for example
        ``urllib.error.URLError`` or ``OSError``) propagates to the caller
        and stops the remaining downloads.
    """
    if dest_dir is None:
        dest_dir = r"C:\MorganPersonalFile\Sales\Python Script\72. Grap_data\Sensirion_downloadCenter\DownloadCenter\\"

    for count, pdfurl in enumerate(pdflist, 1):
        print(pdfurl)
        target = os.path.join(dest_dir, _local_filename(pdfurl))
        # Fetch first so a failed request never leaves an empty file behind;
        # both context managers guarantee the handles are closed on error.
        with urllib2.urlopen(pdfurl) as response:
            payload = response.read()
        with open(target, 'wb') as out:
            out.write(payload)
        print("download %s pdf>>>>" % count)

    print("download finished")
# ---------------------------------------------------------------------------
# Script body: read the saved link list, extract the download URLs and hand
# them to downpdf().
# ---------------------------------------------------------------------------

# Everything is located relative to the directory that contains this script.
folder = os.path.dirname(os.path.realpath(__file__))

# Ensure a "DownloadCenter" sub-folder exists next to the script.  exist_ok
# replaces the separate exists() check and its check-then-create race.
# NOTE: where the files are actually written is decided inside downpdf().
os.makedirs(os.path.join(folder, "DownloadCenter"), exist_ok=True)

# Text file next to the script; lines of interest contain an href="..."
# attribute pointing at a .pdf or .step file.
infoName = os.path.join(folder, r'DownloadCenter_catgroupDownloads.txt')

pdflist = []
with open(infoName, 'r') as f:
    for line in f:
        # Case-insensitive check so ".pdf", ".PDF", ".step", ".STEP" (and any
        # mixed-case spelling) are all recognised.
        lowered = line.lower()
        if ".pdf" not in lowered and ".step" not in lowered:
            continue
        # Take the text between href=" and its closing quote.  Lines that
        # mention an extension but carry no href are skipped instead of
        # raising IndexError.
        _, marker, after_href = line.partition('href="')
        if not marker:
            continue
        link = after_href.split('"', 1)[0]
        if link:
            pdflist.append(link)

downpdf(pdflist)