getjob

[op@TIM getpage]$ cat job.py

#coding: utf-8

#title..href...

import urllib.request

import time



# Download 30 consecutive CSDN job pages, starting at job ID 83909, and save
# each one as job/<jobID>.html. open() does not create directories, so the
# job/ directory must already exist before the script is run.
base_url = 'http://job.csdn.net/Job/Index?jobID='
page_count = 30
page = 83909

# Build every target URL up front; url[j] is the address of the j-th download.
url = [base_url + str(page + offset) for offset in range(page_count)]

for j in range(page_count):
    # Close the HTTP response deterministically instead of leaving the
    # connection to be reclaimed by the garbage collector.
    with urllib.request.urlopen(url[j]) as response:
        content = response.read().decode('utf-8')

    # Close (and therefore flush) the output file as soon as it is written.
    with open(r'job/' + str(page) + '.html', 'w', encoding='utf-8') as out_file:
        out_file.write(content)

    print('donwloading ',j,' page:',url[j])

    page = page + 1

    # Pause between requests so the server is not hit in a tight loop.
    time.sleep(4)

print('download article finished')

 

你可能感兴趣的:(get)