简单的Python应用(使用了urllib、re等库)

下面是一个简单的Python应用,主要使用了Python的urllib、re等库。代码非常简单,可以作为其他Python网络应用的模板(已在Python 3下测试)。

#!/usr/bin/env python
import http.client
import re
import sys
import urllib.request
from urllib.parse import urljoin, urlparse
def _local_file_name(parsed_url):
    """Return the local file name used to save the page at *parsed_url*.

    The name is the last segment of the URL path. URLs whose path is empty
    or ends with '/' have no such segment, so 'index.html' is used instead.
    A query string is kept in the name (with '/' replaced by '_') so pages
    that differ only by query do not overwrite each other.
    """
    name = parsed_url.path.rsplit('/', 1)[-1] or 'index.html'
    if parsed_url.query:
        name = f"{name}?{parsed_url.query.replace('/', '_')}"
    return name


def download(url, flag, encoding='gb2312'):
    """Fetch *url*, save it in the current directory, optionally follow links.

    Args:
        url: Address of the page to fetch (anything ``urlopen`` accepts).
        flag: ``1`` marks *url* as the index page: every ``a href="..."``
            target that is not an absolute ``http`` link is resolved against
            *url* and downloaded too (one level deep only, with ``flag`` 0).
            Any other value downloads just the single page.
        encoding: Text encoding used to decode the page before links are
            extracted. Defaults to ``'gb2312'``, the value that was
            previously hard-coded.

    Returns:
        None. Failures to fetch, decode or save a page are reported on
        stderr and end the processing of that page without raising.
    """
    try:
        with urllib.request.urlopen(url) as response:
            raw_page = response.read()
        page_text = raw_page.decode(encoding)
        parsed_url = urlparse(url)
        page_url = parsed_url.geturl()
        file_name = _local_file_name(parsed_url)
        print("Downloading: ", page_url, ";Saving: ", file_name)

        # Save the bytes exactly as received so the file content matches the
        # charset the page declares, independent of the local locale.
        with open(file_name, 'wb') as out_file:
            out_file.write(raw_page)
    except (OSError, ValueError, http.client.HTTPException) as exc:
        # OSError covers URLError/HTTPError and file errors; ValueError covers
        # malformed URLs and UnicodeDecodeError. Stay best-effort: report and
        # stop here instead of failing later on unset variables.
        print("Failed to download", url, ":", exc, file=sys.stderr)
        return

    if flag != 1:
        return

    for link in re.findall(r'a href="([^"]+)"', page_text):
        link = link.strip()
        # Skip empty targets, in-page anchors and absolute http(s) links.
        if not link or link.startswith(('http', '#')):
            continue
        # urljoin resolves 'x.html', '/x.html' and '../x.html' correctly,
        # which plain string concatenation with the page directory did not.
        target = urljoin(page_url, link)
        print("!!!!....", target)
        download(target, 0, encoding)
def main(argv=None):
    """Run the downloader from the command line.

    Args:
        argv: Argument list including the program name; defaults to
            ``sys.argv``. The first argument after the program name is the
            URL of the index page; any further arguments are ignored.

    Returns:
        Process exit status: 0 after the download was attempted, 2 when no
        URL was supplied.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        program = args[0] if args else "download.py"
        print(f"Usage: {program} URL", file=sys.stderr)
        return 2
    # flag 1: treat the URL as the index page and follow its relative links.
    download(args[1], 1)
    return 0


if __name__ == "__main__":
    sys.exit(main())
 

 

你可能感兴趣的:(html,python,网络应用)