import os
import sys


def Test(rootDir):
    """Walk rootDir recursively and pass every "html" file to Fun.

    For each file whose name ends with "html", Fun is called with two
    arguments: the file name up to (not including) its last dot, and the
    full path of the file.

    NOTE(review): Fun is not defined in this snippet; it must be provided
    by the surrounding module. The local name ``myip`` suggests the file
    stem is expected to be an IP address -- confirm against Fun.

    Args:
        rootDir: directory at which the walk starts.
    """
    for root, _dirs, files in os.walk(rootDir):
        for f in files:
            # Filter first: the original computed the stem with
            # f.rindex('.') before this check, so any file without a dot
            # (e.g. "README") raised ValueError and aborted the walk.
            if not f.endswith("html"):
                continue
            myfile = os.path.join(root, f)
            # Same result as f[:f.rindex('.')] for dotted names, but a name
            # with no dot yields the whole name instead of raising.
            myip = f.rsplit('.', 1)[0]
            Fun(myip, myfile)
"""Fetch http://www.baidu.com and print the final URL and response headers."""
import sys
from contextlib import closing

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved Request/urlopen into urllib.request.
    import urllib.request as urllib2

req = urllib2.Request("http://www.baidu.com")
# closing() guarantees the HTTP response is released even if printing fails;
# the original never closed it.
with closing(urllib2.urlopen(req)) as fd:
    # geturl() is the URL actually retrieved (after any redirects).
    print(fd.geturl())
    for key, value in fd.info().items():
        print("%s = %s" % (key, value))
# -*- coding: cp936 -*-
"""Fetch a URL, asking on the terminal for HTTP basic-auth credentials.

How it works:
1. Create a class derived from urllib2.HTTPPasswordMgr.
2. Override find_user_password.
3. Build an opener whose HTTPBasicAuthHandler uses that password manager.

Result: if the requested URL requires authentication, the program prompts
for a user name and password; if authentication cannot be completed, the
program ends with an exception.
"""
import getpass
import sys
from contextlib import closing

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved these classes and functions into urllib.request.
    import urllib.request as urllib2


class TerminalPassword(urllib2.HTTPPasswordMgr):
    """Password manager that prompts the operator when nothing is stored."""

    def find_user_password(self, realm, authuri):
        """Return a (username, password) tuple for realm at authuri.

        Credentials already registered with the manager are returned as-is.
        When the base class knows neither a user name nor a password, both
        are read interactively from the terminal.
        """
        retval = urllib2.HTTPPasswordMgr.find_user_password(
            self, realm, authuri)
        if retval[0] is not None or retval[1] is not None:
            return retval

        sys.stdout.write("Login required for %s at %s\n" % (realm, authuri))
        sys.stdout.write("Username:")
        # The prompt has no trailing newline, so flush it explicitly to make
        # sure it is visible before we block on stdin.
        sys.stdout.flush()
        username = sys.stdin.readline().rstrip()
        # getpass() already returns the input without the line terminator;
        # stripping further would corrupt passwords ending in whitespace.
        password = getpass.getpass()
        return (username, password)


def main():
    """Open the protected URL and print the final URL and its headers."""
    req = urllib2.Request("http://www.unicode.org/mail-arch/unicode-ml")
    # build_opener accepts extra handlers; this one does basic auth using
    # the prompting password manager defined above.
    opener = urllib2.build_opener(
        urllib2.HTTPBasicAuthHandler(TerminalPassword()))
    with closing(opener.open(req)) as fd:
        # Format into one string: print ("Retrieved", url) would print a
        # tuple under Python 2.
        print("Retrieved %s" % fd.geturl())
        for key, value in fd.info().items():
            print("%s = %s" % (key, value))


if __name__ == "__main__":
    main()
这里最关键的是 class TerminalPassword(urllib2.HTTPPasswordMgr): 这行代码:
它定义了一个 TerminalPassword 类,该类继承(扩展)了 urllib2.HTTPPasswordMgr 类。
这个扩展允许程序在需要的时候向操作员询问用户名和密码。
另外就是 build_opener():这个函数允许指定额外的处理程序(handler),这里传入的是使用 TerminalPassword 的 HTTPBasicAuthHandler。
#!/usr/bin/python
# Submit GET Data - Chapter 6 - submit_get.py
"""Look up a weather forecast page, sending the query as GET data.

Usage: submit_get.py ZIPCODE
"""
import sys
from contextlib import closing

try:  # Python 2
    import urllib2
    from urllib import urlencode
except ImportError:  # Python 3
    import urllib.request as urllib2
    from urllib.parse import urlencode


def addGETdata(url, data):
    """Return url with data appended as a URL-encoded query string.

    Args:
        url: the base URL; it may already contain a query string.
        data: anything urlencode accepts, e.g. a list of (key, value) pairs.

    Returns:
        The URL followed by "?" (or "&" when the URL already has a query
        string) and the encoded data.
    """
    if "?" not in url:
        separator = "?"
    elif url.endswith(("?", "&")):
        # The URL is already waiting for its next parameter.
        separator = ""
    else:
        # A second "?" would produce a malformed query string.
        separator = "&"
    return url + separator + urlencode(data)


def main(argv):
    """Fetch the forecast page for argv[1] and copy it to standard output."""
    if len(argv) < 2:
        # Exit status 1, like the uncaught IndexError this replaces.
        sys.exit("Usage: %s ZIPCODE" % argv[0])
    zipcode = argv[1]

    url = addGETdata(
        'http://www.wunderground.com/cgi-bin/findweather/getForecast',
        [('query', zipcode)])
    print("Using URL %s" % url)

    req = urllib2.Request(url)
    # The response body is bytes: on Python 3 it has to go to the binary
    # layer of stdout. Flush the text layer first so output stays in order.
    sys.stdout.flush()
    out = getattr(sys.stdout, "buffer", sys.stdout)
    with closing(urllib2.urlopen(req)) as fd:
        while True:
            data = fd.read(1024)
            if not data:
                break
            out.write(data)


if __name__ == "__main__":
    main(sys.argv)
#!/usr/bin/python
# Submit POST Data - Chapter 6 - submit_post.py
#
# POST data is sent as a separate part of the request, whereas GET data is
# appended to the URL; POST is therefore the one to use when a large amount
# of data has to be exchanged.
#
# Usage: submit_post.py ZIPCODE
import sys
from contextlib import closing

try:  # Python 2
    import urllib2
    from urllib import urlencode
except ImportError:  # Python 3
    import urllib.request as urllib2
    from urllib.parse import urlencode

if len(sys.argv) < 2:
    # Exit status 1, like the uncaught IndexError this replaces.
    sys.exit("Usage: %s ZIPCODE" % sys.argv[0])
zipcode = sys.argv[1]

url = 'http://www.wunderground.com/cgi-bin/findweather/getForecast'
# urlencode output is plain ASCII; Python 3 requires the request body as
# bytes, and on Python 2 the encode is a no-op.
payload = urlencode([('query', zipcode)]).encode('ascii')

req = urllib2.Request(url)
# Passing a body to urlopen is what turns the request into a POST.
# The response is bytes, so on Python 3 write it to stdout's binary layer.
out = getattr(sys.stdout, "buffer", sys.stdout)
with closing(urllib2.urlopen(req, payload)) as fd:
    while True:
        chunk = fd.read(1024)
        if not chunk:
            break
        out.write(chunk)
#!/usr/bin/env python
# Obtain Web Page Information With Simple Error Handling - Chapter 6
# error_basic.py
#
# Usage: error_basic.py URL
import sys
from contextlib import closing

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3: urllib.request offers the same names, including URLError.
    import urllib.request as urllib2

if len(sys.argv) < 2:
    # Exit status 1, like the uncaught IndexError this replaces.
    sys.exit("Usage: %s URL" % sys.argv[0])

req = urllib2.Request(sys.argv[1])
try:
    fd = urllib2.urlopen(req)
except urllib2.URLError as e:
    print("Error retrieving data: %s" % e)
    sys.exit(1)

# Close the response once the headers have been printed.
with closing(fd):
    print("Retrieved %s" % fd.geturl())
    info = fd.info()
    for key, value in info.items():
        print("%s = %s" % (key, value))

# This program is much like the earlier ones; it only adds a try block.
# urllib2.URLError was introduced above; the next listing also handles one
# of its subclasses, urllib2.HTTPError.

#!/usr/bin/python
# Obtain Web Page Information With Error Document Handling - Chapter 6
# error_doc.py
#
# Usage: error_doc.py URL
import sys
from contextlib import closing

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3: urllib.request also exposes HTTPError and URLError.
    import urllib.request as urllib2

if len(sys.argv) < 2:
    sys.exit("Usage: %s URL" % sys.argv[0])

req = urllib2.Request(sys.argv[1])
try:
    fd = urllib2.urlopen(req)
except urllib2.HTTPError as e:
    # Must come before URLError: HTTPError is the more specific class and
    # carries the error page sent by the server.
    print("Error retrieving data: %s" % e)
    print("Server error document follows:\n")
    # The error document is bytes; flush the text layer, then write it to
    # the binary layer of stdout (needed on Python 3).
    sys.stdout.flush()
    out = getattr(sys.stdout, "buffer", sys.stdout)
    out.write(e.read())
    out.write(b"\n")
    sys.exit(1)
except urllib2.URLError as e:
    print("Error retrieving data: %s" % e)
    sys.exit(2)

with closing(fd):
    # Spelling fixed from "Retrived" to match the other listings.
    print("Retrieved %s" % fd.geturl())
    info = fd.info()
    for key, value in info.items():
        print("%s=%s" % (key, value))