http://www.voidspace.org.uk/python/articles/authentication.shtml#base64
http://kentsjohnson.com/kk/00010.html
Basic Authentication is part of the HTTP protocol. When you browse to a web site that uses basic auth, the browser pops up a dialog asking for your username and password. You supply credentials to the browser; your credentials are included with every subsequent request to the same web site.
To authenticate using basic authentication you must configure an opener with an HTTPBasicAuthHandler. The handler itself must be configured with the authentication realm, server address, username and password. Here is an example:
# Build an OpenerDirector that can answer HTTP Basic authentication
# challenges.  The handler is told which realm and which host the
# credentials belong to, plus the credentials themselves.
basic_auth = urllib2.HTTPBasicAuthHandler()
basic_auth.add_password(
    realm='realm',
    uri='example.com',
    user='username',
    passwd='password',
)
opener = urllib2.build_opener(basic_auth)
The open() method of the returned opener can be used to read pages that require authentication:
# Option 1: call the opener directly to fetch a page that requires
# authentication.
protected_url = 'http://example.com/my/protected/page.html'
data = opener.open(protected_url).read()

# Option 2: install the opener globally, so that plain urllib2.urlopen()
# goes through the same authentication handler.
urllib2.install_opener(opener)
data = urllib2.urlopen(protected_url).read()
首先可以用如下代码判断是否可以读取该网页的url:
import sys
import urllib

# Python 2 snippet: quick check that a URL can be opened and read.
# It reports the HTTP status and then dumps the response body.
# One-line alternative:
#     print(urllib.urlopen('http://www.google.com').read())
theurl = urllib.urlopen('http://mail.163.com/')
try:
    print('http status: %s' % theurl.getcode())
    # The response object can be iterated line by line, just like a
    # local file.
    for line in theurl:
        # Each line keeps its own newline, so write it out verbatim.
        sys.stdout.write(line)
finally:
    # Release the connection even if reading the body fails midway.
    theurl.close()
import urllib2
# Python 2 snippet: probe a URL and report whether the server answers
# with 401 (authentication required), some other error, or success.
theurl = 'https://cpanel.net/'
req = urllib2.Request(theurl)
try:
    print('urlopen begin')
    handle = urllib2.urlopen(req)
    print('urlopen finish')
except IOError as e:
    if not hasattr(e, 'code'):
        # The error carries no HTTP status code, so there is no status
        # to inspect; just show the error itself.
        print(r'the string "code" is not in e')
        print(e)
    elif e.code != 401:
        print('We got another error')
        print(e.code)
    else:
        # 401: the server is asking for credentials.
        print('yes')
        print(e.headers)
        # Use .get() so a 401 without a challenge header is reported
        # instead of raising KeyError inside this handler.
        challenge = e.headers.get('www-authenticate')
        if challenge is None:
            print('The 401 response has no WWW-Authenticate header.')
        else:
            print(challenge)
else:
    print('this url can be accessed')
    # The response is not needed here; close it to free the connection.
    handle.close()
import urllib2
import sys
import re
import base64
from urlparse import urlparse
# Python 2 script: HTTP Basic authentication done by hand with urllib2.
#   1. Request the page anonymously and expect a 401 challenge.
#   2. Check that the challenge asks for the Basic scheme.
#   3. Repeat the request with an Authorization header and read the page.
# Every failure path prints a message and exits with status 1.
theurl = 'http://api.minicloud.com.cn/statuses/friends_timeline.xml'
# if you want to run this example you'll need to supply
# a protected page with your username and password
username = 'xxx'
password = 'xxx'  # a very bad password

req = urllib2.Request(theurl)
try:
    handle = urllib2.urlopen(req)
except IOError as e:
    # Here we *want* to fail.  Keep the error under its own name rather
    # than relying on the except-clause variable outliving the handler.
    challenge_error = e
else:
    # If we don't fail then the page isn't protected.
    handle.close()
    print("This page isn't protected by authentication.")
    sys.exit(1)

if getattr(challenge_error, 'code', None) != 401:
    # We got an error - but not a 401 error.
    print("This page isn't protected by authentication.")
    print('But we failed for another reason.')
    sys.exit(1)

# The scheme and realm are parsed only to validate the challenge; the
# realm is not needed to build the Authorization header below.
# A missing header becomes '' and is reported as badly formed instead of
# raising KeyError.
authline = challenge_error.headers.get('www-authenticate', '')
# This regular expression extracts the scheme and the realm.
authobj = re.compile(
    r'''(?:\s*www-authenticate\s*:)?\s*(\w*)\s+realm=['"]([^'"]+)['"]''',
    re.IGNORECASE)
matchobj = authobj.match(authline)
if not matchobj:
    # If the header isn't matched by the regular expression then
    # something is wrong.
    print('The authentication header is badly formed.')
    print(authline)
    sys.exit(1)

scheme = matchobj.group(1)
realm = matchobj.group(2)
if scheme.lower() != 'basic':
    print('This example only works with BASIC authentication.')
    sys.exit(1)

# b64encode() never inserts line breaks.  encodestring() wraps its output
# every 76 characters, which would put a newline inside the header for
# long credentials.
base64string = base64.b64encode('%s:%s' % (username, password))
authheader = "Basic %s" % base64string
req.add_header("Authorization", authheader)
try:
    handle = urllib2.urlopen(req)
except IOError as e:
    # Here we shouldn't fail if the username/password is right.  Only a
    # repeated 401 actually points at the credentials.
    if getattr(e, 'code', None) == 401:
        print("It looks like the username or password is wrong.")
    else:
        print('The authenticated request failed: %s' % e)
    sys.exit(1)

try:
    thepage = handle.read()
finally:
    # Always release the connection once the body has been read.
    handle.close()