# Python modules: urllib, urllib2, httplib

import urllib,urllib2,cookielib,socket,httplib
import os

url = "http://www.qq.com"

def use_urllib():
    page = urllib.urlopen(url)
    print "status:",page.getcode() #200请求成功,404未找到
    print "url:", page.geturl()
    print "head_info:\n",  page.info()
    print "fileno:",page.fileno()
    print "page.readline():",page.readline()
    print "page.readlines():",len(page.readlines())
        
    print "close file:",page.close()
def urllib_other_functions():
    str = 'this is  "K"'
    astr = urllib.quote(str)
    print 'quote:',astr
    print 'unquote:',urllib.unquote(astr)

    bstr = urllib.quote_plus(str)
    print 'quote_plus:',bstr
    print 'unquote_plus',urllib.unquote_plus(bstr)

    params = {"a":"1","b":"2"}
    print 'urlencode:',urllib.urlencode(params)

    l2u = urllib.pathname2url(r'c:\win\2')
    print 'convert pathname to url: ',l2u
    print 'convert url to path:',urllib.url2pathname(l2u)

def callback_f(downloaded_size, block_size, remote_total_size):
    """urlretrieve report hook: print download progress as a percentage.

    downloaded_size: number of blocks transferred so far
    block_size: size of each block in bytes
    remote_total_size: total size reported by the server; urlretrieve
        passes -1 (or 0) when the server sends no Content-Length

    Fixes vs. original: parameter typo `romote_total_size`, and a guard so
    an unknown/zero total size no longer divides by zero or prints a
    negative percentage.
    """
    if remote_total_size <= 0:
        # progress is unknowable without a total size; stay silent
        return
    per = 100.0 * downloaded_size * block_size / remote_total_size
    if per > 100:
        per = 100
    print("%.2f%%" % per)

def use_urllib_retrieve():
    url = 'http://www.baidu.com'
    local = os.path.join(os.path.abspath("./"), "a.html")
    print local
    urllib.urlretrieve(url,local,callback_f)

def use_httplib():  
  import httplib  
  conn = httplib.HTTPConnection("www.baidu.com")  #HTTPConnection(host[, port[, strict[, timeout[, source_address]]]])
  i_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1) Gecko/20090624 Firefox/3.5",  
             "Accept": "text/plain"}
  conn.request("GET", "/", headers = i_headers)
  r1 = conn.getresponse()
  print "version:", r1.version
  print "reason:", r1.reason
  print "status:", r1.status  
  print "msg:", r1.msg
  print "headers:", r1.getheaders() 
  data = r1.read()
  print len(data) 
  conn.close()

url = "http://www.qq.com/"   
#simplest approach: call urllib2.urlopen directly
def use_urllib2():  
  try:  
    f = urllib2.urlopen(url, timeout=5).read()  
  except urllib2.URLError, e:  
    print e.reason  
  print len(f)  
  
#使用Request  
def get_request():  
  #可以设置超时
  socket.setdefaulttimeout(5)  
  #可以加入参数  [无参数,使用get,以下这种方式,使用post]  
  params = {"wd":"python"}
  #可以加入请求头信息,以便识别
  i_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1) Gecko/20090624 Firefox/3.5",  
             "Accept": "text/plain"}
  #use post,have some params post to server,if not support ,will throw exception  
  #req = urllib2.Request(url, data=urllib.urlencode(params), headers=i_headers)  
  req = urllib2.Request(url, headers=i_headers)  
  
  #创建request后,还可以进行其他添加,若是key重复,后者生效  
  req.add_header('Accept','application/json')  
  #可以指定提交方式
  #req.get_method = lambda: 'PUT'
  try:
    page = urllib2.urlopen(req)
    print len(page.read())
    #like get
    #url_params = urllib.urlencode({"wd":"python"})  
    #final_url = url + "s?" + url_params  
    #print final_url
    #data = urllib2.urlopen(final_url).read()
    #print "Method:get ", len(data)
  except urllib2.HTTPError, e:
    print "Error Code:", e.code
  except urllib2.URLError, e:
    print "Error Reason:", e.reason
  
def use_proxy(enable_proxy):  
  #enable_proxy = False
  proxy_handler = urllib2.ProxyHandler({"http":"http://www.baidu.com:8080"})  
  null_proxy_handler = urllib2.ProxyHandler({})
  if enable_proxy:  
    opener = urllib2.build_opener(proxy_handler, urllib2.HTTPHandler)  
  else:
    opener = urllib2.build_opener(null_proxy_handler, urllib2.HTTPHandler)  
  #此句设置urllib2的全局opener  
  urllib2.install_opener(opener)  
  content = urllib2.urlopen(url).read()  
  print "proxy len:",len(content)
  #print content




# Related reading: Python modules urllib, urllib2, httplib