import urllib,urllib2,cookielib,socket,httplib import os url = "http://www.qq.com" def use_urllib(): page = urllib.urlopen(url) print "status:",page.getcode() #200请求成功,404未找到 print "url:", page.geturl() print "head_info:\n", page.info() print "fileno:",page.fileno() print "page.readline():",page.readline() print "page.readlines():",len(page.readlines()) print "close file:",page.close() def urllib_other_functions(): str = 'this is "K"' astr = urllib.quote(str) print 'quote:',astr print 'unquote:',urllib.unquote(astr) bstr = urllib.quote_plus(str) print 'quote_plus:',bstr print 'unquote_plus',urllib.unquote_plus(bstr) params = {"a":"1","b":"2"} print 'urlencode:',urllib.urlencode(params) l2u = urllib.pathname2url(r'c:\win\2') print 'convert pathname to url: ',l2u print 'convert url to path:',urllib.url2pathname(l2u) def callback_f(downloaded_size, block_size, romote_total_size): per = 100.0 * downloaded_size * block_size / romote_total_size if per > 100: per = 100 print "%.2f%%"% per def use_urllib_retrieve(): url = 'http://www.baidu.com' local = os.path.join(os.path.abspath("./"), "a.html") print local urllib.urlretrieve(url,local,callback_f) def use_httplib(): import httplib conn = httplib.HTTPConnection("www.baidu.com") #HTTPConnection(host[, port[, strict[, timeout[, source_address]]]]) i_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1) Gecko/20090624 Firefox/3.5", "Accept": "text/plain"} conn.request("GET", "/", headers = i_headers) r1 = conn.getresponse() print "version:", r1.version print "reason:", r1.reason print "status:", r1.status print "msg:", r1.msg print "headers:", r1.getheaders() data = r1.read() print len(data) conn.close() url = "http://www.qq.com/" #最简单方式 def use_urllib2(): try: f = urllib2.urlopen(url, timeout=5).read() except urllib2.URLError, e: print e.reason print len(f) #使用Request def get_request(): #可以设置超时 socket.setdefaulttimeout(5) #可以加入参数 [无参数,使用get,以下这种方式,使用post] params = {"wd":"python"} #可以加入请求头信息,以便识别 i_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1) Gecko/20090624 Firefox/3.5", "Accept": "text/plain"} #use post,have some params post to server,if not support ,will throw exception #req = urllib2.Request(url, data=urllib.urlencode(params), headers=i_headers) req = urllib2.Request(url, headers=i_headers) #创建request后,还可以进行其他添加,若是key重复,后者生效 req.add_header('Accept','application/json') #可以指定提交方式 #req.get_method = lambda: 'PUT' try: page = urllib2.urlopen(req) print len(page.read()) #like get #url_params = urllib.urlencode({"wd":"python"}) #final_url = url + "s?" + url_params #print final_url #data = urllib2.urlopen(final_url).read() #print "Method:get ", len(data) except urllib2.HTTPError, e: print "Error Code:", e.code except urllib2.URLError, e: print "Error Reason:", e.reason def use_proxy(enable_proxy): #enable_proxy = False proxy_handler = urllib2.ProxyHandler({"http":"http://www.baidu.com:8080"}) null_proxy_handler = urllib2.ProxyHandler({}) if enable_proxy: opener = urllib2.build_opener(proxy_handler, urllib2.HTTPHandler) else: opener = urllib2.build_opener(null_proxy_handler, urllib2.HTTPHandler) #此句设置urllib2的全局opener urllib2.install_opener(opener) content = urllib2.urlopen(url).read() print "proxy len:",len(content) #print content