Python 实现网络爬虫学习总结

import urllib2
import urllib

def openUrl(strUrl):
    """Fetch the resource at ``strUrl`` and return its body.

    Args:
        strUrl: URL (or ``urllib2.Request``-compatible value) handed
            straight to ``urllib2.urlopen``.

    Returns:
        Whatever ``response.read()`` yields for the opened URL.

    Raises:
        Any exception raised by ``urllib2.urlopen`` or ``read()`` is
        propagated unchanged; nothing is caught here.
    """
    response = urllib2.urlopen(strUrl)
    try:
        return response.read()
    finally:
        # Always release the underlying connection, even if read() fails.
        response.close()
    
def writeFile(content, outputPath, mode='w'):
    """Write ``content`` to the file at ``outputPath``.

    Args:
        content: String to write. Any value that is not a ``str`` is
            rejected and nothing is written.
        outputPath: Path of the file to open.
        mode: Mode string passed to ``open``; defaults to ``'w'``.

    Returns:
        True when the content was written, False when ``content`` is not
        a ``str`` (in which case the file is never opened).
    """
    if not isinstance(content, str):
        return False
    # The context manager guarantees the file is flushed and closed; the
    # previous code referenced ``f.close`` without calling it.
    with open(outputPath, mode) as f:
        f.write(content)
    return True

# Disabled example: fetch a page with a plain GET and save it to disk.
# retUrl = openUrl('')
# writeFile(retUrl, 'login.html', 'w')

# Simulate a form login: POST the credentials and save the returned page.
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {'User-Agent': user_agent}
url = ''  # placeholder: fill in the login URL before running
data = {}
data['uid'] = ''  # placeholder: fill in before running
data['password'] = ''  # placeholder: fill in before running
# Extra form field, currently disabled (presumably a captcha -- confirm
# against the target form before enabling).
#data['verifyCode'] = '2345'
data = urllib.urlencode(data)
# Pass the headers so the custom User-Agent is actually sent with the POST.
req = urllib2.Request(url, data, headers)
try:
    response = urllib2.urlopen(req)
except urllib2.URLError as e:
    # Only HTTPError carries ``code``; a plain URLError only has ``reason``.
    if hasattr(e, 'code'):
        print(e.code)
    else:
        print(e.reason)
    print('==============')
    response = None
    the_page = 'error'
else:
    try:
        the_page = response.read()
    finally:
        # Release the connection once the body has been read.
        response.close()
writeFile(the_page, 'postLogin.html')
 
自己写的实例

你可能感兴趣的:(python)