Python 批量下载文件

主要功能:

登录一个需要权限(HTTP Basic 认证)的页面,解析该页面,并将列表中的所有文件下载到本地目录。

#!/usr/bin/python2.7
#encoding=utf-8

import contextlib
import os
import shutil
import urllib2

from bs4 import BeautifulSoup

# Batch-download every "service_access.*" file linked from an HTTP Basic
# protected directory listing into a local directory.

# Directory listing to scrape. It must end with "/" because file names from
# the listing are appended to it directly.
#url = "http://logs.yinonline.com:9080/rawwebyrd/100.10.80.94/2016/04/"
url = "http://logs.yinonline.com:9080/rawwebyrd/100.10.80.96/2016/04/"

# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a config file kept out of version control.
username = "jin"
password = "2015@jyl"

# Local directory that receives the downloaded files.
target_path = '/Users/leo/logs/service/access/'

# Register the credentials for everything under `url` (realm None matches
# any realm) and build an opener that answers HTTP Basic auth challenges.
password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
password_mgr.add_password(None, url, username, password)
handler = urllib2.HTTPBasicAuthHandler(password_mgr)
opener = urllib2.build_opener(handler)

# Install the opener globally so every urllib2.urlopen call below uses it.
# No warm-up request is needed: the handler authenticates each request.
urllib2.install_opener(opener)

# Fetch the listing page; urllib2 responses are not context managers in
# Python 2, so contextlib.closing guarantees the connection is released.
with contextlib.closing(urllib2.urlopen(url)) as listing:
    html = listing.read()

soup = BeautifulSoup(html, "lxml")
tags = soup.findAll('a', {'href': True})

# Create the destination up front instead of failing on the first open().
if not os.path.isdir(target_path):
    os.makedirs(target_path)

for tag in tags:
    resource = tag['href']
    if not resource.startswith("service_access."):
        continue

    # The href comes from the remote page: keep only its final path
    # component so it cannot escape target_path (e.g. via "/../").
    local_name = os.path.basename(resource)
    if not local_name:
        continue

    print(url + resource)
    with contextlib.closing(urllib2.urlopen(url + resource)) as log:
        with open(os.path.join(target_path, local_name), 'wb') as output:
            # Stream in chunks rather than loading the whole log in memory.
            shutil.copyfileobj(log, output)

你可能感兴趣的:(python)