Web客户端验证参数-auth

作用及类型

1、作用：用于访问需要Web客户端用户名和密码认证（HTTP基本认证）的网站
2、类型：元组，auth = ('username','password')

import requests
from lxml import etree
class NoteSpider(object):
    """Fetch a page that requires web-client authentication and list its links."""

    def __init__(self):
        # Web-client authentication credentials, handed to requests via ``auth``.
        self.auth = ("tarenacode", "code_2013")
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
        }
        self.url = "http://code.tarena.com.cn/"

    def get_code(self):
        """Request ``self.url`` with the auth tuple and print every link href but the first."""
        response = requests.get(url=self.url, auth=self.auth, headers=self.headers)
        page_source = response.content.decode("utf-8")
        # Parse the page and extract the href attribute of each <a> element.
        hrefs = etree.HTML(page_source).xpath("//a/@href")
        print(hrefs[1:])
if __name__ == '__main__':
    # Script entry point: build the spider and print the links it finds.
    note_spider = NoteSpider()
    note_spider.get_code()

把课件下载到本地

import requests
from lxml import etree
import os
class NoteSpider(object):
    """Download every .zip/.rar archive linked from an auth-protected page.

    The page at ``url`` is requested with web-client (username/password)
    authentication; each link whose text ends in ``.zip`` or ``.rar`` is
    downloaded into a local directory that mirrors the URL's path.
    """

    # Seconds to wait for the server before giving up; requests has no
    # default timeout and would otherwise block forever on a stalled server.
    TIMEOUT = 30

    def __init__(self, url='http://code.tarena.com.cn/AIDCode/aid1909/16_spider/',
                 base_dir='/home/tarena/code/'):
        """Set up request parameters and create the download directory.

        Args:
            url: Listing page to scan. It must end with ``/`` because file
                URLs are built by appending the link text to it.
            base_dir: Local root under which the URL's path is mirrored.
        """
        self.url = url
        self.headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1'}
        self.auth = ('tarenacode','code_2013')

        # Everything after "scheme://host/" is the path to mirror locally.
        # Leading slashes are stripped so os.path.join cannot discard base_dir.
        relative_path = '/'.join(self.url.split('/')[3:]).lstrip('/')
        self.directory = os.path.join(base_dir, relative_path)
        # exist_ok avoids the race between checking for and creating the directory.
        os.makedirs(self.directory, exist_ok=True)

    def get_html(self):
        """Fetch the listing page and download each linked .zip/.rar file.

        Raises:
            requests.HTTPError: if the listing page returns an error status
                (for example 401 when the credentials are rejected).
        """
        response = requests.get(url=self.url, auth=self.auth,
                                headers=self.headers, timeout=self.TIMEOUT)
        response.raise_for_status()
        p = etree.HTML(response.text)
        for r in p.xpath('//a/text()'):
            if r.endswith(('.zip', '.rar')):
                self.download_file(r)

    def download_file(self, r):
        """Download the file named ``r`` (link text) into ``self.directory``.

        Raises:
            requests.HTTPError: if the server answers with an error status,
                so an error page is never saved under the archive's name.
        """
        file_url = self.url + r
        response = requests.get(url=file_url, auth=self.auth,
                                headers=self.headers, timeout=self.TIMEOUT)
        response.raise_for_status()

        # The link text comes from the remote page; keep only its final
        # component so the file always lands inside the download directory.
        filename = os.path.join(self.directory, os.path.basename(r))
        with open(filename, 'wb') as f:
            f.write(response.content)

        print(filename,'下载成功')

if __name__ == '__main__':
    # Script entry point: scan the listing page and download its archives.
    downloader = NoteSpider()
    downloader.get_html()

你可能感兴趣的:(Web客户端验证参数-auth)