# selenium: automatically download the chromedriver driver for Chrome 118 and later

import requests, json, os, shutil
from lxml.etree import HTML

def upzip_file(zip_path=None, unzip_path=None):
    """Extract a zip archive and return the paths of all of its members.

    :param zip_path: path of the zip archive to read.
    :param unzip_path: directory to extract into; it is created, together
        with any missing parent directories, when it does not exist yet.
    :return: one path per archive member (files and directory entries
        alike), each joined onto ``unzip_path`` with backslashes replaced
        by forward slashes.
    """
    import zipfile

    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(zip_path) as archive:
        member_names = archive.namelist()
        # makedirs (not mkdir) so a nested destination with missing parents
        # does not raise FileNotFoundError.
        os.makedirs(unzip_path, exist_ok=True)
        archive.extractall(unzip_path)
    return [
        os.path.join(unzip_path, member).replace('\\', '/')
        for member in member_names
    ]


def down_chromedriver_zip(urll=None, path=None, version=None):
    """Download every ``chromedriver_win32`` archive listed at ``urll``.

    The response of ``urll`` is parsed as JSON and iterated; each entry is
    read through its ``name`` and ``url`` keys. For every entry whose name
    contains ``chromedriver_win32`` the archive is downloaded into ``path``,
    extracted next to it, non-``.exe`` files are deleted, and each ``.exe``
    is renamed to ``<stem>_<version>.exe``. The downloaded zip is removed
    afterwards, also when extraction fails.

    :param urll: URL of the JSON listing to scan.
    :param path: directory that receives the download and the extracted folder.
    :param version: label appended to the executable's file name.
    :raises requests.RequestException: on network errors, timeouts or a
        non-success HTTP status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'
    }
    # Without a timeout a stalled connection would block forever.
    timeout = 30
    listing = requests.get(urll, headers=headers, timeout=timeout)
    listing.raise_for_status()
    for entry in json.loads(listing.text):
        if 'chromedriver_win32' not in entry['name']:
            continue
        zip_url = entry['url']
        name = zip_url.split('/')[-1]
        zip_path = os.path.join(path, name)
        download = requests.get(zip_url, headers=headers, timeout=timeout)
        # Fail here rather than writing an HTML error page to disk as a zip.
        download.raise_for_status()
        with open(zip_path, 'wb') as f:
            f.write(download.content)
        # Strip only the extension; str.replace would also rewrite a '.zip'
        # occurring elsewhere in the path.
        uzip_path = os.path.splitext(zip_path)[0]
        try:
            for extracted in upzip_file(zip_path, uzip_path):
                if os.path.isdir(extracted):
                    # Directory members cannot be passed to os.remove.
                    continue
                if not extracted.endswith('.exe'):
                    if os.path.exists(extracted):
                        os.remove(extracted)
                    continue
                stem = extracted[:-len('.exe')]
                # os.replace overwrites a driver left by an earlier run;
                # os.rename raises on Windows when the target exists.
                os.replace(extracted, f'{stem}_{version}.exe')
        finally:
            if os.path.exists(zip_path):
                os.remove(zip_path)


def _cft_url_matches_version(zip_url, version):
    """Return True when a path segment of ``zip_url`` equals ``version`` or starts with ``version.``."""
    wanted = str(version)
    return any(
        segment == wanted or segment.startswith(wanted + '.')
        for segment in zip_url.split('/')
    )


def _find_cft_win32_driver_url(page_text, version):
    """Return the chromedriver-win32 zip URL for ``version`` found in the page HTML, or None."""
    document = HTML(page_text)
    rows = document.xpath('//section/div[@class="table-wrapper"]/table//tr')
    for row in rows:
        zip_cells = row.xpath('./td/code[contains(text(), "zip")]/text()')
        if 'chromedriver-win32' not in ''.join(zip_cells):
            continue
        # Keep only rows whose remaining <code> cells contain '200'
        # (presumably the HTTP status column of the page - confirm).
        other_cells = row.xpath('./td/code[not(contains(text(), "zip"))]/text()')
        if '200' not in ''.join(other_cells):
            continue
        # The first header cell must be exactly 'chromedriver'.
        binary_cells = row.xpath('./th[1]/code/text()')
        if 'chromedriver' not in binary_cells:
            continue
        candidate = zip_cells[0]
        if _cft_url_matches_version(candidate, version):
            return candidate
    return None


def get_chromedriver_version1(path, version=None):
    """Download the win32 chromedriver for ``version`` from the Chrome for Testing page.

    The page is fetched and its tables are scanned for a ``chromedriver``
    row whose zip URL contains ``chromedriver-win32`` and whose version path
    segment is ``version`` or starts with ``version.``. The archive is saved
    into ``path`` and extracted to a sibling ``chromedriver_win32`` folder,
    where the executable ends up as ``chromedriver_<version>.exe``; the other
    extracted files, the nested ``chromedriver-win32`` directory and the zip
    itself are removed.

    Failures are printed rather than raised. When the page cannot be fetched
    or no row matches (including ``version=None``), nothing is downloaded.

    :param path: directory that receives the download and the extracted folder.
    :param version: Chrome version to look for, e.g. ``118``.
    """
    url = 'https://googlechromelabs.github.io/chrome-for-testing/#stable'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'
    }
    # Without a timeout a stalled connection would block forever.
    timeout = 30
    try:
        res = requests.get(url, headers=headers, timeout=timeout)
        res.raise_for_status()
        zip_url = _find_cft_win32_driver_url(res.text, version)
    except Exception as e:
        print(e)
        return
    if zip_url is None:
        print(f'no chromedriver-win32 download found for version {version}')
        return
    try:
        name = zip_url.split('/')[-1]
        zip_path = os.path.join(path, name)
        res2 = requests.get(zip_url, headers=headers, timeout=timeout)
        # Fail here rather than writing an HTML error page to disk as a zip.
        res2.raise_for_status()
        with open(zip_path, 'wb') as f:
            f.write(res2.content)
        # Rename only the archive's own name so that a 'chromedriver-win32'
        # or '.zip' inside ``path`` itself is left untouched.
        folder = os.path.splitext(name)[0].replace('chromedriver-win32', 'chromedriver_win32')
        uzip_path = os.path.join(path, folder)
        try:
            for extracted in upzip_file(zip_path, uzip_path):
                if os.path.isdir(extracted):
                    # Directory members cannot be passed to os.remove; the
                    # nested folder is removed after the loop.
                    continue
                if not extracted.endswith('.exe'):
                    if os.path.exists(extracted):
                        os.remove(extracted)
                    continue
                # Move the driver up into uzip_path, overwriting a copy left
                # by an earlier run.
                os.replace(extracted, os.path.join(uzip_path, f'chromedriver_{version}.exe'))
            exe_path = os.path.join(uzip_path, 'chromedriver-win32')
            if os.path.exists(exe_path):
                os.rmdir(exe_path)
        finally:
            if os.path.exists(zip_path):
                os.remove(zip_path)
    except Exception as e:
        print(e)


def get_chromedriver_version(path, version0=None):
    """Download a win32 chromedriver for Chrome ``version0`` into ``path``.

    When the major number of ``version0`` is above 116 the work is delegated
    to ``get_chromedriver_version1``. Otherwise the npmmirror chromedriver
    listing is fetched and the last entry whose name starts with
    ``'<version0>.'`` is chosen; if there is none (or ``version0`` is not
    given) the first entry carrying the highest major version is used
    instead. The chosen entry's URL is handed to ``down_chromedriver_zip``
    together with its major version number.

    Network and download failures are printed rather than raised.

    :param path: directory that receives the driver.
    :param version0: Chrome version, either a major number (``118``) or a
        dotted version string (``'118.0.5993.70'``).
    :raises ValueError: if the leading part of ``version0`` is not a number.
    """
    url2 = 'https://registry.npmmirror.com/-/binary/chromedriver/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'
    }
    if version0:
        # Look only at the major part so dotted version strings do not make
        # int() raise.
        major_requested = int(str(version0).split('.')[0])
        if major_requested > 116:
            get_chromedriver_version1(path, version0)
            return
    try:
        res = requests.get(url2, headers=headers, timeout=30)
        res.raise_for_status()
        response = json.loads(res.text)
    except (requests.RequestException, ValueError) as e:
        # Best effort: report the problem and continue with an empty listing.
        print(e)
        response = []
    if not isinstance(response, list):
        # An unexpected payload (e.g. a JSON error object) has no entries.
        response = []

    matched = None  # (major, url) of the last entry named '<version0>.…'
    latest = None   # (major, url) of the first entry with the highest major
    for entry in response:
        name = entry['name']
        major_text = name.split('.')[0]
        if not major_text.isdigit():
            continue
        major = int(major_text)
        if name.startswith(f'{version0}.'):
            matched = (major, entry['url'])
        if latest is None or major > latest[0]:
            latest = (major, entry['url'])

    chosen = matched or latest
    if chosen is None:
        return
    label, listing_url = chosen
    try:
        down_chromedriver_zip(listing_url, path, label)
    except Exception as e:
        # Keep the best-effort behaviour, but do not hide why it failed.
        print(e)

# Script entry point: request the driver for Chrome 118 into the current directory.
if __name__ == "__main__":
    get_chromedriver_version(path='./', version0=118)

# You may also be interested in: (selenium)