harbor 定期清理 registry

环境:centos7.6,harbor1.9,python2.7

  • 定期清理
    harbor 作为私有 docker 仓库,经常在 cicd 中使用,导致每个镜像会有很多 tag,不及时清理,有时会导致 harbor 服务器存储空间用完。下面的脚本用于定期清理 harbor 仓库中的 docker 镜像,每个镜像保留最新的 5 个 tag。

1、定期清理 harbor 镜像 python 脚本,适用于 python2.7。(改自https://www.cnblogs.com/kaishirenshi/p/11461504.html,该文章脚本适用于 python3)


#!/bin/python
# coding: utf-8

from operator import itemgetter
from urllib import pathname2url
import requests

import datetime as dt
# import maya

import logging
# Write log records of level INFO and above to harbor_clean.txt.
# The path is relative, so the file is created in the process's working
# directory; filemode="w" truncates it at the start of every run.
logging.basicConfig(filename='harbor_clean.txt', filemode="w", level=logging.INFO)

# Module-level logger used by the cleanup code below.
logger = logging.getLogger(__name__)

# Free-standing description string (not a module docstring, because it is not
# the first statement of the file): "clean up old images in the Harbor registry".
"""
清理 Harbor 仓库的老镜像
"""


class HarborCleaner(object):
    delete_status = {
        200: "Delete tag successfully.",    
        400: "Invalid repo_name.",
        401: "Unauthorized.",
        403: "Forbidden.",
        404: "Repository or tag not found.",
    }

def __init__(self, user, password, hostname, port, use_https=False):
        scheme = "https" if use_https else "http"
        api_base = "{scheme}://{hostname}:{port}/api"
        api_base = api_base.format(scheme=scheme,hostname=hostname,port=port)
        self.search_api = api_base + "/search?q={key_word}"
        self.projects_api = api_base + "/projects"
        self.repository_query_api = api_base + "/repositories?project_id={project_id}"
        # repo_name 一般为 "project_name/repo_name" 格式,必须做转义处理(因为中间有斜杠)
        self.repository_tags_api = api_base + "/repositories/{repo_name}/tags"
        self.repository_tag_api = self.repository_tags_api + "/{tag}"

        self.session = requests.Session()
        self.session.verify = False  # 如果公司是使用自签名证书,不能通过 SSL 验证,就需要设置这个
        self.session.headers = {
            "Accept": "application/json"
        }

        self.session.auth = (user, password)


    def get_all_projects(self):
        resp = self.session.get(self.projects_api)
        
        success = resp.status_code == 200
        return {
            "success": success,
            "data": resp.json() if success else resp.text
        }

def get_all_repos(self, project):
        url = self.repository_query_api.format(project_id=project['project_id'])
        resp = self.session.get(url)

        success = resp.status_code == 200
        return {
            "success": success,
            "data": resp.json() if success else resp.text
        }

    def get_all_tags(self, repo):
        """repo_name 需要做转义"""
        repo_name = pathname2url(repo['name'])
        url = self.repository_tags_api.format(repo_name=repo_name)
        resp = self.session.get(url)

        success = resp.status_code == 200
        return {
            "success": success,
            "data": resp.json() if success else resp.text
        }
    
    def get_tags_except_lastest_n(self, repo, n):
        """获取除了最新的 n 个 tag 之外的所有 tags"""

        # 如果 镜像 tags 数小于 n+1,说明该镜像很干净,不需要做清理。
        if repo['tags_count'] <= n:  # +1 是因为 latest 是重复的 tag
            return []
        
        result = self.get_all_tags(repo)
        tags = []
        tags = result['data']
        for tag in tags:
            # tag['time'] = maya.MayaDT.from_iso8601(tag['created'])

            # '2019-04-09T11:33:49.296960745Z'
            # # python 自带的解析函数,只能处理 6 位小数,下面截去多余的三位
            timestamp = tag['created'][:-4] + 'Z'
            tag['time'] = dt.datetime.strptime(timestamp, r'%Y-%m-%dT%H:%M:%S.%fZ')

        tags.sort(key=itemgetter('time'))  # 使用 time 键进行原地排序
        return tags[:-n]  # expect the latest n tags, -1 是因为 latest 是重复的 tag

def soft_delete_tag(self, repo, tag):
        """repo_name 需要做转义
        这里删除后,还需要进行一次 GC,才能真正地清理出可用空间。
        """        
        repo_name = pathname2url(repo['name'])
        url = self.repository_tag_api.format(repo_name=repo_name, tag=tag['name'])
        resp = self.session.delete(url)

        return {
            "success": resp.status_code == 200,
            "message": self.delete_status.get(resp.status_code)
        }

    def soft_delete_all_tags_except_latest_n(self, n):
        """从每个仓库中,删除所有的 tags,只有最新的 n 个 tag 外的所有 tags 除外"""
        res_projects = self.get_all_projects()
        if not res_projects['success']:
            logger.warning("faild to get all projects, message: {}".format(res_projects['data']))

        logger.info("we have {} projects".format(len(res_projects['data'])))
        for p in res_projects['data']:
            res_repos = self.get_all_repos(p)
            if not res_projects['success']:
                logger.warning("faild to get all repos in project: {}, message: {}".format(p['name'], res_repos['data']))

            logger.info("we have {} repos in project:{}".format(len(res_repos['data']), p['name']))
            for repo in res_repos['data']:
                logger.info("deal with repo: {}".format(repo['name']))

                old_tags = self.get_tags_except_lastest_n(repo, n)
                logger.info("we have {} tags to delete in repo: {}".format(len(old_tags), repo['name']))
                for tag in old_tags:
                    logger.info("try to delete repo:{}, tag: {}, create_time: {}".format(repo['name'], tag['name'], tag['created']))
                    result = self.soft_delete_tag(repo, tag)
                    if result['success']:
                        logger.info("success delete it.")
                    else:
                        logger.warning("delete failed!, message: {}".format(result['message']))

if __name__ == "__main__":
    # Number of most recently created tags to keep in every repository.
    keep_latest = 5

    # 1. Soft-delete old tags through Harbor's REST API.
    # NOTE(review): the credentials and hostname below look like placeholder
    # values from the original article -- replace them for a real deployment.
    harbor_cleaner = HarborCleaner(
        user="admin",
        password="Harbor12345",
        hostname="192.168.17",
        port=80
    )
    harbor_cleaner.soft_delete_all_tags_except_latest_n(keep_latest)

    # 2. Run a garbage collection to remove all soft-deleted images.
    # 2.1 Harbor versions before 1.7 must be stopped before GC can run:
    #
    #   cd /volume1/docker/harbor/harbor
    #   docker-compose stop  # stop the service
    #   # Replace the tag 'v2.6.2-v1.4.0' below with the version of the
    #   # registry-photon image currently in use.
    #   # --dry-run performs a trial GC; its log output matches a real GC,
    #   # so it can be used to spot problems in advance.
    #   docker run -it --name gc --rm --volumes-from registry vmware/registry-photon:v2.6.2-v1.4.0 garbage-collect --dry-run /etc/registry/config.yml
    #   # The real GC; only this run actually removes the soft-deleted images.
    #   docker run -it --name gc --rm --volumes-from registry vmware/registry-photon:v2.6.2-v1.4.0 garbage-collect /etc/registry/config.yml
    #
    # 2.2 Harbor 1.7+ can run GC online, or on a schedule, through the REST API.

2、cronjob 定时,每周周日执行清理任务

[root@harbor-bak harbor]# crontab -l
0 0 * * 0 /bin/python /root/data/harbor/harbor_gc.py

3、harbor 登陆 http://192.168.17,配置:任务 – 垃圾清理 – 每周
harbor 可以通过 ui 清理镜像,但在 1.7 版本以前,必须先关闭仓库才能真正释放空间。

  • 常用 api

1、查看 project

curl -u admin:Harbor12345 -X GET "http://192.168.17/api/projects" -H "accept: application/json"

2、查看 project 下面对应的 repository 和 repo 信息

curl -u admin:Harbor12345 -X GET "http://192.168.17/api/repositories?project_id=5" -H "accept: application/json"

3、查看 repository/repo tags

curl -u admin:Harbor12345 -X GET "http://192.168.17/api/repositories/gpu%2Fiot-user/tags" -H "accept: application/json"

4、删除镜像

curl -u admin:Harbor12345 -X DELETE "http://192.168.17/api/repositories/infra%2Fbusybox/tags/v1.0" -H "accept: application/json"

参考文章

https://www.cnblogs.com/kaishirenshi/p/11461504.html

你可能感兴趣的:(运维,linux)