- 私有化部署的 Docker 仓库随着发布次数增加,仓库中的镜像体积也随之增大,因此需要定时清理一些不再需要的镜像。以下通过 Python 脚本执行清理任务。
1、docker registry 配置
进入容器 cd /etc/docker/registry
编辑 config.yml,在 storage 下添加 delete 属性,即在 registry 的配置文件中开启允许删除的功能
storage:
  cache:
    blobdescriptor: inmemory
  filesystem:
    rootdirectory: /var/lib/registry
  delete:
    enabled: true
2、创建shell脚本
#!/bin/bash
# Remove the unwanted image data from the registry storage (script source is listed below).
python delete_docker_registry_data.py
# Run garbage-collect inside the registry container to actually reclaim the space.
# No -i/-t flags: a scheduled (cron) run has no terminal attached, and
# `docker exec -t` refuses to start without one.
docker exec registry registry garbage-collect /etc/docker/registry/config.yml
3、delete_docker_registry_data.py
# 需修改main方法中三处注释地方
#!/usr/bin/python
import json
import logging
import os
import sys
import shutil
import glob
import requests
# Module-level logger used by every function in this script; main() attaches a stream handler to it.
logger = logging.getLogger(__name__)
def del_empty_dirs(s_dir, top_level):
    """Recursively remove empty directories below ``s_dir``.

    A directory counts as empty when it contains no files and every
    subdirectory it contains was itself empty (and therefore removed).

    :param s_dir: directory to scan.
    :param top_level: when true, ``s_dir`` itself is kept even if it turns
        out to be empty; only its descendants are removed.
    :returns: ``True`` if ``s_dir`` is (now) empty, ``False`` otherwise.
    """
    b_empty = True
    for s_target in os.listdir(s_dir):
        s_path = os.path.join(s_dir, s_target)
        if os.path.isdir(s_path):
            # Always recurse, even if we already know this directory is not
            # empty, so that empty subtrees elsewhere are still cleaned up.
            if not del_empty_dirs(s_path, False):
                b_empty = False
        else:
            b_empty = False
    # Only announce a deletion when one really happens; the top-level
    # directory is never removed.
    if b_empty and not top_level:
        logger.debug("Deleting empty directory '%s'", s_dir)
        os.rmdir(s_dir)
    return b_empty
def get_layers_from_blob(path):
    """Parse a manifest JSON file and return the set of digests it references.

    For ``schemaVersion`` 1 the digests come from ``fsLayers[*].blobSum``.
    For any other schema version they come from ``layers[*].digest`` plus,
    when present, ``config.digest``. The ``<algorithm>:`` prefix is dropped
    from every digest.

    :param path: path of the manifest ``data`` file to read.
    :returns: set of digest strings; an empty set if the file cannot be
        read or parsed (the failure is logged, not raised).
    """
    try:
        with open(path, "r") as blob:
            data = json.loads(blob.read())
        if data["schemaVersion"] == 1:
            result = {entry["blobSum"].split(":")[1] for entry in data["fsLayers"]}
        else:
            result = {entry["digest"].split(":")[1] for entry in data["layers"]}
            # The image config is stored as a blob too, so it must be
            # tracked together with the layers.
            if "config" in data:
                result.add(data["config"]["digest"].split(":")[1])
        return result
    except Exception as error:
        # Deliberately best-effort: a broken manifest must not abort the
        # whole cleanup run.
        logger.critical("Failed to read layers from blob:%s", error)
        return set()
def get_digest_from_blob(path):
    """Read a ``link`` file and return the digest it points to.

    The file is expected to contain ``<algorithm>:<digest>``; only the part
    after the first colon is returned.

    :param path: path of the link file.
    :returns: the digest string, or ``""`` if the file cannot be read or
        does not contain a colon (the failure is logged, not raised).
    """
    try:
        with open(path, "r") as blob:
            # Strip surrounding whitespace so that a trailing newline does
            # not end up inside the digest and break later path lookups.
            return blob.read().split(":")[1].strip()
    except Exception as error:
        logger.critical("Failed to read digest from blob:%s", error)
        return ""
def get_links(path, _filter=None):
    """Walk ``path`` recursively and collect the digest of every ``link`` file.

    :param path: directory to walk.
    :param _filter: optional substring; when given, only link files whose
        full path contains it are considered.
    :returns: list of digest strings (duplicates are kept).
    """
    result = []
    for root, _, files in os.walk(path):
        for each in files:
            if each != "link":
                continue
            filepath = os.path.join(root, each)
            if _filter and _filter not in filepath:
                continue
            digest = get_digest_from_blob(filepath)
            # get_digest_from_blob signals failure with an empty string.
            # Never hand that on as a digest: joined into a storage path it
            # resolves to the parent directory, and deleting "blob ''" would
            # remove every blob at once.
            if digest:
                result.append(digest)
    return result
class RegistryCleanerError(Exception):
    """Raised by RegistryCleaner when the requested data cannot be found on disk."""
class RegistryCleaner(object):
    """Delete repositories, tags and untagged data directly from registry storage.

    All paths are built relative to ``registry_data_dir``, using the
    ``repositories/<repo>/_layers``, ``repositories/<repo>/_manifests`` and
    ``blobs/sha256/<xx>/<digest>`` subdirectories.

    With ``dry_run=True`` nothing is removed; every deletion is only logged.
    """

    def __init__(self, registry_data_dir, dry_run=False):
        """Remember the storage root.

        :param registry_data_dir: root directory of the registry storage.
        :param dry_run: log deletions instead of performing them.
        :raises RegistryCleanerError: if ``registry_data_dir`` is not a directory.
        """
        self.registry_data_dir = registry_data_dir
        if not os.path.isdir(self.registry_data_dir):
            raise RegistryCleanerError("No repositories directory found inside "
                                       "REGISTRY_DATA_DIR '{0}'.".
                                       format(self.registry_data_dir))
        self.dry_run = dry_run

    def _delete_layer(self, repo, digest):
        """Remove the layer link directory of ``digest`` from ``repo``."""
        if not digest:
            # An empty digest would make the path below resolve to the whole
            # _layers/sha256 directory of the repository.
            logger.warning("Refusing to delete layer with empty digest in repository '%s'", repo)
            return
        path = os.path.join(self.registry_data_dir, "repositories", repo, "_layers/sha256", digest)
        self._delete_dir(path)

    def _delete_blob(self, digest):
        """Remove the blob directory of ``digest`` from the blob store."""
        if not digest:
            # An empty digest would make the path below resolve to
            # blobs/sha256 itself, i.e. every blob in the registry.
            logger.warning("Refusing to delete blob with empty digest")
            return
        path = os.path.join(self.registry_data_dir, "blobs/sha256", digest[0:2], digest)
        self._delete_dir(path)

    def _blob_path_for_revision(self, digest):
        """Return the path of the blob ``data`` file holding the JSON for ``digest``."""
        return os.path.join(self.registry_data_dir, "blobs/sha256",
                            digest[0:2], digest, "data")

    def _blob_path_for_revision_is_missing(self, digest):
        """Return True if the blob ``data`` file for ``digest`` does not exist."""
        return not os.path.isfile(self._blob_path_for_revision(digest))

    def _get_layers_from_blob(self, digest):
        """Return the set of digests referenced by the manifest blob ``digest``."""
        return get_layers_from_blob(self._blob_path_for_revision(digest))

    def _delete_dir(self, path):
        """Remove the directory tree at ``path``, or only log it in dry-run mode.

        Failures are logged and swallowed so that one undeletable directory
        does not abort the remaining cleanup.
        """
        if self.dry_run:
            logger.info("DRY_RUN: would have deleted %s", path)
            return
        logger.info("Deleting %s", path)
        try:
            shutil.rmtree(path)
        except Exception as error:
            logger.critical("Failed to delete directory:%s", error)

    def _delete_from_tag_index_for_revision(self, repo, digest):
        """Remove ``digest`` from the index of every tag in ``repo``."""
        if not digest:
            # An empty digest would make the glob match every tag's whole
            # index/sha256 directory.
            logger.warning("Refusing to delete tag index entries for empty digest in repository '%s'", repo)
            return
        paths = glob.glob(
            os.path.join(self.registry_data_dir, "repositories", repo,
                         "_manifests/tags/*/index/sha256", digest)
        )
        for path in paths:
            self._delete_dir(path)

    def _delete_revisions(self, repo, revisions, blobs_to_keep=None):
        """Delete the given revision directories and what they point to.

        :param repo: repository the revisions belong to.
        :param revisions: iterable of revision directory paths.
        :param blobs_to_keep: digests whose blob data must survive (for
            example because another repository still links to them).
        """
        if blobs_to_keep is None:
            blobs_to_keep = []
        for revision_dir in revisions:
            for digest in get_links(revision_dir):
                self._delete_from_tag_index_for_revision(repo, digest)
                if digest not in blobs_to_keep:
                    self._delete_blob(digest)
            self._delete_dir(revision_dir)

    def _get_tags(self, repo):
        """Return the tag names of ``repo``, or None if it has no tags directory."""
        path = os.path.join(self.registry_data_dir, "repositories", repo, "_manifests/tags")
        if not os.path.isdir(path):
            logger.critical("No repository '%s' found in repositories directory %s",
                            repo, self.registry_data_dir)
            return None
        return [each for each in os.listdir(path)
                if os.path.isdir(os.path.join(path, each))]

    def _get_repositories(self):
        """Return the names of all repositories in the storage.

        A directory that contains ``_layers`` is a repository; otherwise each
        of its entries is reported as ``<directory>/<entry>`` (one level of
        namespacing).
        """
        result = []
        root = os.path.join(self.registry_data_dir, "repositories")
        for each in os.listdir(root):
            filepath = os.path.join(root, each)
            if not os.path.isdir(filepath):
                continue
            inside = os.listdir(filepath)
            if "_layers" in inside:
                result.append(each)
            else:
                result.extend(os.path.join(each, inner) for inner in inside)
        return result

    def _get_all_links(self, except_repo=""):
        """Return the digests of all links in every repository except ``except_repo``."""
        result = []
        for repo in self._get_repositories():
            if repo == except_repo:
                continue
            path = os.path.join(self.registry_data_dir, "repositories", repo)
            result.extend(get_links(path))
        return result

    def prune(self):
        """Delete all empty directories below ``registry_data_dir``."""
        del_empty_dirs(self.registry_data_dir, True)

    def _layer_in_same_repo(self, repo, tag, layer):
        """Return True if ``layer`` is referenced by another tag of ``repo``."""
        # _get_tags returns None when the tags directory is missing.
        other_tags = [t for t in (self._get_tags(repo) or []) if t != tag]
        for other_tag in other_tags:
            path = os.path.join(self.registry_data_dir, "repositories", repo,
                                "_manifests/tags", other_tag, "current/link")
            manifest = get_digest_from_blob(path)
            try:
                layers = self._get_layers_from_blob(manifest)
                if layer in layers:
                    return True
            except IOError:
                # NOTE(review): get_layers_from_blob, as written in this file,
                # catches every exception and returns an empty set, so this
                # handler does not appear to be reachable - confirm before
                # relying on the cleanup below.
                if self._blob_path_for_revision_is_missing(manifest):
                    logger.warning("Blob for digest %s does not exist. Deleting tag manifest: %s", manifest, other_tag)
                    tag_dir = os.path.join(self.registry_data_dir, "repositories", repo,
                                           "_manifests/tags", other_tag)
                    self._delete_dir(tag_dir)
                else:
                    raise
        return False

    def _manifest_in_same_repo(self, repo, tag, manifest):
        """Return True if ``manifest`` is the current manifest of another tag of ``repo``."""
        # _get_tags returns None when the tags directory is missing.
        other_tags = [t for t in (self._get_tags(repo) or []) if t != tag]
        for other_tag in other_tags:
            path = os.path.join(self.registry_data_dir, "repositories", repo,
                                "_manifests/tags", other_tag, "current/link")
            if get_digest_from_blob(path) == manifest:
                return True
        return False

    def delete_entire_repository(self, repo):
        """Delete ``repo`` and every blob that no other repository links to.

        :raises RegistryCleanerError: if the repository directory does not exist.
        """
        logger.debug("Deleting entire repository '%s'", repo)
        repo_dir = os.path.join(self.registry_data_dir, "repositories", repo)
        if not os.path.isdir(repo_dir):
            raise RegistryCleanerError("No repository '{0}' found in repositories "
                                       "directory {1}/repositories".
                                       format(repo, self.registry_data_dir))
        links = set(get_links(repo_dir))
        all_links_but_current = set(self._get_all_links(except_repo=repo))
        for layer in links:
            if layer in all_links_but_current:
                logger.debug("Blob found in another repository. Not deleting: %s", layer)
            else:
                self._delete_blob(layer)
        self._delete_dir(repo_dir)

    def delete_repository_tag(self, repo, tag):
        """Delete ``tag`` from ``repo`` together with data only that tag uses.

        Manifests still used by another tag of the same repository are left
        alone. Layers still used by another tag are left alone. Blob data
        still linked from another repository is kept, but the links inside
        ``repo`` are removed.

        :raises RegistryCleanerError: if the tag directory does not exist.
        """
        logger.debug("Deleting repository '%s' with tag '%s'", repo, tag)
        tag_dir = os.path.join(self.registry_data_dir, "repositories", repo, "_manifests/tags", tag)
        if not os.path.isdir(tag_dir):
            raise RegistryCleanerError("No repository '{0}' tag '{1}' found in repositories "
                                       "directory {2}/repositories".
                                       format(repo, tag, self.registry_data_dir))
        manifests_for_tag = set(get_links(tag_dir))
        revisions_to_delete = []
        blobs_to_keep = []
        layers = []
        all_links_not_in_current_repo = set(self._get_all_links(except_repo=repo))
        for manifest in manifests_for_tag:
            logger.debug("Looking up filesystem layers for manifest digest %s", manifest)
            if self._manifest_in_same_repo(repo, tag, manifest):
                logger.debug("Not deleting since we found another tag using manifest: %s", manifest)
                continue
            revisions_to_delete.append(
                os.path.join(self.registry_data_dir, "repositories", repo,
                             "_manifests/revisions/sha256", manifest)
            )
            if manifest in all_links_not_in_current_repo:
                logger.debug("Not deleting the blob data since we found another repo using manifest: %s", manifest)
                blobs_to_keep.append(manifest)
            layers.extend(self._get_layers_from_blob(manifest))
        for layer in set(layers):
            if self._layer_in_same_repo(repo, tag, layer):
                logger.debug("Not deleting since we found another tag using digest: %s", layer)
                continue
            # The link inside this repository always goes; the shared blob
            # only goes when nobody else references it.
            self._delete_layer(repo, layer)
            if layer in all_links_not_in_current_repo:
                logger.debug("Blob found in another repository. Not deleting: %s", layer)
            else:
                self._delete_blob(layer)
        self._delete_revisions(repo, revisions_to_delete, blobs_to_keep)
        self._delete_dir(tag_dir)

    def delete_untagged(self, repo):
        """Delete every revision of ``repo`` that no tag currently points to.

        Layers referenced by a currently tagged manifest in any repository
        are protected from deletion.

        :raises RegistryCleanerError: if the repository directory does not exist.
        """
        logger.debug("Deleting utagged data from repository '%s'", repo)
        repositories_dir = os.path.join(self.registry_data_dir, "repositories")
        repo_dir = os.path.join(repositories_dir, repo)
        if not os.path.isdir(repo_dir):
            raise RegistryCleanerError("No repository '{0}' found in repositories "
                                       "directory {1}/repositories".
                                       format(repo, self.registry_data_dir))
        # Links whose path contains "current", collected across all repositories.
        tagged_links = set(get_links(repositories_dir, _filter="current"))
        layers_to_protect = []
        for link in tagged_links:
            layers_to_protect.extend(self._get_layers_from_blob(link))
        unique_layers_to_protect = set(layers_to_protect)
        for layer in unique_layers_to_protect:
            logger.debug("layer_to_protect: %s", layer)
        tagged_revisions = set(get_links(repo_dir, _filter="current"))
        revisions_to_delete = []
        layers_to_delete = []
        dir_for_revisions = os.path.join(repo_dir, "_manifests/revisions/sha256")
        for rev in os.listdir(dir_for_revisions):
            if rev in tagged_revisions:
                continue
            revisions_to_delete.append(os.path.join(dir_for_revisions, rev))
            for layer in self._get_layers_from_blob(rev):
                if layer not in unique_layers_to_protect:
                    layers_to_delete.append(layer)
        self._delete_revisions(repo, revisions_to_delete)
        for layer in set(layers_to_delete):
            self._delete_blob(layer)
            self._delete_layer(repo, layer)

    def get_tag_count(self, repo):
        """Return the number of entries in the tags directory of ``repo``.

        :returns: the entry count, or -1 if the tags directory does not exist.
        """
        logger.debug("Get tag count of repository '%s'", repo)
        repo_dir = os.path.join(self.registry_data_dir, "repositories", repo)
        tags_dir = os.path.join(repo_dir, "_manifests/tags")
        if os.path.isdir(tags_dir):
            return len(os.listdir(tags_dir))
        logger.info("Tags directory does not exist: '%s'", tags_dir)
        return -1
def main():
    """Delete one tag from every repository listed by the registry catalog.

    The repository list is fetched from the registry's ``/v2/_catalog``
    endpoint, then each repository is cleaned directly on disk through
    RegistryCleaner. The storage root is taken from the ``REGISTRY_DATA_DIR``
    environment variable when set, otherwise from the hard-coded default.

    Three values are meant to be edited for the local setup: the registry
    URL/credentials, the tag to delete, and the default storage path.

    Exits with status 1 on the first RegistryCleanerError.
    """
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(u'%(levelname)-8s [%(asctime)s] %(message)s'))
    logger.addHandler(handler)
    # Without an explicit level the logger inherits the root default
    # (WARNING) and every "Deleting ..." INFO message would be dropped.
    logger.setLevel(logging.INFO)

    # (1) Change to the real registry address and basic-auth credentials.
    # NOTE(review): the catalog endpoint may paginate its result; confirm
    # that a single request returns every repository of this registry.
    response = requests.get("http://127.0.0.1:5000/v2/_catalog",
                            headers={"Authorization": "xxxx"},
                            timeout=30)  # never hang forever in a scheduled job
    # Fail with the HTTP error (e.g. 401) instead of a KeyError further down.
    response.raise_for_status()
    images = response.json()['repositories']

    tag = 'latest'  # (2) the tag that should be deleted
    # (3) Change the fallback to the real local storage path.
    registry_data_dir = os.environ.get('REGISTRY_DATA_DIR',
                                       "/data/registry-data/docker/registry/v2")
    # Switch kept from the original script (there it was the constant
    # condition `1==2`): set to True to remove untagged data instead of a tag.
    delete_untagged_only = False

    for image in images:
        try:
            cleaner = RegistryCleaner(registry_data_dir)
            if delete_untagged_only:
                cleaner.delete_untagged(image)
            elif tag:
                # NOTE(review): a repository with exactly one tag is removed
                # entirely, whatever that tag is called - confirm this is
                # intended before running against production data.
                tag_count = cleaner.get_tag_count(image)
                if tag_count == 1:
                    cleaner.delete_entire_repository(image)
                else:
                    cleaner.delete_repository_tag(image, tag)
            else:
                cleaner.delete_entire_repository(image)
        except RegistryCleanerError as error:
            # NOTE(review): this aborts the whole run at the first repository
            # that fails, e.g. one that does not have the tag.
            logger.critical(error)
            sys.exit(1)
# Run the cleanup only when executed as a script, not when imported.
if __name__ == "__main__":
    main()