glance支持延迟删除镜像的功能,个人觉得挺实用的,特别是在误删除的情况下。从某种程度来说,也算是对数据一种另类保护吧。
大致实现原理是:有个delayed_delete设置是否开启延迟删除的开关,如果为True的话,每次删除镜像的时候都会把镜像的状态置为pending_delete,记录此刻的delete_time,有个scrubber的后台进程会每隔一段时间(wakeup_time)去check是否有pending_delete的镜像要删除,删除的判断标准是:该镜像被删除的那个时刻的delete_time + scrub_time <= time.time(),scrub_time是镜像要隔多少秒才真正被擦除掉。
开启delayed_delete
[root@controller2 ~(keystone_admin)]# vim /etc/glance/glance-api.conf
delayed_delete = True
来看glance api端删除镜像的时候判断是否开启了delayed_delete的代码
# v1的api
glance/api/v1/images.py
@utils.mutating
def delete(self, req, id):
    """
    Delete an image through the v1 API (excerpt; the rest of the method
    is not shown here).

    The visible part validates the request and decides which status the
    image should move to: 'pending_delete' when delayed delete applies,
    otherwise 'deleted'.

    NOTE(review): ``p_w_picpath`` in identifiers and messages looks like a
    text-extraction artifact of ``image``; confirm against upstream Glance
    before reusing this code.

    :param req: The WSGI/Webob Request object
    :param id: The opaque image identifier
    :raises: HTTPForbidden if the image is protected or is already in
             'pending_delete' status
    :raises: HTTPNotFound if the image is already in 'deleted' status
    """
    self._enforce(req, 'delete_p_w_picpath')

    p_w_picpath = self.get_p_w_picpath_meta_or_404(req, id)
    if p_w_picpath['protected']:
        msg = _("Image is protected")
        LOG.warn(msg)
        raise HTTPForbidden(explanation=msg,
                            request=req,
                            content_type="text/plain")

    if p_w_picpath['status'] == 'pending_delete':
        # A delete is already queued for the scrubber; refuse a second one.
        msg = (_("Forbidden to delete a %s p_w_picpath.") %
               p_w_picpath['status'])
        LOG.warn(msg)
        raise HTTPForbidden(explanation=msg,
                            request=req,
                            content_type="text/plain")
    elif p_w_picpath['status'] == 'deleted':
        msg = _("Image %s not found.") % id
        LOG.warn(msg)
        raise HTTPNotFound(explanation=msg, request=req,
                           content_type="text/plain")

    # This is where delayed delete is decided: only an image that has a
    # location AND runs with delayed_delete enabled is parked in
    # 'pending_delete'; everything else is marked 'deleted' right away.
    if p_w_picpath['location'] and CONF.delayed_delete:
        status = 'pending_delete'
    else:
        status = 'deleted'
。。。。。。
# v2的api
glance/api/v2/images.py
@utils.mutating
def delete(self, req, p_w_picpath_id):
    """
    Delete an image through the v2 API.

    Fetches the image from the repository obtained via the gateway, calls
    its ``delete()`` and removes it from the repository. Store and domain
    exceptions are translated into webob HTTP errors.

    NOTE(review): ``p_w_picpath`` in identifiers and messages looks like a
    text-extraction artifact of ``image``; confirm against upstream Glance.

    :param req: request object; only ``req.context`` is used here
    :param p_w_picpath_id: identifier of the image to delete
    :raises: webob.exc.HTTPForbidden, HTTPNotFound, HTTPConflict,
             HTTPBadRequest or HTTPUnauthorized, depending on the
             underlying failure
    """
    p_w_picpath_repo = self.gateway.get_repo(req.context)
    try:
        p_w_picpath = p_w_picpath_repo.get(p_w_picpath_id)
        # The status decision (pending_delete vs. deleted) happens inside
        # the image object's delete(), not in this controller.
        p_w_picpath.delete()
        p_w_picpath_repo.remove(p_w_picpath)
    except (glance_store.Forbidden, exception.Forbidden) as e:
        LOG.debug("User not permitted to delete p_w_picpath '%s'",
                  p_w_picpath_id)
        raise webob.exc.HTTPForbidden(explanation=e.msg)
    except (glance_store.NotFound, exception.NotFound) as e:
        msg = (_("Failed to find p_w_picpath %(p_w_picpath_id)s to delete") %
               {'p_w_picpath_id': p_w_picpath_id})
        LOG.warn(msg)
        raise webob.exc.HTTPNotFound(explanation=msg)
    except glance_store.exceptions.InUseByStore as e:
        msg = (_("Image %(id)s could not be deleted "
                 "because it is in use: %(exc)s") %
               {"id": p_w_picpath_id,
                "exc": e.msg})
        LOG.warn(msg)
        raise webob.exc.HTTPConflict(explanation=msg)
    except glance_store.exceptions.HasSnapshot as e:
        raise webob.exc.HTTPConflict(explanation=e.msg)
    except exception.InvalidImageStatusTransition as e:
        raise webob.exc.HTTPBadRequest(explanation=e.msg)
    except exception.NotAuthenticated as e:
        raise webob.exc.HTTPUnauthorized(explanation=e.msg)
glance/domain/proxy.py
class Image(object):
    """Proxy that forwards attribute access and ``delete()`` to ``base``.

    Each class attribute below is built by ``_proxy('base', <name>)``, so
    reads and writes are presumably redirected to the same-named attribute
    of ``self.base`` (``_proxy`` is defined outside this excerpt -- verify).

    NOTE(review): ``p_w_picpath`` looks like a text-extraction artifact of
    ``image``; confirm against upstream Glance.
    """

    def __init__(self, base, member_repo_proxy_class=None,
                 member_repo_proxy_kwargs=None):
        """Wrap ``base`` and keep a Helper for the member-repo proxy.

        :param base: the wrapped image object
        :param member_repo_proxy_class: passed through to ``Helper``
        :param member_repo_proxy_kwargs: passed through to ``Helper``
        """
        self.base = base
        self.helper = Helper(member_repo_proxy_class,
                             member_repo_proxy_kwargs)

    name = _proxy('base', 'name')
    p_w_picpath_id = _proxy('base', 'p_w_picpath_id')
    status = _proxy('base', 'status')
    created_at = _proxy('base', 'created_at')
    updated_at = _proxy('base', 'updated_at')
    visibility = _proxy('base', 'visibility')
    min_disk = _proxy('base', 'min_disk')
    min_ram = _proxy('base', 'min_ram')
    protected = _proxy('base', 'protected')
    locations = _proxy('base', 'locations')
    checksum = _proxy('base', 'checksum')
    owner = _proxy('base', 'owner')
    disk_format = _proxy('base', 'disk_format')
    container_format = _proxy('base', 'container_format')
    size = _proxy('base', 'size')
    virtual_size = _proxy('base', 'virtual_size')
    extra_properties = _proxy('base', 'extra_properties')
    tags = _proxy('base', 'tags')

    def delete(self):
        """Delegate deletion to the wrapped object."""
        # Per the article, ``base`` here is the Image defined in
        # glance/domain/__init__.py.
        self.base.delete()
glance/domain/__init__.py
class Image(object):
    """Domain model of an image, including its delete status decision.

    NOTE(review): ``p_w_picpath`` looks like a text-extraction artifact of
    ``image``; confirm against upstream Glance.
    """

    valid_state_targets = {
        # Each key denotes a "current" state for the image. Corresponding
        # values list the valid states to which we can jump from that
        # "current" state.
        # NOTE(flwang): In v2, we are deprecating the 'killed' status, so
        # it's allowed to restore image from 'saving' to 'queued' so that
        # upload can be retried.
        'queued': ('saving', 'active', 'deleted'),
        'saving': ('active', 'killed', 'deleted', 'queued'),
        'active': ('pending_delete', 'deleted', 'deactivated'),
        'killed': ('deleted',),
        'pending_delete': ('deleted',),
        'deleted': (),
        'deactivated': ('active', 'deleted'),
    }

    def __init__(self, p_w_picpath_id, status, created_at, updated_at,
                 **kwargs):
        """Populate the image from explicit fields plus optional kwargs.

        Every recognised keyword is popped from ``kwargs``; anything left
        over afterwards is rejected.

        :raises TypeError: if an unrecognised keyword argument is passed
        """
        self.p_w_picpath_id = p_w_picpath_id
        self.status = status
        self.created_at = created_at
        self.updated_at = updated_at
        self.name = kwargs.pop('name', None)
        self.visibility = kwargs.pop('visibility', 'private')
        self.min_disk = kwargs.pop('min_disk', 0)
        self.min_ram = kwargs.pop('min_ram', 0)
        self.protected = kwargs.pop('protected', False)
        self.locations = kwargs.pop('locations', [])
        self.checksum = kwargs.pop('checksum', None)
        self.owner = kwargs.pop('owner', None)
        self._disk_format = kwargs.pop('disk_format', None)
        self._container_format = kwargs.pop('container_format', None)
        self.size = kwargs.pop('size', None)
        self.virtual_size = kwargs.pop('virtual_size', None)
        extra_properties = kwargs.pop('extra_properties', {})
        self.extra_properties = ExtraProperties(extra_properties)
        self.tags = kwargs.pop('tags', [])
        if kwargs:
            # Report the first leftover key, mimicking Python's own message.
            message = _("__init__() got unexpected keyword argument '%s'")
            raise TypeError(message % list(kwargs.keys())[0])

    def delete(self):
        """Mark the image as deleted or pending deletion.

        This is the method the proxy's ``base.delete()`` ends up calling.

        :raises exception.ProtectedImageDelete: if the image is protected
        """
        if self.protected:
            raise exception.ProtectedImageDelete(
                p_w_picpath_id=self.p_w_picpath_id)
        # Same rule as the v1 API: delayed delete applies only when the
        # option is on and the image has at least one location.
        if CONF.delayed_delete and self.locations:
            self.status = 'pending_delete'
        else:
            self.status = 'deleted'
# v2 api有gateway、proxy、domain这些概念,留个悬念,下次弄清楚。
这里是官方对gateway、domain、proxy的介绍:http://docs.openstack.org/developer/glance/domain_model.html
修改glance-scrubber.conf文件
[root@controller2 ~(keystone_admin)]# egrep -v "^$|^#" /etc/glance/glance-scrubber.conf
[DEFAULT]
scrub_time=300
delayed_delete=true
send_identity_headers=true
wakeup_time=60
daemon=True
admin_user=glance
admin_password=glance
admin_tenant_name=service
auth_url=http://controller2:35357/v2.0
auth_region=RegionOne
registry_host=controller2
registry_port=9191
[database]
connection = mysql+pymysql://glance:glance@controller2/glance?charset=utf8
[oslo_concurrency]
[oslo_policy]
[glance_store]
default_store = rbd
stores = rbd,http,cinder
rbd_store_pool = images
rbd_store_user = glance
rbd_store_ceph_conf = /etc/ceph/ceph.conf
rbd_store_chunk_size = 8
filesystem_store_datadirs = /var/lib/glance/images
启动glance-scrubber服务
[root@controller2 ~(keystone_admin)]# service openstack-glance-scrubber start
接下来看glance scrubber的启动过程
glance/cmd/scrubber.py
def main():
    """Entry point of the glance-scrubber command.

    Registers the scrubber options, parses configuration, sets up logging
    and the glance_store backends, then runs the Scrubber either as a
    periodic daemon (``CONF.daemon``) or once.

    Exits the process with an error message on RuntimeError.
    """
    CONF.register_cli_opts(scrubber.scrubber_cmd_cli_opts)
    CONF.register_opts(scrubber.scrubber_cmd_opts)

    try:
        config.parse_args()
        logging.setup(CONF, 'glance')

        glance_store.register_opts(config.CONF)
        # Per the original annotation: this calls create_stores in
        # glance_store/backend.py, which initialises SCHEME_TO_CLS_MAP.
        glance_store.create_stores(config.CONF)
        glance_store.verify_default_store()

        # The glance_store module itself becomes the scrubber's store_api.
        app = scrubber.Scrubber(glance_store)

        if CONF.daemon:
            # Keep glance-scrubber alive, waking up every wakeup_time
            # seconds.
            server = scrubber.Daemon(CONF.wakeup_time)
            server.start(app)
            server.wait()
        else:
            app.run()
    except RuntimeError as e:
        sys.exit("ERROR: %s" % e)


if __name__ == '__main__':
    main()
Daemon类
glance/scrubber.py
class Daemon(object):
    """Runs an application's ``run`` method periodically using eventlet."""

    def __init__(self, wakeup_time=300, threads=100):
        """Set up the event and the green pool used for periodic runs.

        :param wakeup_time: seconds between two scheduled runs
        :param threads: size of the green pool that runs the application
        """
        LOG.info(_LI("Starting Daemon: wakeup_time=%(wakeup_time)s "
                     "threads=%(threads)s"),
                 {'wakeup_time': wakeup_time, 'threads': threads})
        self.wakeup_time = wakeup_time
        self.event = eventlet.event.Event()
        # This pool is used for periodic instantiation of scrubber
        self.daemon_pool = eventlet.greenpool.GreenPool(threads)

    def start(self, application):
        """Kick off the first run; later runs reschedule themselves."""
        self._run(application)

    def wait(self):
        """Block on the event until it fires or the user hits Ctrl-C."""
        try:
            self.event.wait()
        except KeyboardInterrupt:
            msg = _LI("Daemon Shutdown on KeyboardInterrupt")
            LOG.info(msg)

    def _run(self, application):
        """Run ``application`` now and schedule the next run.

        :param application: object exposing ``run(event)``; per the
                            article this is the Scrubber instance
        """
        LOG.debug("Running application")
        # The run itself happens in a green thread from the pool...
        self.daemon_pool.spawn_n(application.run, self.event)
        # ...and this method re-arms itself after wakeup_time seconds.
        eventlet.spawn_after(self.wakeup_time, self._run, application)
        LOG.debug("Next run scheduled in %s seconds", self.wakeup_time)
Scrubber类
class Scrubber(object):
    """Deletes the backend data of images queued for delayed deletion.

    NOTE(review): ``p_w_picpath`` in identifiers and messages looks like a
    text-extraction artifact of ``image``; confirm against upstream Glance.
    """

    def __init__(self, store_api):
        """Configure the registry client, admin context and work pool.

        :param store_api: object exposing ``delete_from_backend(uri, ctx)``
        """
        LOG.info(_LI("Initializing scrubber with configuration: %s"),
                 six.text_type({'registry_host': CONF.registry_host,
                                'registry_port': CONF.registry_port}))

        self.store_api = store_api

        registry.configure_registry_client()
        # Per the original annotation: lives in
        # glance/registry/client/{v1,v2}/api.py and initialises
        # _CLIENT_CREDS, which is needed to obtain a registry client.
        registry.configure_registry_admin_creds()

        # Here we create a request context with credentials to support
        # delayed delete when using multi-tenant backend storage
        admin_user = CONF.admin_user
        # Must be configured; used for authorization when obtaining the
        # registry client instance.
        admin_tenant = CONF.admin_tenant_name

        # Author's note: authorization kept failing until
        # send_identity_headers was enabled -- a possible pitfall.
        if CONF.send_identity_headers:
            # When registry is operating in trusted-auth mode
            roles = [CONF.admin_role]
            self.admin_context = context.RequestContext(user=admin_user,
                                                        tenant=admin_tenant,
                                                        auth_token=None,
                                                        roles=roles)
            self.registry = registry.get_registry_client(self.admin_context)
        else:
            ctxt = context.RequestContext()
            self.registry = registry.get_registry_client(ctxt)
            auth_token = self.registry.auth_token
            self.admin_context = context.RequestContext(user=admin_user,
                                                        tenant=admin_tenant,
                                                        auth_token=auth_token)

        self.db_queue = get_scrub_queue()
        self.pool = eventlet.greenpool.GreenPool(CONF.scrub_pool_size)

    def run(self, event=None):
        """Scrub every image that currently has delete jobs.

        In daemon mode this is invoked every wakeup_time seconds.

        :param event: accepted for the Daemon's call signature; unused here
        """
        delete_jobs = self._get_delete_jobs()

        if delete_jobs:
            # starmap unpacks each (image_id, jobs) pair into the call;
            # list() forces the lazy iterator so all jobs actually run.
            list(self.pool.starmap(self._scrub_p_w_picpath,
                                   delete_jobs.items()))

    def _scrub_p_w_picpath(self, p_w_picpath_id, delete_jobs):
        """Delete all queued locations of one image, then mark it deleted.

        :param p_w_picpath_id: identifier of the image being scrubbed
        :param delete_jobs: iterable of (image id, location id, uri) tuples
        """
        if not delete_jobs:
            return

        LOG.info(_LI("Scrubbing p_w_picpath %(id)s from %(count)d locations."),
                 {'id': p_w_picpath_id, 'count': len(delete_jobs)})

        success = True
        for img_id, loc_id, uri in delete_jobs:
            try:
                self._delete_p_w_picpath_location_from_backend(img_id,
                                                               loc_id,
                                                               uri)
            except Exception:
                # The failure was already logged by the callee; remember it
                # so the image stays in 'pending_delete' for a later retry.
                success = False

        if success:
            p_w_picpath = self.registry.get_p_w_picpath(p_w_picpath_id)
            if p_w_picpath['status'] == 'pending_delete':
                # Status is updated through the registry client, which is
                # the component that talks to the database.
                self.registry.update_p_w_picpath(p_w_picpath_id,
                                                 {'status': 'deleted'})
            LOG.info(_LI("Image %s has been scrubbed successfully"),
                     p_w_picpath_id)
        else:
            LOG.warn(_LW("One or more p_w_picpath locations couldn't be scrubbed "
                         "from backend. Leaving p_w_picpath '%s' in 'pending_delete'"
                         " status") % p_w_picpath_id)

    def _delete_p_w_picpath_location_from_backend(self, p_w_picpath_id, loc_id,
                                                  uri):
        """Delete one location from the backend and flag it in the db.

        :param p_w_picpath_id: identifier of the owning image
        :param loc_id: location id, or '-' when there is no db row to update
        :param uri: location URI (encrypted if metadata_encryption_key set)
        :raises: re-raises any exception after logging it
        """
        if CONF.metadata_encryption_key:
            # Stored URIs are encrypted; decrypt before handing to the store.
            uri = crypt.urlsafe_decrypt(CONF.metadata_encryption_key, uri)

        try:
            LOG.debug("Scrubbing p_w_picpath %s from a location.", p_w_picpath_id)
            try:
                # store_api is the glance_store package (see __init__.py).
                self.store_api.delete_from_backend(uri, self.admin_context)
            except store_exceptions.NotFound:
                # Data already gone: still mark the location deleted below.
                LOG.info(_LI("Image location for p_w_picpath '%s' not found in "
                             "backend; Marking p_w_picpath location deleted in "
                             "db."), p_w_picpath_id)

            if loc_id != '-':
                db_api.get_api().p_w_picpath_location_delete(self.admin_context,
                                                             p_w_picpath_id,
                                                             int(loc_id),
                                                             'deleted')
            LOG.info(_LI("Image %s is scrubbed from a location."),
                     p_w_picpath_id)
        except Exception as e:
            LOG.error(_LE("Unable to scrub p_w_picpath %(id)s from a location. "
                          "Reason: %(exc)s ") %
                      {'id': p_w_picpath_id,
                       'exc': encodeutils.exception_to_unicode(e)})
            raise

    def _get_delete_jobs(self):
        """Group the queued locations by image id.

        :returns: dict mapping image id to a list of
                  (image id, location id, uri) tuples; empty dict when the
                  queue cannot be read
        """
        try:
            # get_all_locations belongs to the scrub queue (ScrubDBQueue).
            records = self.db_queue.get_all_locations()
        except Exception as err:
            LOG.error(_LE("Can not get scrub jobs from queue: %s") %
                      encodeutils.exception_to_unicode(err))
            return {}

        delete_jobs = {}
        for p_w_picpath_id, loc_id, loc_uri in records:
            delete_jobs.setdefault(p_w_picpath_id, []).append(
                (p_w_picpath_id, loc_id, loc_uri))
        return delete_jobs
# ScrubDBQueue类的get_all_locations函数
def get_all_locations(self):
    """Return the image locations that are due for scrubbing.

    An image qualifies once ``deleted_at + self.scrub_time`` is no longer
    in the future; of its locations only those whose status is
    'pending_delete' are returned.

    NOTE(review): ``p_w_picpath`` looks like a text-extraction artifact of
    ``image``; confirm against upstream Glance.

    :returns: a list of (image id, location id, uri) tuples from the
              scrub queue
    """
    ret = []

    for p_w_picpath in self._get_all_p_w_picpaths():
        deleted_at = p_w_picpath.get('deleted_at')
        if not deleted_at:
            continue

        # NOTE: Strip off microseconds which may occur after the last '.,'
        # Example: 2012-07-07T19:14:34.974216
        date_str = deleted_at.rsplit('.', 1)[0].rsplit(',', 1)[0]
        delete_time = calendar.timegm(time.strptime(date_str,
                                                    "%Y-%m-%dT%H:%M:%S"))

        # Not old enough yet: the scrub delay has not elapsed.
        if delete_time + self.scrub_time > time.time():
            continue

        for loc in p_w_picpath['location_data']:
            # Only locations still waiting for deletion are of interest.
            if loc['status'] != 'pending_delete':
                continue

            if self.metadata_encryption_key:
                # With an encryption key configured the URL is handed out
                # encrypted.
                uri = crypt.urlsafe_encrypt(self.metadata_encryption_key,
                                            loc['url'], 64)
            else:
                uri = loc['url']

            ret.append((p_w_picpath['id'], loc['id'], uri))
    return ret
下面都是关于glance_store,算是glance的子项目了,专门和后端真正存储打交道的。
glance_store/__init__.py
from .backend import * # noqa
from .driver import * # noqa
from .exceptions import * # noqa
# 来看store_api.delete_from_backend函数
glance_store/backend.py
def delete_from_backend(uri, context=None):
    """Removes chunks of data from backend specified by uri.

    :param uri: location URI of the data to delete
    :param context: optional request context forwarded to the store
    :returns: whatever the selected store's ``delete`` returns
    """
    # Parse the URI into a location object, pick the store that handles
    # the URI, and let that store perform the deletion.
    loc = location.get_location_from_uri(uri, conf=CONF)
    store = get_store_from_uri(uri)
    return store.delete(loc, context=context)
# get_store_from_uri函数
def get_store_from_uri(uri):
    """
    Given a URI, return the store object that would handle
    operations on the URI.

    :param uri: URI to analyze
    """
    # Take everything up to (not including) the character just before the
    # first '/', i.e. drop the ':' of '://'. For example 'rbd://...' gives
    # 'rbd' and 'file:///...' gives 'file'.
    # NOTE(review): a URI without any '/' makes find() return -1, so the
    # slice becomes uri[0:-2]; presumably such input ends up rejected as
    # an unknown scheme -- confirm.
    scheme = uri[0:uri.find('/') - 1]
    return get_store_from_scheme(scheme)
# get_store_from_scheme函数,从SCHEME_TO_CLS_MAP中获取对应的schema mapping
def get_store_from_scheme(scheme):
    """
    Given a scheme, return the appropriate store object
    for handling that scheme.

    The store is looked up in ``location.SCHEME_TO_CLS_MAP``. A store that
    is not flagged DRIVER_REUSABLE is re-created and re-registered before
    being returned.

    :param scheme: URI scheme such as 'rbd' or 'file'
    :raises exceptions.UnknownScheme: if no store is registered for it
    """
    if scheme not in location.SCHEME_TO_CLS_MAP:
        raise exceptions.UnknownScheme(scheme=scheme)
    scheme_info = location.SCHEME_TO_CLS_MAP[scheme]
    store = scheme_info['store']

    if not store.is_capable(capabilities.BitMasks.DRIVER_REUSABLE):
        # Driver instance isn't stateless so it can't
        # be reused safely and need recreation.
        store_entry = scheme_info['store_entry']
        store = _load_store(store.conf, store_entry, invoke_load=True)
        store.configure()
        try:
            scheme_map = {}
            loc_cls = store.get_store_location_class()
            for scheme in store.get_schemes():
                scheme_map[scheme] = {
                    'store': store,
                    'location_class': loc_cls,
                    'store_entry': store_entry
                }
                # NOTE(review): the source lost its indentation; this call
                # is placed inside the loop to match the upstream
                # glance_store layout as best recalled -- confirm.
                location.register_scheme_map(scheme_map)
        except NotImplementedError:
            # The new store cannot describe its schemes; just swap the
            # instance into the existing map entry.
            scheme_info['store'] = store
    return store
# 上面配置的stores是rbd,获得的就是glance_store/_drivers/rbd.py
@capabilities.check
def delete(self, location, context=None):
    """
    Takes a `glance_store.location.Location` object that indicates
    where to find the image file to delete.

    NOTE(review): ``p_w_picpath`` in identifiers looks like a
    text-extraction artifact of ``image``; confirm against upstream.

    :param location: `glance_store.location.Location` object, supplied
              from glance_store.location.get_location_from_uri()
    :param context: accepted for the store interface; not used here

    :raises: NotFound if image does not exist;
            InUseByStore if image is in use or snapshot unprotect failed
    """
    loc = location.store_location
    # Fall back to the store's configured pool when the location itself
    # does not name one.
    target_pool = loc.pool or self.pool
    self._delete_p_w_picpath(target_pool, loc.p_w_picpath, loc.snapshot)
# _delete_p_w_picpath函数
def _delete_p_w_picpath(self, target_pool, p_w_picpath_name,
                        snapshot_name=None, context=None):
    """
    Delete RBD image and snapshot.

    NOTE(review): ``p_w_picpath`` in identifiers and messages looks like a
    text-extraction artifact of ``image``; confirm against upstream.

    :param target_pool: name of the pool whose I/O context is opened
    :param p_w_picpath_name: Image's name
    :param snapshot_name: Image snapshot's name
    :param context: not used by this method

    :raises: NotFound if image does not exist;
            InUseByStore if image is in use or snapshot unprotect failed;
            HasSnapshot if the image still has snapshots
    """
    with self.get_connection(conffile=self.conf_file,
                             rados_id=self.user) as conn:
        with conn.open_ioctx(target_pool) as ioctx:
            try:
                # First remove snapshot.
                if snapshot_name is not None:
                    with rbd.Image(ioctx, p_w_picpath_name) as p_w_picpath:
                        try:
                            p_w_picpath.unprotect_snap(snapshot_name)
                            p_w_picpath.remove_snap(snapshot_name)
                        except rbd.ImageNotFound as exc:
                            # A missing snapshot is only logged; the image
                            # removal below still goes ahead.
                            msg = (_("Snap Operating Exception "
                                     "%(snap_exc)s "
                                     "Snapshot does not exist.") %
                                   {'snap_exc': exc})
                            LOG.debug(msg)
                        except rbd.ImageBusy as exc:
                            log_msg = (_LE("Snap Operating Exception "
                                           "%(snap_exc)s "
                                           "Snapshot is in use.") %
                                       {'snap_exc': exc})
                            LOG.error(log_msg)
                            raise exceptions.InUseByStore()

                # Then delete image.
                rbd.RBD().remove(ioctx, p_w_picpath_name)
            except rbd.ImageHasSnapshots:
                log_msg = (_LE("Remove p_w_picpath %(img_name)s failed. "
                               "It has snapshot(s) left.") %
                           {'img_name': p_w_picpath_name})
                LOG.error(log_msg)
                raise exceptions.HasSnapshot()
            except rbd.ImageBusy:
                log_msg = (_LE("Remove p_w_picpath %(img_name)s failed. "
                               "It is in use.") %
                           {'img_name': p_w_picpath_name})
                LOG.error(log_msg)
                raise exceptions.InUseByStore()
            except rbd.ImageNotFound:
                msg = _("RBD p_w_picpath %s does not exist") % p_w_picpath_name
                raise exceptions.NotFound(message=msg)
参考链接
eventlet常用函数介绍 http://www.cnblogs.com/Security-Darren/p/4168233.html
以上过程,理解不对的地方,还请指正,见谅!