glance支持延迟删除镜像的功能,个人觉得挺实用的,特别是在误删除的情况下。从某种程度来说,也算是对数据的一种另类保护吧。
大致实现原理是:有个delayed_delete配置项,用来控制是否开启延迟删除。如果为True,每次删除镜像的时候只会把镜像的状态置为pending_delete,并记录此刻的删除时间(deleted_at,下文代码中会换算成delete_time);有个scrubber的后台进程会每隔一段时间(wakeup_time)去检查是否有pending_delete的镜像要删除,删除的判断标准是:该镜像被删除的那个时刻的delete_time + scrub_time <= time.time(),其中scrub_time是镜像被删除后要隔多少秒才真正被擦除掉。
开启delayed_delete
[root@controller2 ~(keystone_admin)]# vim /etc/glance/glance-api.conf delayed_delete = True
来看glance api端删除镜像的时候判断是否开启了delayed_delete的代码
# v1 API: glance/api/v1/images.py
@utils.mutating
def delete(self, req, id):
    """
    Deletes the image and all its chunks from the Glance

    :param req: The WSGI/Webob Request object
    :param id: The opaque image identifier

    :raises: HttpBadRequest if image registry is invalid
    :raises: HttpNotFound if image or any chunk is not available
    :raises: HttpUnauthorized if image or any chunk is not
            deleteable by the requesting user
    """
    self._enforce(req, 'delete_image')

    image = self.get_image_meta_or_404(req, id)
    if image['protected']:
        msg = _("Image is protected")
        LOG.warn(msg)
        raise HTTPForbidden(explanation=msg,
                            request=req,
                            content_type="text/plain")

    if image['status'] == 'pending_delete':
        msg = (_("Forbidden to delete a %s image.") %
               image['status'])
        LOG.warn(msg)
        raise HTTPForbidden(explanation=msg,
                            request=req,
                            content_type="text/plain")
    elif image['status'] == 'deleted':
        msg = _("Image %s not found.") % id
        LOG.warn(msg)
        raise HTTPNotFound(explanation=msg, request=req,
                           content_type="text/plain")

    # The delayed-delete decision is made here: an image that has a
    # location is only marked 'pending_delete' when the option is enabled.
    if image['location'] and CONF.delayed_delete:
        status = 'pending_delete'
    else:
        status = 'deleted'
    # ...... (the remainder of the method is omitted in this excerpt)


# v2 API: glance/api/v2/images.py
@utils.mutating
def delete(self, req, image_id):
    """Delete an image through the v2 domain layer.

    Fetches the image from the repo built for the request context, calls
    its ``delete()`` and removes it from the repo; the exceptions caught
    below are converted into the corresponding webob HTTP errors.

    :param req: request object; ``req.context`` is used to build the repo
    :param image_id: identifier of the image to delete
    """
    image_repo = self.gateway.get_repo(req.context)
    try:
        image = image_repo.get(image_id)
        image.delete()  # follow this call: see the proxy/domain classes below
        image_repo.remove(image)
    except (glance_store.Forbidden, exception.Forbidden) as e:
        LOG.debug("User not permitted to delete image '%s'", image_id)
        raise webob.exc.HTTPForbidden(explanation=e.msg)
    except (glance_store.NotFound, exception.NotFound) as e:
        msg = (_("Failed to find image %(image_id)s to delete") %
               {'image_id': image_id})
        LOG.warn(msg)
        raise webob.exc.HTTPNotFound(explanation=msg)
    except glance_store.exceptions.InUseByStore as e:
        msg = (_("Image %(id)s could not be deleted "
                 "because it is in use: %(exc)s") %
               {"id": image_id,
                "exc": e.msg})
        LOG.warn(msg)
        raise webob.exc.HTTPConflict(explanation=msg)
    except glance_store.exceptions.HasSnapshot as e:
        raise webob.exc.HTTPConflict(explanation=e.msg)
    except exception.InvalidImageStatusTransition as e:
        raise webob.exc.HTTPBadRequest(explanation=e.msg)
    except exception.NotAuthenticated as e:
        raise webob.exc.HTTPUnauthorized(explanation=e.msg)


# glance/domain/proxy.py
class Image(object):
    """Proxy wrapper around a ``base`` image object.

    The attributes below are declared through ``_proxy('base', ...)``
    (presumably forwarding reads/writes to ``self.base`` -- confirm against
    the ``_proxy`` helper, which is not part of this excerpt).
    """

    def __init__(self, base, member_repo_proxy_class=None,
                 member_repo_proxy_kwargs=None):
        self.base = base
        self.helper = Helper(member_repo_proxy_class,
                             member_repo_proxy_kwargs)

    name = _proxy('base', 'name')
    image_id = _proxy('base', 'image_id')
    status = _proxy('base', 'status')
    created_at = _proxy('base', 'created_at')
    updated_at = _proxy('base', 'updated_at')
    visibility = _proxy('base', 'visibility')
    min_disk = _proxy('base', 'min_disk')
    min_ram = _proxy('base', 'min_ram')
    protected = _proxy('base', 'protected')
    locations = _proxy('base', 'locations')
    checksum = _proxy('base', 'checksum')
    owner = _proxy('base', 'owner')
    disk_format = _proxy('base', 'disk_format')
    container_format = _proxy('base', 'container_format')
    size = _proxy('base', 'size')
    virtual_size = _proxy('base', 'virtual_size')
    extra_properties = _proxy('base', 'extra_properties')
    tags = _proxy('base', 'tags')

    def delete(self):
        """Delegate deletion to the wrapped object."""
        # `base` here comes from glance/domain/__init__.py (shown below).
        self.base.delete()


# glance/domain/__init__.py
class Image(object):
    """Domain-level image whose ``delete()`` decides the resulting status."""

    valid_state_targets = {
        # Each key denotes a "current" state for the image. Corresponding
        # values list the valid states to which we can jump from that "current"
        # state.
        # NOTE(flwang): In v2, we are deprecating the 'killed' status, so it's
        # allowed to restore image from 'saving' to 'queued' so that upload
        # can be retried.
        'queued': ('saving', 'active', 'deleted'),
        'saving': ('active', 'killed', 'deleted', 'queued'),
        'active': ('pending_delete', 'deleted', 'deactivated'),
        'killed': ('deleted',),
        'pending_delete': ('deleted',),
        'deleted': (),
        'deactivated': ('active', 'deleted'),
    }

    def __init__(self, image_id, status, created_at, updated_at, **kwargs):
        """Store the required fields and pop the optional ones from kwargs.

        :raises: TypeError if an unrecognized keyword argument is left over
        """
        self.image_id = image_id
        self.status = status
        self.created_at = created_at
        self.updated_at = updated_at
        self.name = kwargs.pop('name', None)
        self.visibility = kwargs.pop('visibility', 'private')
        self.min_disk = kwargs.pop('min_disk', 0)
        self.min_ram = kwargs.pop('min_ram', 0)
        self.protected = kwargs.pop('protected', False)
        self.locations = kwargs.pop('locations', [])
        self.checksum = kwargs.pop('checksum', None)
        self.owner = kwargs.pop('owner', None)
        self._disk_format = kwargs.pop('disk_format', None)
        self._container_format = kwargs.pop('container_format', None)
        self.size = kwargs.pop('size', None)
        self.virtual_size = kwargs.pop('virtual_size', None)
        extra_properties = kwargs.pop('extra_properties', {})
        self.extra_properties = ExtraProperties(extra_properties)
        self.tags = kwargs.pop('tags', [])
        if kwargs:
            message = _("__init__() got unexpected keyword argument '%s'")
            raise TypeError(message % list(kwargs.keys())[0])

    def delete(self):
        """Mark the image deleted, or pending deletion when delayed.

        This is the ``delete()`` that the proxy's ``base`` resolves to.

        :raises: exception.ProtectedImageDelete if the image is protected
        """
        if self.protected:
            raise exception.ProtectedImageDelete(image_id=self.image_id)
        # Same check as in the v1 API above.
        if CONF.delayed_delete and self.locations:
            self.status = 'pending_delete'
        else:
            self.status = 'deleted'

# The v2 API introduces the gateway / proxy / domain concepts; the author
# leaves them as a cliffhanger to be clarified in a later article.
这里是官方对gateway、domain、proxy的介绍:http://docs.openstack.org/developer/glance/domain_model.html
修改glance-scrubber.conf文件
[root@controller2 ~(keystone_admin)]# egrep -v "^$|^#" /etc/glance/glance-scrubber.conf [DEFAULT] scrub_time=300 delayed_delete=true send_identity_headers=true wakeup_time=60 daemon=True admin_user=glance admin_password=glance admin_tenant_name=service auth_url=http://controller2:35357/v2.0 auth_region=RegionOne registry_host=controller2 registry_port=9191 [database] connection = mysql+pymysql://glance:glance@controller2/glance?charset=utf8 [oslo_concurrency] [oslo_policy] [glance_store] default_store = rbd stores = rbd,http,cinder rbd_store_pool = images rbd_store_user = glance rbd_store_ceph_conf = /etc/ceph/ceph.conf rbd_store_chunk_size = 8 filesystem_store_datadirs = /var/lib/glance/images
启动glance-scrubber服务
[root@controller2 ~(keystone_admin)]# service openstack-glance-scrubber start
接下来看glance scrubber的启动过程
# glance/cmd/scrubber.py
def main():
    """Entry point of the glance-scrubber command.

    Registers the scrubber options, parses the configuration, sets up the
    stores and then runs the scrubber either as a daemon (``CONF.daemon``)
    or once. A ``RuntimeError`` raised during this sequence terminates the
    process with an ``ERROR: ...`` message.
    """
    CONF.register_cli_opts(scrubber.scrubber_cmd_cli_opts)
    CONF.register_opts(scrubber.scrubber_cmd_opts)

    try:
        config.parse_args()
        logging.setup(CONF, 'glance')

        glance_store.register_opts(config.CONF)
        # Calls create_stores() in glance_store/backend.py, which
        # initializes SCHEME_TO_CLS_MAP.
        glance_store.create_stores(config.CONF)
        glance_store.verify_default_store()

        # glance_store is handed over and becomes the Scrubber's store_api.
        app = scrubber.Scrubber(glance_store)

        if CONF.daemon:
            # Keep glance-scrubber running as a daemon.
            server = scrubber.Daemon(CONF.wakeup_time)
            server.start(app)
            server.wait()
        else:
            app.run()
    except RuntimeError as e:
        sys.exit("ERROR: %s" % e)


if __name__ == '__main__':
    main()
Daemon类
# glance/scrubber.py
class Daemon(object):
    """Periodically launches an application's ``run`` in a green thread."""

    def __init__(self, wakeup_time=300, threads=100):
        """
        :param wakeup_time: seconds between two scheduled runs
        :param threads: size of the green pool used to spawn the runs
        """
        LOG.info(_LI("Starting Daemon: wakeup_time=%(wakeup_time)s "
                     "threads=%(threads)s"),
                 {'wakeup_time': wakeup_time, 'threads': threads})
        self.wakeup_time = wakeup_time
        self.event = eventlet.event.Event()
        # This pool is used for periodic instantiation of scrubber
        self.daemon_pool = eventlet.greenpool.GreenPool(threads)

    def start(self, application):
        """Kick off the first run; later runs are scheduled by ``_run``."""
        self._run(application)

    def wait(self):
        """Block on the daemon's event until interrupted from the keyboard."""
        try:
            self.event.wait()
        except KeyboardInterrupt:
            msg = _LI("Daemon Shutdown on KeyboardInterrupt")
            LOG.info(msg)

    def _run(self, application):
        """Spawn one run of ``application`` and re-schedule this method."""
        LOG.debug("Running application")
        # eventlet is used here as well: the run happens in the green pool.
        self.daemon_pool.spawn_n(application.run, self.event)
        # `application` is an instance of the Scrubber class shown below;
        # _run re-arms itself so the cycle repeats every wakeup_time seconds.
        eventlet.spawn_after(self.wakeup_time, self._run, application)
        LOG.debug("Next run scheduled in %s seconds", self.wakeup_time)
Scrubber类
class Scrubber(object):
    """Removes the data of images that are in 'pending_delete' status."""

    def __init__(self, store_api):
        """
        :param store_api: object providing ``delete_from_backend``; the
            glance_store module is passed in by glance/cmd/scrubber.py
        """
        LOG.info(_LI("Initializing scrubber with configuration: %s"),
                 six.text_type({'registry_host': CONF.registry_host,
                                'registry_port': CONF.registry_port}))

        self.store_api = store_api

        registry.configure_registry_client()
        # See glance/registry/client/v1 or v2 api.py: this initializes
        # _CLIENT_CREDS, which is needed to obtain a registry client.
        registry.configure_registry_admin_creds()

        # Here we create a request context with credentials to support
        # delayed delete when using multi-tenant backend storage
        admin_user = CONF.admin_user
        # Must be configured: used for authorization when obtaining the
        # registry client instance.
        admin_tenant = CONF.admin_tenant_name

        # Author's note: before send_identity_headers was enabled,
        # authorization kept failing -- possibly a pitfall.
        if CONF.send_identity_headers:
            # When registry is operating in trusted-auth mode
            roles = [CONF.admin_role]
            self.admin_context = context.RequestContext(user=admin_user,
                                                        tenant=admin_tenant,
                                                        auth_token=None,
                                                        roles=roles)
            self.registry = registry.get_registry_client(self.admin_context)
        else:
            ctxt = context.RequestContext()
            self.registry = registry.get_registry_client(ctxt)
            auth_token = self.registry.auth_token
            self.admin_context = context.RequestContext(user=admin_user,
                                                        tenant=admin_tenant,
                                                        auth_token=auth_token)

        self.db_queue = get_scrub_queue()
        self.pool = eventlet.greenpool.GreenPool(CONF.scrub_pool_size)

    def run(self, event=None):
        """Scrub every image that currently has pending delete jobs.

        When started through the Daemon, this is executed every
        wakeup_time seconds.
        """
        delete_jobs = self._get_delete_jobs()

        if delete_jobs:
            # Apply _scrub_image to each (image_id, jobs) pair in the pool;
            # list() drains the iterator so that all jobs are executed.
            list(self.pool.starmap(self._scrub_image, delete_jobs.items()))

    def _scrub_image(self, image_id, delete_jobs):
        """Delete all locations of one image, then mark it 'deleted'.

        :param image_id: identifier of the image being scrubbed
        :param delete_jobs: list of (image_id, location_id, uri) tuples
        """
        if not delete_jobs:
            return

        LOG.info(_LI("Scrubbing image %(id)s from %(count)d locations."),
                 {'id': image_id, 'count': len(delete_jobs)})

        success = True
        for img_id, loc_id, uri in delete_jobs:
            try:
                self._delete_image_location_from_backend(img_id, loc_id, uri)
            except Exception:
                # Best effort: the failure was already logged by the callee;
                # keep going with the remaining locations.
                success = False

        if success:
            image = self.registry.get_image(image_id)
            if image['status'] == 'pending_delete':
                # Use the registry client obtained in __init__ to update the
                # image status; the registry is what talks to the database.
                self.registry.update_image(image_id, {'status': 'deleted'})
            LOG.info(_LI("Image %s has been scrubbed successfully"), image_id)
        else:
            LOG.warn(_LW("One or more image locations couldn't be scrubbed "
                         "from backend. Leaving image '%s' in 'pending_delete'"
                         " status") % image_id)

    def _delete_image_location_from_backend(self, image_id, loc_id, uri):
        """Delete one location from the backend and mark it deleted in db.

        :param image_id: identifier of the image owning the location
        :param loc_id: location id; '-' means there is no db row to update
        :param uri: location uri (encrypted when metadata_encryption_key
            is configured)
        :raises: re-raises any exception after logging it
        """
        if CONF.metadata_encryption_key:
            # The uri is stored encrypted, so decrypt it first.
            uri = crypt.urlsafe_decrypt(CONF.metadata_encryption_key, uri)

        try:
            LOG.debug("Scrubbing image %s from a location.", image_id)
            try:
                # store_api is glance_store (see glance_store/__init__.py).
                self.store_api.delete_from_backend(uri, self.admin_context)
            except store_exceptions.NotFound:
                LOG.info(_LI("Image location for image '%s' not found in "
                             "backend; Marking image location deleted in "
                             "db."), image_id)

            if loc_id != '-':
                db_api.get_api().image_location_delete(self.admin_context,
                                                       image_id,
                                                       int(loc_id),
                                                       'deleted')
            LOG.info(_LI("Image %s is scrubbed from a location."), image_id)
        except Exception as e:
            LOG.error(_LE("Unable to scrub image %(id)s from a location. "
                          "Reason: %(exc)s ") %
                      {'id': image_id,
                       'exc': encodeutils.exception_to_unicode(e)})
            raise
# Scrubber._get_delete_jobs: builds the dict of images to be deleted.
def _get_delete_jobs(self):
    """Group the queued locations by image id.

    :returns: dict mapping image id to a list of
              (image_id, location_id, uri) tuples; an empty dict when the
              queue cannot be read
    """
    try:
        # get_all_locations() of the ScrubDBQueue class (shown below).
        records = self.db_queue.get_all_locations()
    except Exception as err:
        LOG.error(_LE("Can not get scrub jobs from queue: %s") %
                  encodeutils.exception_to_unicode(err))
        return {}

    delete_jobs = {}
    for image_id, loc_id, loc_uri in records:
        delete_jobs.setdefault(image_id, []).append(
            (image_id, loc_id, loc_uri))
    return delete_jobs


# ScrubDBQueue.get_all_locations
def get_all_locations(self):
    """Returns a list of image id and location tuple from scrub queue.

    :returns: a list of image id, location id and uri tuple from
              scrub queue
    """
    ret = []

    for image in self._get_all_images():
        deleted_at = image.get('deleted_at')
        if not deleted_at:
            continue

        # NOTE: Strip off microseconds which may occur after the last '.,'
        # Example: 2012-07-07T19:14:34.974216
        date_str = deleted_at.rsplit('.', 1)[0].rsplit(',', 1)[0]
        delete_time = calendar.timegm(time.strptime(date_str,
                                                    "%Y-%m-%dT%H:%M:%S"))

        # Skip images whose scrub time has not been reached yet.
        if delete_time + self.scrub_time > time.time():
            continue

        for loc in image['location_data']:
            # Only locations in 'pending_delete' status are scrubbed.
            if loc['status'] != 'pending_delete':
                continue

            if self.metadata_encryption_key:
                # Hand the uri over encrypted when a key is configured.
                uri = crypt.urlsafe_encrypt(self.metadata_encryption_key,
                                            loc['url'], 64)
            else:
                uri = loc['url']

            ret.append((image['id'], loc['id'], uri))
    return ret
下面都是关于glance_store,算是glance的子项目了,专门和后端真正存储打交道的。
# glance_store/__init__.py
from .backend import *  # noqa
from .driver import *  # noqa
from .exceptions import *  # noqa


# glance_store/backend.py -- this is store_api.delete_from_backend
def delete_from_backend(uri, context=None):
    """Removes chunks of data from backend specified by uri."""

    loc = location.get_location_from_uri(uri, conf=CONF)
    store = get_store_from_uri(uri)
    return store.delete(loc, context=context)


def get_store_from_uri(uri):
    """
    Given a URI, return the store object that would handle
    operations on the URI.

    :param uri: URI to analyze
    """
    # Takes the text in front of the first '/', minus the ':' before it,
    # e.g. 'rbd://...' gives 'rbd' and 'file:///...' gives 'file'.
    # NOTE(review): assumes the uri contains a '/' -- confirm for callers.
    scheme = uri[0:uri.find('/') - 1]
    return get_store_from_scheme(scheme)


def get_store_from_scheme(scheme):
    """
    Given a scheme, return the appropriate store object
    for handling that scheme.

    The store is looked up in location.SCHEME_TO_CLS_MAP.

    :raises: exceptions.UnknownScheme if the scheme is not registered
    """
    if scheme not in location.SCHEME_TO_CLS_MAP:
        raise exceptions.UnknownScheme(scheme=scheme)
    scheme_info = location.SCHEME_TO_CLS_MAP[scheme]
    store = scheme_info['store']

    if not store.is_capable(capabilities.BitMasks.DRIVER_REUSABLE):
        # Driver instance isn't stateless so it can't
        # be reused safely and need recreation.
        store_entry = scheme_info['store_entry']
        store = _load_store(store.conf, store_entry, invoke_load=True)
        store.configure()
        try:
            scheme_map = {}
            loc_cls = store.get_store_location_class()
            for scheme in store.get_schemes():
                scheme_map[scheme] = {
                    'store': store,
                    'location_class': loc_cls,
                    'store_entry': store_entry
                }
                location.register_scheme_map(scheme_map)
        except NotImplementedError:
            scheme_info['store'] = store
    return store


# The store configured above is rbd, so the store object obtained is the
# driver in glance_store/_drivers/rbd.py; the two methods below belong to it.
@capabilities.check
def delete(self, location, context=None):
    """
    Takes a `glance_store.location.Location` object that indicates
    where to find the image file to delete.

    :param location: `glance_store.location.Location` object, supplied
              from glance_store.location.get_location_from_uri()

    :raises: NotFound if image does not exist;
            InUseByStore if image is in use or snapshot unprotect failed
    """
    loc = location.store_location
    # Fall back to the store's own pool when the location names none.
    target_pool = loc.pool or self.pool
    self._delete_image(target_pool, loc.image, loc.snapshot)


def _delete_image(self, target_pool, image_name,
                  snapshot_name=None, context=None):
    """
    Delete RBD image and snapshot.

    :param image_name: Image's name
    :param snapshot_name: Image snapshot's name

    :raises: NotFound if image does not exist;
            InUseByStore if image is in use or snapshot unprotect failed
    """
    with self.get_connection(conffile=self.conf_file,
                             rados_id=self.user) as conn:
        with conn.open_ioctx(target_pool) as ioctx:
            try:
                # First remove snapshot.
                if snapshot_name is not None:
                    with rbd.Image(ioctx, image_name) as image:
                        try:
                            image.unprotect_snap(snapshot_name)
                            image.remove_snap(snapshot_name)
                        except rbd.ImageNotFound as exc:
                            msg = (_("Snap Operating Exception "
                                     "%(snap_exc)s "
                                     "Snapshot does not exist.") %
                                   {'snap_exc': exc})
                            LOG.debug(msg)
                        except rbd.ImageBusy as exc:
                            log_msg = (_LE("Snap Operating Exception "
                                           "%(snap_exc)s "
                                           "Snapshot is in use.") %
                                       {'snap_exc': exc})
                            LOG.error(log_msg)
                            raise exceptions.InUseByStore()

                # Then delete image.
                rbd.RBD().remove(ioctx, image_name)
            except rbd.ImageHasSnapshots:
                log_msg = (_LE("Remove image %(img_name)s failed. "
                               "It has snapshot(s) left.") %
                           {'img_name': image_name})
                LOG.error(log_msg)
                raise exceptions.HasSnapshot()
            except rbd.ImageBusy:
                log_msg = (_LE("Remove image %(img_name)s failed. "
                               "It is in use.") %
                           {'img_name': image_name})
                LOG.error(log_msg)
                raise exceptions.InUseByStore()
            except rbd.ImageNotFound:
                msg = _("RBD image %s does not exist") % image_name
                raise exceptions.NotFound(message=msg)
参考链接
eventlet常用函数介绍 http://www.cnblogs.com/Security-Darren/p/4168233.html
以上过程,理解不对的地方,还请指正,见谅!