Cinder backup is a backup of a Cinder volume; the backend can use different backup drivers such as NFS, Ceph, Swift, or GlusterFS.
This walkthrough uses GlusterFS as the volume backend and NFS as the backup backend (CentOS 7 environment).
1. For how to install and configure GlusterFS, see http://iceyao.blog.51cto.com/9426658/1634839
2. Installing and configuring the NFS server
# NFS server configuration
yum install nfs-utils        # install the NFS packages
mkdir -p /backup             # create the directory to export
vim /etc/exports             # edit exports and set the access permissions
/backup 172.16.40.0/24(rw,sync,no_root_squash,no_all_squash)

# start the services
systemctl enable rpcbind
systemctl enable nfs-server
systemctl restart rpcbind
systemctl restart nfs-server
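After editing /etc/exports the export can be re-published and checked without restarting the services; a quick sanity check with the standard nfs-utils tools:

exportfs -r    # re-export everything listed in /etc/exports
exportfs -v    # list the active exports and their options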
Note: if glusterfs-server and nfs-server run on the same machine, GlusterFS's built-in NFS server must be disabled to avoid a port conflict.
gluster volume set <volume_name> nfs.disable on    # turn off GlusterFS's built-in NFS server
rpcinfo -p | grep nfs                              # check which ports the NFS server is using
Reference: https://forum.proxmox.com/threads/mount-nfs-requested-nfs-version-or-transport-protocol-is-not-supported.20260/
# NFS client configuration
yum install nfs-utils
systemctl enable rpcbind
systemctl restart rpcbind
showmount -e 172.16.40.189    # verify that the /backup export can be discovered (pass the NFS server address)
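Before wiring the share into Cinder it is worth mounting it manually from the client to confirm read/write access; a small sanity test (the /mnt/backup_test mount point is just a placeholder for the test):

mkdir -p /mnt/backup_test
mount -t nfs 172.16.40.189:/backup /mnt/backup_test
touch /mnt/backup_test/probe && rm /mnt/backup_test/probe    # confirm write access
umount /mnt/backup_test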
3. OpenStack Cinder configuration
stack@yxb-devstack:~$ cat /etc/cinder/cinder.conf
[DEFAULT]
enabled_backends = glusterfs
os_region_name = RegionOne
backup_driver = cinder.backup.drivers.nfs
backup_share=172.16.40.189:/backup
#backup_mount_options = "vers=3"
#backup_mount_point_base = $state_path/backup_mount    # mount point base for the backup share

[glusterfs]
volume_driver=cinder.volume.drivers.glusterfs.GlusterfsDriver
glusterfs_shares_config = /etc/cinder/glusterfs_shares
volume_backend_name = glusterfs

# restart the cinder-volume and cinder-backup services
systemctl restart openstack-cinder-volume openstack-cinder-backup

# enable the cinder backup feature in Horizon
vim /etc/openstack-dashboard/local_settings
OPENSTACK_CINDER_FEATURES = {
    'enable_backup': True,
}
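The glusterfs_shares_config option above points at a shares file read by the GlusterFS volume driver, one share per line; a minimal sketch, assuming the GlusterFS volume created in part 1 is named gv0 and is served from the same host (both the host and the volume name are placeholders, substitute your own):

stack@yxb-devstack:~$ cat /etc/cinder/glusterfs_shares
172.16.40.189:/gv0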
References:
http://docs.openstack.org/admin-guide-cloud/blockstorage_volume_backups.html
http://docs.openstack.org/liberty/config-reference/content/nfs-backup-driver.html
https://access.redhat.com/documentation/en-US/Red_Hat_Storage/3/html/Administration_Guide/sect-NFS.html#Manually_Mounting_Volumes_Using_NFS (glusterfs-nfs)
http://docs.openstack.org/liberty/config-reference/content/GlusterFS-driver.html # a GlusterFS backup driver is also supported (see the sketch after this list)
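For completeness, pointing the backup backend at GlusterFS instead of NFS is mostly a matter of swapping the backup driver options; a minimal sketch, assuming a dedicated GlusterFS volume named backup_gv exists (the share name is a placeholder, and the option names are taken from the Liberty config reference linked above):

[DEFAULT]
backup_driver = cinder.backup.drivers.glusterfs
glusterfs_backup_share = 172.16.40.189:/backup_gv
#glusterfs_backup_mount_point = $state_path/backup_mount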
4. Using the cinder backup CLI
stack@yxb-devstack:~$ cinder help backup-create    # the help output shows how it is used
usage: cinder backup-create [--container <container>] [--name <name>]
                            [--description <description>] [--incremental]
                            [--force]
                            <volume>

Creates a volume backup.

Positional arguments:
  <volume>              Name or ID of volume to backup.

Optional arguments:
  --container <container>
                        Backup container name. Default=None.
  --name <name>         Backup name. Default=None.
  --description <description>
                        Backup description. Default=None.
  --incremental         Incremental backup. Default=False.
  --force               Allows or disallows backup of a volume when the
                        volume is attached to an instance. If set to True,
                        backs up the volume whether its status is "available"
                        or "in-use". The backup of an "in-use" volume means
                        your data is crash consistent. Default=False.
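A typical sequence against this setup might look like the following (the volume name vol01 and the backup names are placeholders):

cinder backup-create --name full-bak vol01                  # full backup of an available volume
cinder backup-create --name inc-bak --incremental vol01     # incremental backup on top of the full one
cinder backup-create --name live-bak --force vol01          # back up a volume that is attached (in-use)
cinder backup-list                                          # list backups and their status
cinder backup-restore <backup-id>                           # restore to a new volume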
Summary:
1. Cinder backup currently supports full, incremental, online (in-use), and offline backups.
2. An incremental backup must be built on top of an existing full backup.
3. When deleting backups, any incremental backups based on a full backup must be deleted before the full backup itself; the incrementals also have to be deleted in order, newest first (see the example after this list).
4. Restoring a backup to an in-use (online) volume is not supported (neither in Liberty nor on the latest master).
5. When restoring from a full backup for the first time, the volume name cannot currently be specified (a patch on the master branch fixes this: https://review.openstack.org/#/c/275910/).
6. Backing up a GlusterFS volume has restrictions: the volume must be in raw format and must not have snapshots; the related configuration option is nas_volume_prov_type = thick (thin|thick). (This limitation exists in Liberty but is gone on the latest master.)
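For point 3, the deletion order in practice looks like this (the backup names follow the CLI example above and are placeholders):

cinder backup-delete inc-bak     # delete the newest incremental first
cinder backup-delete full-bak    # the full backup can go once nothing depends on it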
5. Walking through the cinder backup code path (latest master branch)
cinderclient/v2/volume_backups.py    # the cinder client sends the request to the API service
def create(self, volume_id, container=None,
           name=None, description=None,
           incremental=False, force=False,
           snapshot_id=None):
    """Creates a volume backup.

    :param volume_id: The ID of the volume to backup.
    :param container: The name of the backup service container.
    :param name: The name of the backup.
    :param description: The description of the backup.
    :param incremental: Incremental backup.
    :param force: If True, allows an in-use volume to be backed up.
    :rtype: :class:`VolumeBackup`
    """
    body = {'backup': {'volume_id': volume_id,
                       'container': container,
                       'name': name,
                       'description': description,
                       'incremental': incremental,
                       'force': force,
                       'snapshot_id': snapshot_id, }}
    return self._create('/backups', body, 'backup')
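For orientation, this is the method that runs when a backup is requested through python-cinderclient; a minimal sketch of driving it directly (the Keystone credentials, auth URL, and volume ID are placeholders):

from cinderclient import client

# placeholder credentials and endpoint
cinder = client.Client('2', 'admin', 'secret', 'demo',
                       'http://controller:5000/v2.0')
# placeholder volume ID; this calls the create() method shown above
backup = cinder.backups.create('9f4576c9-0000-0000-0000-000000000000',
                               name='full-bak', incremental=False)
print(backup.id, backup.status)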
cinder/api/contrib/backups.py
@wsgi.response(202)
@wsgi.serializers(xml=BackupTemplate)
@wsgi.deserializers(xml=CreateDeserializer)
def create(self, req, body):
    """Create a new backup."""
    LOG.debug('Creating new backup %s', body)
    self.assert_valid_body(body, 'backup')

    context = req.environ['cinder.context']
    backup = body['backup']

    try:
        volume_id = backup['volume_id']
    except KeyError:
        msg = _("Incorrect request body format")
        raise exc.HTTPBadRequest(explanation=msg)
    container = backup.get('container', None)
    self.validate_name_and_description(backup)
    name = backup.get('name', None)
    description = backup.get('description', None)
    incremental = backup.get('incremental', False)
    force = backup.get('force', False)
    snapshot_id = backup.get('snapshot_id', None)

    LOG.info(_LI("Creating backup of volume %(volume_id)s in container"
                 " %(container)s"),
             {'volume_id': volume_id, 'container': container},
             context=context)

    try:
        new_backup = self.backup_api.create(context, name, description,  # forward the request to the backup API
                                            volume_id, container,
                                            incremental, None, force,
                                            snapshot_id)
    except (exception.InvalidVolume,
            exception.InvalidSnapshot) as error:
        raise exc.HTTPBadRequest(explanation=error.msg)
    except (exception.VolumeNotFound,
            exception.SnapshotNotFound) as error:
        raise exc.HTTPNotFound(explanation=error.msg)
    except exception.ServiceNotFound as error:
        raise exc.HTTPInternalServerError(explanation=error.msg)

    retval = self._view_builder.summary(req, dict(new_backup))
    return retval
cinder/backup/api.py
def create(self, context, name, description, volume_id,
           container, incremental=False, availability_zone=None,
           force=False, snapshot_id=None):
    """Make the RPC call to create a volume backup."""
    check_policy(context, 'create')  # check the policy permission
    volume = self.volume_api.get(context, volume_id)
    snapshot = None
    if snapshot_id:
        snapshot = self.volume_api.get_snapshot(context, snapshot_id)

    if volume['status'] not in ["available", "in-use"]:
        msg = (_('Volume to be backed up must be available '
                 'or in-use, but the current status is "%s".')
               % volume['status'])
        raise exception.InvalidVolume(reason=msg)
    elif volume['status'] in ["in-use"] and not snapshot_id and not force:
        msg = _('Backing up an in-use volume must use '
                'the force flag.')
        raise exception.InvalidVolume(reason=msg)
    elif snapshot_id and snapshot['status'] not in ["available"]:
        msg = (_('Snapshot to be backed up must be available, '
                 'but the current status is "%s".')
               % snapshot['status'])
        raise exception.InvalidSnapshot(reason=msg)

    previous_status = volume['status']
    host = self._get_available_backup_service_host(
        None, volume.availability_zone,
        volume_utils.extract_host(volume.host, 'host'))

    # Reserve a quota before setting volume status and backup status
    try:
        reserve_opts = {'backups': 1,
                        'backup_gigabytes': volume['size']}
        reservations = QUOTAS.reserve(context, **reserve_opts)
    except exception.OverQuota as e:
        overs = e.kwargs['overs']
        usages = e.kwargs['usages']
        quotas = e.kwargs['quotas']

        def _consumed(resource_name):
            return (usages[resource_name]['reserved'] +
                    usages[resource_name]['in_use'])

        for over in overs:
            if 'gigabytes' in over:
                msg = _LW("Quota exceeded for %(s_pid)s, tried to create "
                          "%(s_size)sG backup (%(d_consumed)dG of "
                          "%(d_quota)dG already consumed)")
                LOG.warning(msg, {'s_pid': context.project_id,
                                  's_size': volume['size'],
                                  'd_consumed': _consumed(over),
                                  'd_quota': quotas[over]})
                raise exception.VolumeBackupSizeExceedsAvailableQuota(
                    requested=volume['size'],
                    consumed=_consumed('backup_gigabytes'),
                    quota=quotas['backup_gigabytes'])
            elif 'backups' in over:
                msg = _LW("Quota exceeded for %(s_pid)s, tried to create "
                          "backups (%(d_consumed)d backups "
                          "already consumed)")
                LOG.warning(msg, {'s_pid': context.project_id,
                                  'd_consumed': _consumed(over)})
                raise exception.BackupLimitExceeded(
                    allowed=quotas[over])

    # Find the latest backup and use it as the parent backup to do an
    # incremental backup.
    latest_backup = None
    if incremental:
        backups = objects.BackupList.get_all_by_volume(context.elevated(),
                                                       volume_id)
        if backups.objects:
            # NOTE(xyang): The 'data_timestamp' field records the time
            # when the data on the volume was first saved. If it is
            # a backup from volume, 'data_timestamp' will be the same
            # as 'created_at' for a backup. If it is a backup from a
            # snapshot, 'data_timestamp' will be the same as
            # 'created_at' for a snapshot.
            # If not backing up from snapshot, the backup with the latest
            # 'data_timestamp' will be the parent; If backing up from
            # snapshot, the backup with the latest 'data_timestamp' will
            # be chosen only if 'data_timestamp' is earlier than the
            # 'created_at' timestamp of the snapshot; Otherwise, the
            # backup will not be chosen as the parent.
            # For example, a volume has a backup taken at 8:00, then
            # a snapshot taken at 8:10, and then a backup at 8:20.
            # When taking an incremental backup of the snapshot, the
            # parent should be the backup at 8:00, not 8:20, and the
            # 'data_timestamp' of this new backup will be 8:10.
            latest_backup = max(
                backups.objects,
                key=lambda x: x['data_timestamp']
                if (not snapshot or (snapshot and x['data_timestamp']
                                     < snapshot['created_at']))
                else datetime(1, 1, 1, 1, 1, 1,
                              tzinfo=timezone('UTC')))
        else:
            msg = _('No backups available to do an incremental backup.')
            raise exception.InvalidBackup(reason=msg)

    parent_id = None
    if latest_backup:
        parent_id = latest_backup.id
        if latest_backup['status'] != fields.BackupStatus.AVAILABLE:
            msg = _('The parent backup must be available for '
                    'incremental backup.')
            raise exception.InvalidBackup(reason=msg)

    data_timestamp = None
    if snapshot_id:
        snapshot = objects.Snapshot.get_by_id(context, snapshot_id)
        data_timestamp = snapshot.created_at

    self.db.volume_update(context, volume_id,
                          {'status': 'backing-up',
                           'previous_status': previous_status})

    backup = None
    try:
        kwargs = {
            'user_id': context.user_id,
            'project_id': context.project_id,
            'display_name': name,
            'display_description': description,
            'volume_id': volume_id,
            'status': fields.BackupStatus.CREATING,
            'container': container,
            'parent_id': parent_id,
            'size': volume['size'],
            'host': host,
            'snapshot_id': snapshot_id,
            'data_timestamp': data_timestamp,
        }
        backup = objects.Backup(context=context, **kwargs)
        backup.create()
        if not snapshot_id:
            backup.data_timestamp = backup.created_at
            backup.save()
        QUOTAS.commit(context, reservations)
    except Exception:
        with excutils.save_and_reraise_exception():
            try:
                if backup and 'id' in backup:
                    backup.destroy()
            finally:
                QUOTAS.rollback(context, reservations)

    # TODO(DuncanT): In future, when we have a generic local attach,
    #                this can go via the scheduler, which enables
    #                better load balancing and isolation of services
    self.backup_rpcapi.create_backup(context, backup)  # the RPC request goes through the message queue; not shown here

    return backup
cinder/backup/manager.py
def create_backup(self, context, backup):
    """Create volume backups using configured backup service."""
    volume_id = backup.volume_id
    volume = objects.Volume.get_by_id(context, volume_id)
    previous_status = volume.get('previous_status', None)
    LOG.info(_LI('Create backup started, backup: %(backup_id)s '
                 'volume: %(volume_id)s.'),
             {'backup_id': backup.id, 'volume_id': volume_id})

    self._notify_about_backup_usage(context, backup, "create.start")

    backup.host = self.host
    backup.service = self.driver_name
    backup.availability_zone = self.az
    backup.save()

    expected_status = 'backing-up'
    actual_status = volume['status']
    if actual_status != expected_status:
        err = _('Create backup aborted, expected volume status '
                '%(expected_status)s but got %(actual_status)s.') % {
            'expected_status': expected_status,
            'actual_status': actual_status,
        }
        self._update_backup_error(backup, context, err)
        raise exception.InvalidVolume(reason=err)

    expected_status = fields.BackupStatus.CREATING
    actual_status = backup.status
    if actual_status != expected_status:
        err = _('Create backup aborted, expected backup status '
                '%(expected_status)s but got %(actual_status)s.') % {
            'expected_status': expected_status,
            'actual_status': actual_status,
        }
        self._update_backup_error(backup, context, err)
        backup.save()
        raise exception.InvalidBackup(reason=err)

    try:
        self._run_backup(context, backup, volume)  # step into this
    except Exception as err:
        with excutils.save_and_reraise_exception():
            self.db.volume_update(context, volume_id,
                                  {'status': previous_status,
                                   'previous_status': 'error_backing-up'})
            self._update_backup_error(backup, context, six.text_type(err))

    # Restore the original status.
    self.db.volume_update(context, volume_id,
                          {'status': previous_status,
                           'previous_status': 'backing-up'})
    backup.status = fields.BackupStatus.AVAILABLE
    backup.size = volume['size']
    backup.save()

    # Handle the num_dependent_backups of parent backup when child backup
    # has created successfully.
    if backup.parent_id:
        parent_backup = objects.Backup.get_by_id(context,
                                                 backup.parent_id)
        parent_backup.num_dependent_backups += 1
        parent_backup.save()
    LOG.info(_LI('Create backup finished. backup: %s.'), backup.id)
    self._notify_about_backup_usage(context, backup, "create.end")

def _run_backup(self, context, backup, volume):
    backup_service = self.service.get_backup_driver(context)

    properties = utils.brick_get_connector_properties()
    backup_dic = self.volume_rpcapi.get_backup_device(context,
                                                      backup, volume)
    try:
        backup_device = backup_dic.get('backup_device')
        is_snapshot = backup_dic.get('is_snapshot')
        attach_info = self._attach_device(context, backup_device,
                                          properties, is_snapshot)
        try:
            device_path = attach_info['device']['path']
            if isinstance(device_path, six.string_types):
                if backup_dic.get('secure_enabled', False):
                    with open(device_path) as device_file:
                        backup_service.backup(backup, device_file)
                else:
                    with utils.temporary_chown(device_path):
                        with open(device_path) as device_file:
                            backup_service.backup(backup, device_file)
            else:
                backup_service.backup(backup, device_path)

        finally:
            self._detach_device(context, attach_info,
                                backup_device, properties,
                                is_snapshot)
    finally:
        backup = objects.Backup.get_by_id(context, backup.id)
        self._cleanup_temp_volumes_snapshots_when_backup_created(
            context, backup)
cinder/backup/drivers/nfs.py (def get_backup_driver) --> cinder/volume/rpcapi.py (def get_backup_device) --> cinder/volume/manager.py (def get_backup_device)
--> cinder/volume/driver.py
def get_backup_device(self, context, backup):
    """Get a backup device from an existing volume.

    The function returns a volume or snapshot to backup service,
    and then backup service attaches the device and does backup.
    """
    backup_device = None
    is_snapshot = False
    if (self.backup_use_temp_snapshot() and
            self.snapshot_remote_attachable()):
        (backup_device, is_snapshot) = (
            self._get_backup_volume_temp_snapshot(context, backup))
    else:
        backup_device = self._get_backup_volume_temp_volume(
            context, backup)
        is_snapshot = False
    return (backup_device, is_snapshot)

def _get_backup_volume_temp_volume(self, context, backup):
    """Return a volume to do backup.

    To backup a snapshot, create a temp volume from the snapshot and
    back it up. Otherwise to backup an in-use volume, create a temp
    volume and back it up.
    """
    volume = objects.Volume.get_by_id(context, backup.volume_id)
    snapshot = None
    if backup.snapshot_id:
        snapshot = objects.Snapshot.get_by_id(context, backup.snapshot_id)

    LOG.debug('Creating a new backup for volume %s.', volume['name'])

    temp_vol_ref = None
    device_to_backup = volume

    # NOTE(xyang): If it is to backup from snapshot, create a temp
    # volume from the source snapshot, backup the temp volume, and
    # then clean up the temp volume.
    if snapshot:
        temp_vol_ref = self._create_temp_volume_from_snapshot(
            context, volume, snapshot)
        backup.temp_volume_id = temp_vol_ref['id']
        backup.save()
        device_to_backup = temp_vol_ref

    else:
        # NOTE(xyang): Check volume status if it is not to backup from
        # snapshot; if 'in-use', create a temp volume from the source
        # volume, backup the temp volume, and then clean up the temp
        # volume; if 'available', just backup the volume.
        previous_status = volume.get('previous_status')
        if previous_status == "in-use":
            temp_vol_ref = self._create_temp_cloned_volume(
                context, volume)
            backup.temp_volume_id = temp_vol_ref['id']
            backup.save()
            device_to_backup = temp_vol_ref

    return device_to_backup

def _create_temp_cloned_volume(self, context, volume):
    temp_volume = {
        'size': volume['size'],
        'display_name': 'backup-vol-%s' % volume['id'],
        'host': volume['host'],
        'user_id': context.user_id,
        'project_id': context.project_id,
        'status': 'creating',
        'attach_status': 'detached',
        'availability_zone': volume.availability_zone,
    }
    temp_vol_ref = self.db.volume_create(context, temp_volume)
    try:
        self.create_cloned_volume(temp_vol_ref, volume)
    except Exception:
        with excutils.save_and_reraise_exception():
            self.db.volume_destroy(context.elevated(),
                                   temp_vol_ref['id'])

    self.db.volume_update(context, temp_vol_ref['id'],
                          {'status': 'available'})
    return temp_vol_ref
--> cinder/volume/drivers/remotefs.py
@locked_volume_id_operation
def create_cloned_volume(self, volume, src_vref):
    """Creates a clone of the specified volume."""
    return self._create_cloned_volume(volume, src_vref)

def _create_cloned_volume(self, volume, src_vref):
    LOG.info(_LI('Cloning volume %(src)s to volume %(dst)s'),
             {'src': src_vref['id'],
              'dst': volume['id']})

    if src_vref['status'] != 'available':
        msg = _("Volume status must be 'available'.")
        raise exception.InvalidVolume(msg)

    volume_name = CONF.volume_name_template % volume['id']

    volume_info = {'provider_location': src_vref['provider_location'],
                   'size': src_vref['size'],
                   'id': volume['id'],
                   'name': volume_name,
                   'status': src_vref['status']}
    temp_snapshot = {'volume_name': volume_name,
                     'size': src_vref['size'],
                     'volume_size': src_vref['size'],
                     'name': 'clone-snap-%s' % src_vref['id'],
                     'volume_id': src_vref['id'],
                     'id': 'tmp-snap-%s' % src_vref['id'],
                     'volume': src_vref}
    self._create_snapshot(temp_snapshot)  # take a snapshot
    try:
        self._copy_volume_from_snapshot(temp_snapshot,  # a GlusterFS volume is actually backed up via a qemu snapshot
                                        volume_info,
                                        volume['size'])
    finally:
        self._delete_snapshot(temp_snapshot)  # delete the temporary snapshot

    return {'provider_location': src_vref['provider_location']}
Note that while backing up a GlusterFS volume, the source volume is left in the backing-up status; this is a community bug, see https://review.openstack.org/#/c/288875/ for details.