1. Identify the failed disks
dmesg
[6061566.878131] sd 0:2:2:0: [sdc] ...
[6061566.878162] end_request: critical medium error, dev sdc, sector 4973176
[6061617.793479] sd 0:2:5:0: [sdf] ...
[6061617.793549] end_request: critical medium error, dev sdf, sector 12773264
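To spot every disk logging this class of error at once, a one-line scan of the kernel log works (a minimal sketch; the pattern matches the messages above):
# count critical medium errors per device
dmesg | grep -oE 'critical medium error, dev sd[a-z]+' | sort | uniq -c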
Alternatively, check with the MegaCli tool:
[root@hh-yun-ceph-cinder016-128056 ~]# MegaCli -PDList -aALL | less
Enclosure Device ID: 0
Slot Number: 3
Enclosure position: 0
Device Id: 2
Sequence Number: 2
Media Error Count: 227 <- physical media failure
Other Error Count: 2
Enclosure Device ID: 0
Slot Number: 6
Enclosure position: 0
Device Id: 5
Sequence Number: 2
Media Error Count: 573 <- physical media failure
Other Error Count: 0
Predictive Failure Count: 0
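For a quick summary across all slots, filter the PDList output down to the counters shown above (a sketch):
# list each slot with its media/other error counters
MegaCli -PDList -aALL | grep -E 'Slot Number|Media Error Count|Other Error Count'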
2. Identify the corresponding OSD numbers
[root@hh-yun-ceph-cinder016-128056 ~]# mount | grep -E 'sdc|sdf'
/dev/sdc1 on /var/lib/ceph/osd/ceph-11 type xfs (rw,relatime,attr2,inode64,noquota)
/dev/sdf1 on /var/lib/ceph/osd/ceph-14 type xfs (rw,relatime,attr2,inode64,noquota)
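As a cross-check in the other direction, df on each OSD data directory reports the backing device:
# confirm which device backs each suspect OSD
df /var/lib/ceph/osd/ceph-11 /var/lib/ceph/osd/ceph-14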
3. Stop the failed OSDs
[root@hh-yun-ceph-cinder016-128056 ~]# /etc/init.d/ceph stop osd.11
=== osd.11 ===
Stopping Ceph osd.11 on hh-yun-ceph-cinder016-128056...kill 173789...kill 173789...done
[root@hh-yun-ceph-cinder016-128056 ~]# /etc/init.d/ceph stop osd.14
=== osd.14 ===
Stopping Ceph osd.14 on hh-yun-ceph-cinder016-128056...kill 173789...kill 173789...done
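Stopping the daemons only marks the OSDs down; the cluster marks them out (and starts backfilling) once the down-out interval expires. To kick off rebalancing immediately, the OSDs can be marked out explicitly (optional; not part of the original capture):
# mark the failed OSDs out so backfill starts right away
ceph osd out 11
ceph osd out 14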
4. Check the progress of the Ceph data migration
[root@hh-yun-ceph-cinder016-128056 ~]# ceph -s
cluster dc4f91c1-8792-4948-b68f-2fcea75f53b9
health HEALTH_WARN 103 pgs backfill; 184 pgs backfilling; 288 pgs degraded; 11 pgs peering; 1 pgs recovery_wait; 287 pgs stuck degraded; 325 pgs stuck unclean; 294 pgs stuck undersized; 294 pgs undersized; 5 requests are blocked > 32 sec; recovery 57850/11243389 objects degraded (0.515%); 154869/11243389 objects misplaced (1.377%)
monmap e3: 5 mons at {hh-yun-ceph-cinder015-128055=240.30.128.55:6789/0,hh-yun-ceph-cinder017-128057=240.30.128.57:6789/0,hh-yun-ceph-cinder024-128074=240.30.128.74:6789/0,hh-yun-ceph-cinder025-128075=240.30.128.75:6789/0,hh-yun-ceph-cinder026-128076=240.30.128.76:6789/0}, election epoch 22, quorum 0,1,2,3,4 hh-yun-ceph-cinder015-128055,hh-yun-ceph-cinder017-128057,hh-yun-ceph-cinder024-128074,hh-yun-ceph-cinder025-128075,hh-yun-ceph-cinder026-128076
osdmap e1577: 70 osds: 68 up, 68 in
pgmap v7535397: 20544 pgs, 2 pools, 14286 GB data, 3637 kobjects
42770 GB used, 205 TB / 247 TB avail
57850/11243389 objects degraded (0.515%); 154869/11243389 objects misplaced (1.377%)
19 inactive
20219 active+clean
11 peering
1 active+recovery_wait+degraded
184 active+undersized+degraded+remapped+backfilling
103 active+undersized+degraded+remapped+wait_backfill
7 active+undersized+remapped
recovery io 1336 MB/s, 341 objects/s
client io 709 kB/s rd, 1282 kB/s wr, 114 op/s
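Instead of re-running ceph -s by hand, the recovery can be followed live:
# stream status updates until recovery completes (Ctrl-C to stop)
ceph -w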
5. Wait for the data migration to complete
[root@hh-yun-ceph-cinder015-128055 ceph]# ceph -s
cluster dc4f91c1-8792-4948-b68f-2fcea75f53b9
health HEALTH_OK
monmap e3: 5 mons at {hh-yun-ceph-cinder015-128055=240.30.128.55:6789/0,hh-yun-ceph-cinder017-128057=240.30.128.57:6789/0,hh-yun-ceph-cinder024-128074=240.30.128.74:6789/0,hh-yun-ceph-cinder025-128075=240.30.128.75:6789/0,hh-yun-ceph-cinder026-128076=240.30.128.76:6789/0}, election epoch 22, quorum 0,1,2,3,4 hh-yun-ceph-cinder015-128055,hh-yun-ceph-cinder017-128057,hh-yun-ceph-cinder024-128074,hh-yun-ceph-cinder025-128075,hh-yun-ceph-cinder026-128076
osdmap e1858: 70 osds: 68 up, 68 in
pgmap v7536467: 20544 pgs, 2 pools, 14286 GB data, 3637 kobjects
42788 GB used, 205 TB / 247 TB avail
1 active+clean+scrubbing+deep <--- deep scrub in progress, safe to ignore
20543 active+clean
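If this wait is scripted, a simple poll on ceph health guards against pulling disks before recovery finishes (a sketch):
# block until the cluster reports HEALTH_OK again
while ! ceph health | grep -q HEALTH_OK; do sleep 60; done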
6. Before replacing the disks, be sure to fix /etc/fstab by commenting out the failed disks' entries; otherwise the mounts fail at reboot and the host may not come up far enough to log in:
#/dev/sdc1 /var/lib/ceph/osd/ceph-11 xfs defaults 0 0
/dev/sdd1 /var/lib/ceph/osd/ceph-12 xfs defaults 0 0
/dev/sde1 /var/lib/ceph/osd/ceph-13 xfs defaults 0 0
#/dev/sdf1 /var/lib/ceph/osd/ceph-14 xfs defaults 0 0
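The same edit can be made non-interactively; a sketch with GNU sed (keeps a backup at /etc/fstab.bak):
# comment out the fstab entries for the two failed disks
sed -i.bak -r 's|^(/dev/sd[cf]1[[:space:]])|#\1|' /etc/fstab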
7. Replace the disks and check again; both slots should now report zero errors
[root@hh-yun-ceph-cinder016-128056 ~]# MegaCli -PDList -aALL | less
Enclosure Device ID: 0
Slot Number: 3
Enclosure position: 0
Device Id: 2
Sequence Number: 7
Media Error Count: 0
Other Error Count: 0
Predictive Failure Count: 0
Enclosure Device ID: 0
Slot Number: 6
Enclosure position: 0
Device Id: 5
Sequence Number: 7
Media Error Count: 0
Other Error Count: 0
Predictive Failure Count: 0
8. After rebooting, rebuild the RAID
Obtain the RAID enclosure device ID:
[root@hh-yun-ceph-cinder016-128056 ~]# MegaCli -PDlist -aALL | grep "ID" | uniq
Enclosure Device ID: 0
Check the current RAID device information:
[root@hh-yun-ceph-cinder016-128056 ~]# MegaCli -cfgdsply -aALL | grep -E "DISK\ GROUP|Slot\ Number"
Number of DISK GROUPS: 9
DISK GROUP: 0
Slot Number: 0
Slot Number: 1
DISK GROUP: 1
Slot Number: 2
DISK GROUP: 2
Slot Number: 4 <-- slot number 3 (the replaced disk) is missing
DISK GROUP: 3
Slot Number: 5
DISK GROUP: 4
Slot Number: 7 <-- slot number 6 (the replaced disk) is missing
DISK GROUP: 5
Slot Number: 8
DISK GROUP: 6
Slot Number: 9
DISK GROUP: 7
Slot Number: 10
DISK GROUP: 8
Slot Number: 11
Rebuild the RAID arrays (single-disk RAID 0, write-back cache, direct I/O, adapter 0):
[root@hh-yun-ceph-cinder016-128056 ~]# MegaCli -CfgLdAdd -r0 [0:3] WB Direct -a0
Adapter 0: Created VD 2
Adapter 0: Configured the Adapter!!
Exit Code: 0x00
[root@hh-yun-ceph-cinder016-128056 ~]# MegaCli -CfgLdAdd -r0 [0:6] WB Direct -a0
Adapter 0: Created VD 5
Adapter 0: Configured the Adapter!
Exit Code: 0x00
Check the new RAID groups:
[root@hh-yun-ceph-cinder016-128056 ~]# MegaCli -cfgdsply -aALL | grep -E "DISK\ GROUP|Slot\ Number"
Number of DISK GROUPS: 11
DISK GROUP: 0
Slot Number: 0
Slot Number: 1
DISK GROUP: 1
Slot Number: 2
DISK GROUP: 2
Slot Number: 4
DISK GROUP: 3
Slot Number: 5
DISK GROUP: 4
Slot Number: 7
DISK GROUP: 5
Slot Number: 8
DISK GROUP: 6
Slot Number: 9
DISK GROUP: 7
Slot Number: 10
DISK GROUP: 8
Slot Number: 11
DISK GROUP: 9
Slot Number: 3 <- the new RAID group
DISK GROUP: 10
Slot Number: 6 <- the new RAID group
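The two virtual drives created above (VD 2 and VD 5) can also be inspected directly:
# show state, size and cache policy of every logical drive on adapter 0
MegaCli -LDInfo -Lall -a0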
9. Repartition (first zero the leading 100 MB to wipe any leftover metadata)
dd if=/dev/zero of=/dev/sdc bs=1M count=100
dd if=/dev/zero of=/dev/sdf bs=1M count=100
parted -s /dev/sdc mklabel gpt
parted -s /dev/sdf mklabel gpt
parted /dev/sdc mkpart primary xfs 1 100%
parted /dev/sdf mkpart primary xfs 1 100%
[root@hh-yun-ceph-cinder016-128056 ~]# fdisk -l | grep GPT | sort
/dev/sda1 * 1 4294967295 2147483647+ ee GPT
/dev/sdb1 1 4294967295 2147483647+ ee GPT
/dev/sdc1 1 4294967295 2147483647+ ee GPT
/dev/sdd1 1 4294967295 2147483647+ ee GPT
/dev/sde1 1 4294967295 2147483647+ ee GPT
/dev/sdf1 1 4294967295 2147483647+ ee GPT
/dev/sdg1 1 4294967295 2147483647+ ee GPT
/dev/sdh1 1 4294967295 2147483647+ ee GPT
/dev/sdi1 1 4294967295 2147483647+ ee GPT
/dev/sdj1 1 4294967295 2147483647+ ee GPT
/dev/sdk1 1 4294967295 2147483647+ ee GPT
10. Format with XFS
[root@hh-yun-ceph-cinder016-128056 ~]# mkfs.xfs -f -i size=512 /dev/sdc1
meta-data=/dev/sdc1 isize=512 agcount=4, agsize=244140480 blks
= sectsz=4096 attr=2, projid32bit=1
= crc=0 finobt=0
data = bsize=4096 blocks=976561920, imaxpct=5
= sunit=0 swidth=0 blks
naming =version 2 bsize=4096 ascii-ci=0 ftype=0
log =internal log bsize=4096 blocks=476836, version=2
= sectsz=4096 sunit=1 blks, lazy-count=1
realtime =none extsz=4096 blocks=0, rtextents=0
[root@hh-yun-ceph-cinder016-128056 ~]# mkfs.xfs -f -i size=512 /dev/sdf1
meta-data=/dev/sdf1 isize=512 agcount=4, agsize=244140480 blks
= sectsz=4096 attr=2, projid32bit=1
= crc=0 finobt=0
data = bsize=4096 blocks=976561920, imaxpct=5
= sunit=0 swidth=0 blks
naming =version 2 bsize=4096 ascii-ci=0 ftype=0
log =internal log bsize=4096 blocks=476836, version=2
= sectsz=4096 sunit=1 blks, lazy-count=1
realtime =none extsz=4096 blocks=0, rtextents=0
11. Restore the fstab entries and remount the disks
/dev/sdb1 /var/lib/ceph/osd/ceph-10 xfs defaults 0 0
/dev/sdc1 /var/lib/ceph/osd/ceph-11 xfs defaults 0 0
/dev/sdd1 /var/lib/ceph/osd/ceph-12 xfs defaults 0 0
/dev/sde1 /var/lib/ceph/osd/ceph-13 xfs defaults 0 0
/dev/sdf1 /var/lib/ceph/osd/ceph-14 xfs defaults 0 0
/dev/sdg1 /var/lib/ceph/osd/ceph-15 xfs defaults 0 0
/dev/sdh1 /var/lib/ceph/osd/ceph-16 xfs defaults 0 0
/dev/sdi1 /var/lib/ceph/osd/ceph-17 xfs defaults 0 0
/dev/sdj1 /var/lib/ceph/osd/ceph-18 xfs defaults 0 0
/dev/sdk1 /var/lib/ceph/osd/ceph-19 xfs defaults 0 0
mount -a
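Verify both filesystems actually mounted before reinitializing the OSDs:
# the rebuilt devices should reappear at their OSD paths
mount | grep -E 'sdc1|sdf1'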
12. Reinitialize the OSDs
[root@hh-yun-ceph-cinder016-128056 ~]# ceph-osd -i 11 --mkfs --mkkey --osd-uuid dc4f91c1-8792-4948-b68f-2fcea75f53b9
2015-11-08 12:34:30.542131 7fa492027880 -1 journal FileJournal::_open: disabling aio for non-block journal. Use journal_force_aio to force use of aio anyway
2015-11-08 12:34:30.546178 7fa492027880 -1 journal FileJournal::_open: disabling aio for non-block journal. Use journal_force_aio to force use of aio anyway
2015-11-08 12:34:30.546905 7fa492027880 -1 filestore(/var/lib/ceph/osd/ceph-11) could not find 23c2fcde/osd_superblock/0//-1 in index: (2) No such file or directory
2015-11-08 12:34:30.552611 7fa492027880 -1 created object store /var/lib/ceph/osd/ceph-11 journal /var/lib/ceph/osd/ceph-11/journal for osd.11 fsid dc4f91c1-8792-4948-b68f-2fcea75f53b9
2015-11-08 12:34:30.552664 7fa492027880 -1 auth: error reading file: /var/lib/ceph/osd/ceph-11/keyring: can't open /var/lib/ceph/osd/ceph-11/keyring: (2) No such file or directory
2015-11-08 12:34:30.552773 7fa492027880 -1 created new key in keyring /var/lib/ceph/osd/ceph-11/keyring
[root@hh-yun-ceph-cinder016-128056 ~]# ceph-osd -i 14 --mkfs --mkkey --osd-uuid dc4f91c1-8792-4948-b68f-2fcea75f53b9
2015-11-08 12:34:57.552442 7f1c850f2880 -1 journal FileJournal::_open: disabling aio for non-block journal. Use journal_force_aio to force use of aio anyway
2015-11-08 12:34:57.555955 7f1c850f2880 -1 journal FileJournal::_open: disabling aio for non-block journal. Use journal_force_aio to force use of aio anyway
2015-11-08 12:34:57.556460 7f1c850f2880 -1 filestore(/var/lib/ceph/osd/ceph-14) could not find 23c2fcde/osd_superblock/0//-1 in index: (2) No such file or directory
2015-11-08 12:34:57.562770 7f1c850f2880 -1 created object store /var/lib/ceph/osd/ceph-14 journal /var/lib/ceph/osd/ceph-14/journal for osd.14 fsid dc4f91c1-8792-4948-b68f-2fcea75f53b9
2015-11-08 12:34:57.562815 7f1c850f2880 -1 auth: error reading file: /var/lib/ceph/osd/ceph-14/keyring: can't open /var/lib/ceph/osd/ceph-14/keyring: (2) No such file or directory
2015-11-08 12:34:57.562906 7f1c850f2880 -1 created new key in keyring /var/lib/ceph/osd/ceph-14/keyring
13. Be sure to fix the keyring, otherwise starting the OSD fails with an authentication error (for reference:)
=== osd.11 ===
2015-11-08 12:40:36.113303 7fa55404e700 0 librados: osd.11 authentication error (1) Operation not permitted
Error connecting to cluster: PermissionError
How to fix the keyring:
ceph auth list | grep -C 5 osd.14
osd.14
key: AQBoxrlVcBOsGxAABmj0YHwxa6LXzYQUJaTHNw== <- write this key into the keyring file for osd.14: /var/lib/ceph/osd/ceph-14/keyring
caps: [mon] allow profile osd
caps: [osd] allow *
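Rather than pasting the key by hand, the local keyring can be regenerated from the cluster's auth database (a sketch; ceph auth get prints the entry in keyring format):
# overwrite each local keyring with the key registered in the cluster
ceph auth get osd.11 -o /var/lib/ceph/osd/ceph-11/keyring
ceph auth get osd.14 -o /var/lib/ceph/osd/ceph-14/keyring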
14. Start the Ceph cluster and verify
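The start commands are not shown in the original capture; matching the init script used in step 3, they would be:
/etc/init.d/ceph start osd.11
/etc/init.d/ceph start osd.14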
Data migration and recovery proceed automatically:
[root@hh-yun-ceph-cinder015-128055 ceph]# ceph -s
cluster dc4f91c1-8792-4948-b68f-2fcea75f53b9
health HEALTH_WARN 1721 pgs backfill; 20 pgs backfilling; 32 pgs degraded; 1 pgs peering; 20 pgs recovering; 12 pgs recovery_wait; 32 pgs stuck degraded; 1774 pgs stuck unclean; recovery 13646/11765653 objects degraded (0.116%); 1178529/11765653 objects misplaced (10.017%)
monmap e3: 5 mons at {hh-yun-ceph-cinder015-128055=240.30.128.55:6789/0,hh-yun-ceph-cinder017-128057=240.30.128.57:6789/0,hh-yun-ceph-cinder024-128074=240.30.128.74:6789/0,hh-yun-ceph-cinder025-128075=240.30.128.75:6789/0,hh-yun-ceph-cinder026-128076=240.30.128.76:6789/0}, election epoch 26, quorum 0,1,2,3,4 hh-yun-ceph-cinder015-128055,hh-yun-ceph-cinder017-128057,hh-yun-ceph-cinder024-128074,hh-yun-ceph-cinder025-128075,hh-yun-ceph-cinder026-128076
osdmap e1998: 70 osds: 70 up, 70 in
pgmap v7537087: 20544 pgs, 2 pools, 14286 GB data, 3637 kobjects
42815 GB used, 212 TB / 254 TB avail
13646/11765653 objects degraded (0.116%); 1178529/11765653 objects misplaced (10.017%)
1 peering
1721 active+remapped+wait_backfill
20 active+remapped+backfilling
18770 active+clean
12 active+recovery_wait+degraded
20 active+recovering+degraded
recovery io 415 MB/s, 106 objects/s