This note records how to add an OSD back into a cluster after it has already been kicked out, in the case where the data on the OSD's disk has not been wiped and its LV has not been removed.
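Before anything else, it can be confirmed that the LV backing the removed OSD is still present (a sketch using standard LVM tooling; ceph-volume names its BlueStore data LVs osd-block-<uuid>):

# List LVs together with their volume groups; ceph-volume's data LVs show up as osd-block-<uuid>
lvs -o lv_name,vg_name,lv_size | grep osd-block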
[root@test-1 osd]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 2.63699 root default
-3 0.87900 host test-1
0 hdd 0.29300 osd.0 up 1.00000 1.00000
1 hdd 0.29300 osd.1 up 1.00000 1.00000
2 hdd 0.29300 osd.2 up 1.00000 1.00000
-5 0.87900 host test-2
3 hdd 0.29300 osd.3 up 1.00000 1.00000
4 hdd 0.29300 osd.4 up 1.00000 1.00000
5 hdd 0.29300 osd.5 up 1.00000 1.00000
-7 0.87900 host test-3
6 hdd 0.29300 osd.6 up 1.00000 1.00000
7 hdd 0.29300 osd.7 up 1.00000 1.00000
8 hdd 0.29300 osd.8 up 1.00000 1.00000
[root@test-1 osd]# rados -p test ls
1
5
3
4
2
[root@test-1 osd]# ceph osd map test 1
osdmap e252 pool 'test' (6) object '1' -> pg 6.437e2a40 (6.0) -> up ([0,5], p0) acting ([0,5], p0) // the object named '1' is located on osd.0 and osd.5
[root@test-1 osd]#
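The same lookup can be repeated for every object in the pool with a small loop (a sketch based on the commands above):

# Print the PG and acting OSD set for every object in pool 'test'
for obj in $(rados -p test ls); do
    ceph osd map test "$obj"
done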
### Be sure to stop the OSDs before removing them!!!
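On a systemd-based deployment the daemons can be stopped through their ceph-osd@<id> units, for example:

# Stop the daemons for osd.0 and osd.5 before removing them from the cluster
systemctl stop ceph-osd@0
systemctl stop ceph-osd@5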
[root@test-1 osd]# ceph osd crush rm osd.0
removed item id 0 name 'osd.0' from crush map
[root@test-1 osd]# ceph auth del osd.0
updated
[root@test-1 osd]# ceph osd rm 0
removed osd.0
[root@test-1 osd]# ceph osd crush rm osd.5
removed item id 5 name 'osd.5' from crush map
[root@test-1 osd]# ceph auth del osd.5
updated
[root@test-1 osd]# ceph osd rm 5
removed osd.5
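The three removal commands can also be wrapped in a small helper and run once per OSD (a sketch mirroring the commands above):

# Remove an OSD from the CRUSH map, delete its auth key, and drop it from the OSD map
remove_osd() {
    local id="$1"
    ceph osd crush rm "osd.${id}"
    ceph auth del "osd.${id}"
    ceph osd rm "${id}"
}
remove_osd 0
remove_osd 5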
[root@test-1 osd]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 2.05099 root default
-3 0.58600 host test-1
1 hdd 0.29300 osd.1 up 1.00000 1.00000
2 hdd 0.29300 osd.2 up 1.00000 1.00000
-5 0.58600 host test-2
3 hdd 0.29300 osd.3 up 1.00000 1.00000
4 hdd 0.29300 osd.4 up 1.00000 1.00000
-7 0.87900 host test-3
6 hdd 0.29300 osd.6 up 1.00000 1.00000
7 hdd 0.29300 osd.7 up 1.00000 1.00000
8 hdd 0.29300 osd.8 up 1.00000 1.00000
At this point the original object '1' can no longer be found.
[root@test-1 osd]# ceph osd create
0 // in this example, osd.0 is exactly the ID we want to add back
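Because ceph osd create always hands out the lowest free ID, it may need to be called in a loop until the target ID comes back (a sketch; any intermediate IDs it creates along the way would have to be removed again with ceph osd rm):

# Keep calling 'ceph osd create' until we reach (or pass) the target ID
target=0
while :; do
    id=$(ceph osd create)
    echo "created osd id ${id}"
    if [ "${id}" -ge "${target}" ]; then
        break
    fi
done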
[root@test-1 dev]# ll /dev/ceph*
ceph-2f548b80-a55a-423d-8351-9f9d948038d2:
total 0
lrwxrwxrwx 1 ceph ceph 7 Oct 8 10:27 osd-block-68aa45a5-8fe0-4e11-9014-900b717db5ca -> ../dm-2
ceph-7d254e14-2149-49f6-84b3-7001ccbd6857:
total 0
lrwxrwxrwx 1 ceph ceph 7 Oct 8 10:26 osd-block-3c0bd2e0-88f5-460e-904b-3e0ca337f5f4 -> ../dm-1
ceph-e7878472-0d23-42a4-a9be-d69edc9ed4b0:
total 0
lrwxrwxrwx 1 ceph ceph 7 Oct 8 14:58 osd-block-8b0394e4-1dcc-44c1-82b7-864b2162de38 -> ../dm-0 ### 8b0394e4-1dcc-44c1-82b7-864b2162de38 is the fsid
### Find the LV that belongs to osd.0; the string after 'osd-block-' is that OSD's fsid, which is needed later for activation.
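Alternatively, ceph-volume can report the OSD ID and fsid directly, which avoids reading them out of the device symlinks (the exact output layout varies by release):

# List ceph-volume managed devices together with their osd id and osd fsid
ceph-volume lvm list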
[root@test-1 dev]# ceph-volume lvm activate 0 8b0394e4-1dcc-44c1-82b7-864b2162de38
Running command: mount -t tmpfs tmpfs /var/lib/ceph/osd/ceph-0
Running command: ceph-bluestore-tool --cluster=ceph prime-osd-dir --dev /dev/ceph-e7878472-0d23-42a4-a9be-d69edc9ed4b0/osd-block-8b0394e4-1dcc-44c1-82b7-864b2162de38 --path /var/lib/ceph/osd/ceph-0
Running command: ln -snf /dev/ceph-e7878472-0d23-42a4-a9be-d69edc9ed4b0/osd-block-8b0394e4-1dcc-44c1-82b7-864b2162de38 /var/lib/ceph/osd/ceph-0/block
Running command: chown -h ceph:ceph /var/lib/ceph/osd/ceph-0/block
Running command: chown -R ceph:ceph /dev/dm-0
Running command: chown -R ceph:ceph /var/lib/ceph/osd/ceph-0
Running command: ln -snf /dev/vdf2 /var/lib/ceph/osd/ceph-0/block.db
Running command: chown -R ceph:ceph /dev/vdf2
Running command: chown -h ceph:ceph /var/lib/ceph/osd/ceph-0/block.db
Running command: chown -R ceph:ceph /dev/vdf2
Running command: ln -snf /dev/vdf1 /var/lib/ceph/osd/ceph-0/block.wal
Running command: chown -R ceph:ceph /dev/vdf1
Running command: chown -h ceph:ceph /var/lib/ceph/osd/ceph-0/block.wal
Running command: chown -R ceph:ceph /dev/vdf1
Running command: systemctl enable ceph-volume@lvm-0-8b0394e4-1dcc-44c1-82b7-864b2162de38
Running command: systemctl start ceph-osd@0
--> ceph-volume lvm activate successful for osd ID: 0
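It is worth confirming that the daemon actually came up after activation (assuming the standard systemd unit name):

# Check that the OSD daemon started after activation
systemctl status ceph-osd@0 --no-pager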
[root@test-1 dev]# ll /var/lib/ceph/osd/ceph-0/
total 24
lrwxrwxrwx 1 ceph ceph 93 Oct 8 15:17 block -> /dev/ceph-e7878472-0d23-42a4-a9be-d69edc9ed4b0/osd-block-8b0394e4-1dcc-44c1-82b7-864b2162de38
lrwxrwxrwx 1 ceph ceph 9 Oct 8 15:17 block.db -> /dev/vdf2
lrwxrwxrwx 1 ceph ceph 9 Oct 8 15:17 block.wal -> /dev/vdf1
-rw------- 1 ceph ceph 37 Oct 8 15:17 ceph_fsid
-rw------- 1 ceph ceph 37 Oct 8 15:17 fsid
-rw------- 1 ceph ceph 55 Oct 8 15:17 keyring
-rw------- 1 ceph ceph 6 Oct 8 15:17 ready
-rw------- 1 ceph ceph 10 Oct 8 15:17 type
-rw------- 1 ceph ceph 2 Oct 8 15:17 whoami
[root@test-1 dev]# ceph auth add osd.0 osd 'allow *' mon 'allow rwx' -i /var/lib/ceph/osd/ceph-0/keyring
added key for osd.0
[root@test-1 dev]# ceph osd crush add 0 0.29300 host=test-1
set item id 0 name 'osd.0' weight 0.293 at location {host=test-1}: no change
[root@test-1 dev]# ceph -s
cluster:
id: acc6dc6a-79cd-45dc-bf1f-83a576eb8039
health: HEALTH_WARN
application not enabled on 2 pool(s)
clock skew detected on mon.test-2, mon.test-3
services:
mon: 3 daemons, quorum test-1,test-2,test-3
mgr: test-1(active)
osd: 8 osds: 8 up, 8 in
data:
pools: 2 pools, 128 pgs
objects: 13 objects, 8198 kB
usage: 8490 MB used, 2391 GB / 2399 GB avail
pgs: 128 active+clean
[root@test-1 dev]# rados -p test ls
1
5
3
4
2
Summary:
1. Run ceph osd create repeatedly until it returns the desired OSD ID.
2. Activate the OSD with ceph-volume lvm activate, which mounts its tmpfs directory.
3. ceph auth add osd.<id> osd 'allow *' mon 'allow rwx' -i /var/lib/ceph/osd/ceph-<id>/keyring
4. ceph osd crush add <id> <weight> host=<hostname>
5. Restart the OSD.
For Filestore OSDs, simply skip step 2.
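Put together, the whole re-add procedure looks roughly like this (a sketch; <id>, <fsid>, <weight> and <host> are placeholders to be filled in from your environment, and step 2 only applies to BlueStore):

# 1. Recreate the OSD entry until the desired ID is returned
ceph osd create

# 2. Activate the existing LV (BlueStore only); mounts tmpfs at /var/lib/ceph/osd/ceph-<id>
ceph-volume lvm activate <id> <fsid>

# 3. Re-register the OSD's key from the primed data directory
ceph auth add osd.<id> osd 'allow *' mon 'allow rwx' -i /var/lib/ceph/osd/ceph-<id>/keyring

# 4. Put the OSD back into the CRUSH map with its original weight and host
ceph osd crush add <id> <weight> host=<host>

# 5. Restart the daemon
systemctl restart ceph-osd@<id>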