Tearing down a Ceph cluster and rebuilding it

Background:

The production cluster is broken, so it has to be rebuilt from scratch.

Steps: clean up the Ceph environment --> update the yum repo and the Ceph packages on every node --> create the cluster again with ceph-deploy

OSD cleanup script:

#!/bin/bash

# Collect the ids of all OSDs that have a data directory on this node,
# e.g. /var/lib/ceph/osd/ceph-3 -> 3.
all_osds=`/usr/bin/ls -l /var/lib/ceph/osd |/usr/bin/grep 'ceph'|/usr/bin/awk '{print $9}'|/usr/bin/awk -F '-' '{print $2}'|/usr/bin/sort -n`

delete_all_osd(){

    # Stop every OSD daemon on this node before tearing anything down.
    /usr/bin/systemctl stop ceph-osd.target
    /usr/bin/sleep 5
    for x in $all_osds;
    do
        osd=$x
        # Which host owns this OSD according to the cluster map?
        clusterinfo=`/usr/bin/ceph osd find ${osd}|/usr/bin/grep -w 'host'|/usr/bin/head -n1 |/usr/bin/awk -F '"' '{print $4}'`
        # Only touch OSDs that belong to the local host.
        ifexist=`/usr/bin/hostname|/usr/bin/grep $clusterinfo -c`
        if [ $ifexist -eq 1 ];then
            /usr/bin/echo "osd.${osd} is removing..."
            # Mark it down, destroy it and take it out of the cluster, then remove
            # its ceph-volume systemd unit and unmount/delete its data directory.
            /usr/bin/ceph osd down $osd
            /usr/bin/ceph osd destroy $osd --yes-i-really-mean-it
            /usr/bin/ceph osd out $osd
            /usr/bin/rm -f /etc/systemd/system/multi-user.target.wants/ceph-volume@lvm-$osd-*
            /usr/bin/umount /var/lib/ceph/osd/ceph-$osd
            /usr/bin/rm -rf /var/lib/ceph/osd/ceph-$osd
        fi
    done
}

delete_all_ceph_lvm(){

    # Collect every LV, VG and PV that ceph-volume created for the OSDs.
    all_lvs=`/usr/sbin/lvdisplay|/usr/bin/grep 'LV Path'|/usr/bin/grep -w 'ceph'|/usr/bin/awk '{print $NF}'`
    all_vgs=`/usr/sbin/vgdisplay|/usr/bin/grep 'VG Name'|/usr/bin/grep -w 'ceph'|/usr/bin/awk '{print $NF}'`
    all_pvs=`/usr/sbin/pvdisplay|/usr/bin/grep -E 'VG Name|PV Name'|/usr/bin/grep 'ceph' -B1|/usr/bin/grep 'PV'|/usr/bin/awk '{print $NF}'`

    # Remove them in dependency order: LVs first, then VGs, then PVs.
    for x in $all_lvs;
    do
        /usr/sbin/lvremove -y $x
    done

    for x in $all_vgs;
    do
        /usr/sbin/vgremove -y $x
    done

    for x in $all_pvs;
    do
        /usr/sbin/pvremove -y $x
    done

    # Refresh the LVM metadata cache; kill the scan if it hangs for more than 5s.
    /usr/bin/timeout -s 9 5s /usr/sbin/lvscan --cache
    /usr/bin/timeout -s 9 5s /usr/sbin/vgscan --cache
    /usr/bin/timeout -s 9 5s /usr/sbin/pvscan --cache
}

# Require root: the script stops services, unmounts filesystems and wipes LVM metadata.
whoami=`/usr/bin/id -u`
if [ $whoami -ne 0 ];then
    /usr/bin/echo "need sudo privilege"
else
    read -p 'Are you sure to delete all osds?[Y/N] ' input
    case $input in
        [yY][eE][sS]|[yY])
        echo $input
        delete_all_osd
        delete_all_ceph_lvm
        ;;
        [nN][oO]|[nN])
        echo 'Nothing to do'
        ;;

        *)
        echo 'Wrong input'
    esac
fi
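
Run the script as root on every OSD node; the file name here is only an example:

chmod +x clean_osd.sh
sudo ./clean_osd.sh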

If some OSDs are not cleaned up completely, remove them manually.
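
For any leftover OSD, the manual cleanup on a Luminous-or-newer cluster looks roughly like this; <id> is the stale OSD id and /dev/sdX its backing disk, both placeholders:

ceph osd purge <id> --yes-i-really-mean-it   # remove it from the CRUSH map, auth list and OSD map in one go
ceph-volume lvm zap /dev/sdX --destroy       # wipe the LVM metadata and data on its backing disk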

Clean up the cluster-related files:

# Stop the services first (the [c]eph pattern keeps grep from matching its own process)
ps aux|grep '[c]eph'|awk '{print $2}'|xargs -r kill -9

# Back up ceph.conf
mv /etc/ceph/ceph.conf /tmp/ceph.conf

# Remove the leftover cluster files
rm -rf /var/lib/ceph/osd/*
rm -rf /var/lib/ceph/mon/*
rm -rf /var/lib/ceph/mds/*
rm -rf /var/lib/ceph/rgw/*
rm -rf /var/lib/ceph/bootstrap-mds/*
rm -rf /var/lib/ceph/bootstrap-osd/*
rm -rf /var/lib/ceph/bootstrap-rgw/*
rm -rf /var/lib/ceph/bootstrap-mgr/*
rm -rf /var/lib/ceph/bootstrap-rbd/*
rm -rf /var/lib/ceph/tmp/*
rm -rf /etc/ceph/*
rm -rf /var/run/ceph/*

After everything is cleaned up, switch the yum repo on every node to the release you want to install, then install Ceph.

#luminous -> nautilus
cat /etc/yum.repos.d/ceph.repo
[Ceph]
name=Ceph packages for $basearch
baseurl=http://mirrors.aliyun.com/ceph/rpm-nautilus/el7/$basearch
enabled=1
gpgcheck=0
type=rpm-md
gpgkey=https://mirrors.aliyun.com/ceph/keys/release.asc
priority=1
[Ceph-noarch]
name=Ceph noarch packages
baseurl=http://mirrors.aliyun.com/ceph/rpm-nautilus/el7/noarch
enabled=1
gpgcheck=0
type=rpm-md
gpgkey=https://mirrors.aliyun.com/ceph/keys/release.asc
priority=1
[ceph-source]
name=Ceph source packages
baseurl=http://mirrors.aliyun.com/ceph/rpm-nautilus/el7/SRPMS
enabled=1
gpgcheck=0
type=rpm-md
gpgkey=https://mirrors.aliyun.com/ceph/keys/release.asc
priority=1
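
After the repo file is in place, clear the old yum metadata so the nautilus packages are actually picked up:

yum clean all
yum makecache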

sudo yum install -y epel-release
sudo yum install -y ceph ceph-radosgw
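
Once the install finishes, confirm on each node that the version matches the repo you chose:

ceph --version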

After that, rebuild the cluster with ceph-deploy. I won't go through it in detail here; the exact steps are in the official documentation.
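
For reference, a typical ceph-deploy sequence looks roughly like this; node1/node2/node3 and /dev/sdb are placeholders for your own monitor hosts and data disks:

ceph-deploy new node1 node2 node3              # generate a fresh ceph.conf and monitor keyring
ceph-deploy mon create-initial                 # deploy the initial monitors and gather the keys
ceph-deploy admin node1 node2 node3            # push ceph.conf and the admin keyring to the nodes
ceph-deploy mgr create node1                   # create a manager daemon
ceph-deploy osd create --data /dev/sdb node1   # create one OSD per data disk on each node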
