临时解决kube-controller-manager 无法创建rbd image 问题

注:以下内容是在参考 http://www.jianshu.com/p/8ce11f947410 的基础上进行的测试

环境

centos7.2
kubeadm 1.6.1

问题现象

[root@cloud4ourself-kube1 manifests]# kubectl get pvc
NAME           STATUS    VOLUME                                     CAPACITY   ACCESSMODES   STORAGECLASS     AGE
datadir-zk-0   Pending                                                                       slow             16h
gluster1       Bound     pvc-45c39fbe-2a57-11e7-bdbe-fa163e72ab1f   5Gi        RWO           gluster-heketi   18h

[root@cloud4ourself-kube1 manifests]# kubectl describe  pvc datadir-zk-0
Name:       datadir-zk-0
Namespace:  default
StorageClass:   slow
Status:     Pending
Volume:
Labels:     app=zk
Annotations:    volume.beta.kubernetes.io/storage-class=slow
        volume.beta.kubernetes.io/storage-provisioner=kubernetes.io/rbd
Capacity:
Access Modes:
Events:
  FirstSeen LastSeen    Count   From                SubObjectPath   Type        Reason          Message
  --------- --------    -----   ----                -------------   --------    ------          -------
  16h       36m     3850    persistentvolume-controller         Warning     ProvisioningFailed  Failed to provision volume with StorageClass "slow": failed to create rbd image: executable file not found in $PATH, command output:

问题原因

由于kube-controller-manager以容器方式运行(镜像为gcr.io/google_containers/kube-controller-manager-amd64:v1.6.0),而该镜像不包含rbd命令,
因此kube-controller-manager在创建pv时,无法调用rbd,解决办法就是将kube-controller-manager运行在有rbd的环境中,
本次临时将kube-controller-manager从容器提取出来,在master节点服务器上用命令方式运行

复制kube-controller-manager

[root@cloud4ourself-kube1 kubernetes]# kubectl exec -it kube-controller-manager-cloud4ourself-kube1.novalocal -n kube-system -- sh
/ #
/ #
/ #
/ # cd /usr/local/bin/
/usr/local/bin # ls
kube-controller-manager
/usr/local/bin # df
Filesystem           1K-blocks      Used Available Use% Mounted on
/dev/mapper/docker-253:1-201327648-973d5c7c4aa6e982d0b5eef5e301dd7261b7470d8de8a508f85942441c39237c
                      10474496    163204  10311292   2% /
tmpfs                  1941100         0   1941100   0% /dev
tmpfs                  1941100         0   1941100   0% /sys/fs/cgroup
/dev/vda1             41930056   4704356  37225700  11% /etc/pki
/dev/vda1             41930056   4704356  37225700  11% /dev/termination-log
/dev/vda1             41930056   4704356  37225700  11% /etc/kubernetes
/dev/vda1             41930056   4704356  37225700  11% /etc/resolv.conf
/dev/vda1             41930056   4704356  37225700  11% /etc/hostname
/dev/vda1             41930056   4704356  37225700  11% /etc/hosts
shm                      65536         0     65536   0% /dev/shm
/dev/vda1             41930056   4704356  37225700  11% /etc/ssl/certs
tmpfs                  1941100         0   1941100   0% /proc/kcore
tmpfs                  1941100         0   1941100   0% /proc/timer_list
tmpfs                  1941100         0   1941100   0% /proc/timer_stats
tmpfs                  1941100         0   1941100   0% /proc/sched_debug
/usr/local/bin # cp kube-controller-manager /etc/kubernetes/
cp: can't create '/etc/kubernetes/kube-controller-manager': Read-only file system
/usr/local/bin # cp kube-controller-manager /etc/pki/
/usr/local/bin # cd /etc/pki/
/etc/pki # ls
CA                       java                     nssdb                    rsyslog
ca-trust                 kube-controller-manager  rpm-gpg                  tls
/etc/pki # ps
PID   USER     TIME   COMMAND
    1 root       0:02 kube-controller-manager --controllers=*,bootstrapsigner,tokencleaner --kubeconfig=/etc/kubernetes/controller-manager.conf --service-account-private-key-file=/etc/kubernetes/pki/sa.key --a
   13 root       0:00 sh
   23 root       0:00 ps


/etc/pki # cat /proc/1/cmdline
kube-controller-manager--controllers=*,bootstrapsigner,tokencleaner--kubeconfig=/etc/kubernetes/controller-manager.conf--service-account-private-key-file=/etc/kubernetes/pki/sa.key--address=127.0.0.1--leader-elect=true--insecure-experimental-approve-all-kubelet-csrs-for-group=system:bootstrappers--cluster-signing-key-file=/etc/kubernetes/pki/ca.key--use-service-account-credentials=true--root-ca-file=/etc/kubernetes/pki/ca.crt--cluster-signing-cert-file=/etc/kubernetes/pki/ca.crt/etc/pki #

移动kube-controller-manager.yaml

cd /etc/kubernetes
[root@cloud4ourself-kube1 kubernetes]# kubectl get pod -n kube-system
NAME                                                    READY     STATUS    RESTARTS   AGE
calico-etcd-vbc6x                                       1/1       Running   1          1d
calico-node-0hbw2                                       2/2       Running   0          1d
calico-node-7x2jv                                       2/2       Running   4          1d
calico-node-cr8dc                                       2/2       Running   1          1d
calico-node-z928t                                       2/2       Running   2          1d
calico-policy-controller-2561685917-13j12               1/1       Running   1          1d
etcd-cloud4ourself-kube1.novalocal                      1/1       Running   1          1d
kube-apiserver-cloud4ourself-kube1.novalocal            1/1       Running   1          1d
kube-controller-manager-cloud4ourself-kube1.novalocal   1/1       Running   0          51m
kube-dns-3913472980-zfpkg                               3/3       Running   622        1d
kube-proxy-4h57v                                        1/1       Running   1          1d
kube-proxy-r5wqj                                        1/1       Running   0          1d
kube-proxy-r8b5p                                        1/1       Running   1          1d
kube-proxy-wfmrd                                        1/1       Running   0          1d
kube-scheduler-cloud4ourself-kube1.novalocal            1/1       Running   1          1d
[root@cloud4ourself-kube1 kubernetes]# mv manifests/kube-controller-manager.yaml .
[root@cloud4ourself-kube1 kubernetes]# kubectl get pod -n kube-system
NAME                                           READY     STATUS    RESTARTS   AGE
calico-etcd-vbc6x                              1/1       Running   1          1d
calico-node-0hbw2                              2/2       Running   0          1d
calico-node-7x2jv                              2/2       Running   4          1d
calico-node-cr8dc                              2/2       Running   1          1d
calico-node-z928t                              2/2       Running   2          1d
calico-policy-controller-2561685917-13j12      1/1       Running   1          1d
etcd-cloud4ourself-kube1.novalocal             1/1       Running   1          1d
kube-apiserver-cloud4ourself-kube1.novalocal   1/1       Running   1          1d
kube-dns-3913472980-zfpkg                      3/3       Running   622        1d
kube-proxy-4h57v                               1/1       Running   1          1d
kube-proxy-r5wqj                               1/1       Running   0          1d
kube-proxy-r8b5p                               1/1       Running   1          1d
kube-proxy-wfmrd                               1/1       Running   0          1d
kube-scheduler-cloud4ourself-kube1.novalocal   1/1       Running   1          1d

在服务器里启动(非容器方式)

/etc/pki/kube-controller-manager --controllers=*,bootstrapsigner,tokencleaner --kubeconfig=/etc/kubernetes/controller-manager.conf --service-account-private-key-file=/etc/kubernetes/pki/sa.key --address=127.0.0.1 --leader-elect=true --insecure-experimental-approve-all-kubelet-csrs-for-group=system:bootstrappers --cluster-signing-key-file=/etc/kubernetes/pki/ca.key --use-service-account-credentials=true --root-ca-file=/etc/kubernetes/pki/ca.crt --cluster-signing-cert-file=/etc/kubernetes/pki/ca.crt

再次确认pvc

[root@cloud4ourself-kube1 manifests]# kubectl get pvc
NAME           STATUS    VOLUME                                     CAPACITY   ACCESSMODES   STORAGECLASS     AGE
datadir-zk-0   Bound     pvc-7f1a0c7e-2a67-11e7-bdbe-fa163e72ab1f   1Gi        RWO           slow             17h
gluster1       Bound     pvc-45c39fbe-2a57-11e7-bdbe-fa163e72ab1f   5Gi        RWO           gluster-heketi   19h
[root@cloud4ourself-kube1 manifests]# kubectl get pv
NAME                                       CAPACITY   ACCESSMODES   RECLAIMPOLICY   STATUS    CLAIM                  STORAGECLASS     REASON    AGE
pvc-45c39fbe-2a57-11e7-bdbe-fa163e72ab1f   5Gi        RWO           Delete          Bound     default/gluster1       gluster-heketi             19h
pvc-7f1a0c7e-2a67-11e7-bdbe-fa163e72ab1f   1Gi        RWO           Delete          Bound     default/datadir-zk-0   slow                       16s

待pv全部创建完毕

[root@cloud4ourself-kube1 pki]# kubectl get pod
NAME                      READY     STATUS    RESTARTS   AGE
glusterfs-66gn0           1/1       Running   1          20h
glusterfs-srhh8           1/1       Running   0          20h
glusterfs-z55q5           1/1       Running   0          20h
heketi-1125625054-pl9qc   1/1       Running   0          20h
zk-0                      1/1       Running   0          17h
zk-1                      1/1       Running   0          9m
zk-2                      1/1       Running   0          7m
[root@cloud4ourself-kube1 pki]# kubectl get pvc
NAME           STATUS    VOLUME                                     CAPACITY   ACCESSMODES   STORAGECLASS     AGE
datadir-zk-0   Bound     pvc-7f1a0c7e-2a67-11e7-bdbe-fa163e72ab1f   1Gi        RWO           slow             17h
datadir-zk-1   Bound     pvc-c58c0a6e-2afc-11e7-bdbe-fa163e72ab1f   1Gi        RWO           slow             9m
datadir-zk-2   Bound     pvc-19b76842-2afd-11e7-bdbe-fa163e72ab1f   1Gi        RWO           slow             7m
gluster1       Bound     pvc-45c39fbe-2a57-11e7-bdbe-fa163e72ab1f   5Gi        RWO           gluster-heketi   19h
[root@cloud4ourself-kube1 pki]# kubectl get pv
NAME                                       CAPACITY   ACCESSMODES   RECLAIMPOLICY   STATUS    CLAIM                  STORAGECLASS    REASON    AGE
pvc-19b76842-2afd-11e7-bdbe-fa163e72ab1f   1Gi        RWO           Delete          Bound     default/datadir-zk-2   slow              7m
pvc-45c39fbe-2a57-11e7-bdbe-fa163e72ab1f   5Gi        RWO           Delete          Bound     default/gluster1       gluster-heketi             19h
pvc-7f1a0c7e-2a67-11e7-bdbe-fa163e72ab1f   1Gi        RWO           Delete          Bound     default/datadir-zk-0   slow              12m
pvc-c58c0a6e-2afc-11e7-bdbe-fa163e72ab1f   1Gi        RWO           Delete          Bound     default/datadir-zk-1   slow              9m

重新恢复容器方式

# 停止前面在服务器上以命令方式启动的kube-controller-manager进程
# 移动配置文件
[root@cloud4ourself-kube1 kubernetes]# mv kube-controller-manager.yaml manifests/
[root@cloud4ourself-kube1 kubernetes]# kubectl get pod -n kube-system
NAME                                                    READY     STATUS    RESTARTS   AGE
calico-etcd-vbc6x                                       1/1       Running   1          1d
calico-node-0hbw2                                       2/2       Running   0          1d
calico-node-7x2jv                                       2/2       Running   4          1d
calico-node-cr8dc                                       2/2       Running   1          1d
calico-node-z928t                                       2/2       Running   2          1d
calico-policy-controller-2561685917-13j12               1/1       Running   1          1d
etcd-cloud4ourself-kube1.novalocal                      1/1       Running   1          1d
kube-apiserver-cloud4ourself-kube1.novalocal            1/1       Running   1          1d
kube-controller-manager-cloud4ourself-kube1.novalocal   1/1       Running   0          7s
kube-dns-3913472980-zfpkg                               3/3       Running   622        1d
kube-proxy-4h57v                                        1/1       Running   1          1d
kube-proxy-r5wqj                                        1/1       Running   0          1d
kube-proxy-r8b5p                                        1/1       Running   1          1d
kube-proxy-wfmrd                                        1/1       Running   0          1d
kube-scheduler-cloud4ourself-kube1.novalocal            1/1       Running   1          1d

你可能感兴趣的:(临时解决kube-controller-manager 无法创建rbd image 问题)