OpenStack中RabbitMQ报错导致无法识别计算节点的解决方案

1 问题的现象

[root@controller160 ~]# less /var/log/rabbitmq/rabbit@controller160.log
2020-07-16 14:32:25.765 [error] <0.21731.1548> Channel error on connection <0.20499.1547> (172.16.1.160:34298 -> 172.16.1.160:5672, vhost: '/', user: 'rabbitmq'), channel 1:
operation queue.declare caused a channel exception not_found: failed to perform operation on queue 'conductor.controller160' in vhost '/' due to timeout
2020-07-16 14:32:25.834 [warning] <0.20499.1547> closing AMQP connection <0.20499.1547> (172.16.1.160:34298 -> 172.16.1.160:5672 - nova-conductor:156924:5a96df21-0cf1-4172-99b3-14077dc0d3f2, vhost: '/', user: 'rabbitmq'):

[root@controller160 ~]# openstack network agent list
+--------------------------------------+--------------------+------------+-------------------+-------+-------+---------------------------+
| ID                                   | Agent Type         | Host       | Availability Zone | Alive | State | Binary                    |
+--------------------------------------+--------------------+------------+-------------------+-------+-------+---------------------------+
| 12f5026f-2a62-49a2-8d87-ee0ca71688ea | Metadata agent     | compute163 | None              | XXX   | UP    | neutron-metadata-agent    |
| 7014261e-6719-447e-882a-19f45033a6c9 | Metadata agent     | compute164 | None              | XXX   | UP    | neutron-metadata-agent    |
| 767a9db2-7b3a-4edc-833a-39f4c3812b70 | DHCP agent         | compute163 | nova              | XXX   | UP    | neutron-dhcp-agent        |
| 8df5c9ee-8086-4351-8c66-866b2d2577fa | L3 agent           | compute164 | nova              | XXX   | UP    | neutron-l3-agent          |
| aa234650-cf75-4fe4-af48-0ea14495b1a6 | DHCP agent         | compute164 | nova              | XXX   | UP    | neutron-dhcp-agent        |
| b7cf0b1d-4ff0-4314-9427-8cfda3419e15 | Linux bridge agent | compute164 | None              | XXX   | UP    | neutron-linuxbridge-agent |
| c510acb9-7123-4ae3-850c-5e5184b6542a | Linux bridge agent | compute163 | None              | XXX   | UP    | neutron-linuxbridge-agent |
| c6660e76-c613-4980-90ba-23c5585b79d5 | Open vSwitch agent | compute163 | None              | XXX   | UP    | neutron-openvswitch-agent |
| dadea42c-e242-494a-a876-098cd71bcfc4 | L3 agent           | compute163 | nova              | XXX   | UP    | neutron-l3-agent          |
| e611f91f-f162-47be-aebc-d6eaf824e0d0 | Open vSwitch agent | compute164 | None              | XXX   | UP    | neutron-openvswitch-agent |
+--------------------------------------+--------------------+------------+-------------------+-------+-------+---------------------------+
[root@compute163 ~]# less /var/log/nova/nova-compute.log
2020-07-16 14:35:34.073 381825 ERROR oslo_service.service oslo_messaging.exceptions.MessageDeliveryFailure: Unable to connect to AMQP server on controller160:5672 after inf tries: Queue.declare: (404) NOT_FOUND - failed to perform operation on queue 'compute.compute163' in vhost '/' due to timeout

2 解决方案

#具体原因没有找到,因此采用最直接的办法:重建rabbitmq集群
#重建rabbitmq集群
#rabbitmq所在的节点都必须执行,这里只以一台为例

#在每台节点上停止rabbit应用(rabbitmqctl stop_app只停止RabbitMQ应用,Erlang节点本身仍在运行)
[root@controller160 bin]# rabbitmqctl stop_app
Stopping rabbit application on node rabbit@controller160 ...
#停止rabbitmq服务
[root@controller160 bin]# systemctl stop rabbitmq-server.service
#删除或者移动mnesia目录,我这里用的是移动
[root@controller160 mnesia]# ll
total 8
drwxr-x--- 4 rabbitmq rabbitmq 4096 Jul 16 16:24 rabbit@controller160
-rw-r----- 1 rabbitmq rabbitmq   64 Jun 18 00:20 rabbit@controller160-feature_flags
drwxr-x--- 8 rabbitmq rabbitmq  178 Jul 16 15:51 rabbit@controller160-plugins-expand
[root@controller160 mnesia]# pwd
/var/lib/rabbitmq/mnesia
[root@controller160 ~]# mv /var/lib/rabbitmq/mnesia/ /tmp/mnesia

#任选1个控制节点首先启动rabbitmq服务,这里选择controller160节点
[root@controller160 ~]#systemctl start rabbitmq-server.service
[root@controller160 ~]#rabbitmqctl cluster_status

#分发.erlang.cookie
[root@controller160 ~]#scp /var/lib/rabbitmq/.erlang.cookie root@controller161:/var/lib/rabbitmq/
[root@controller160 ~]#scp /var/lib/rabbitmq/.erlang.cookie root@controller162:/var/lib/rabbitmq/

#修改controller161/162节点.erlang.cookie文件的用户/组,以controller161节点为例
[root@controller161 ~]#chown rabbitmq:rabbitmq /var/lib/rabbitmq/.erlang.cookie

#注意修改全部控制节点.erlang.cookie文件的权限,默认即400权限,可不修改
[root@controller161 ~]#ll /var/lib/rabbitmq/.erlang.cookie

#启动controller161/162节点的rabbitmq服务 
[root@controller161 ~]#systemctl restart rabbitmq-server

[root@controller162 ~]#systemctl restart rabbitmq-server

#构建集群,controller161/162节点以ram节点的形式加入集群(以下以controller161为例,controller162上执行相同的命令)
[root@controller161 ~]#rabbitmqctl stop_app
[root@controller161 ~]#rabbitmqctl join_cluster --ram rabbit@controller160
[root@controller161 ~]#rabbitmqctl start_app

#重新授权,确认都为管理员权限(用户和权限数据保存在mnesia目录中,目录被移走后会丢失;若rabbitmq用户不存在,需先执行 rabbitmqctl add_user rabbitmq <密码> 重新创建)
rabbitmqctl set_user_tags rabbitmq administrator
rabbitmqctl set_permissions -p "/" rabbitmq ".*" ".*" ".*"
[root@controller160 ~]# rabbitmqctl list_users
Listing users ...
user	tags
rabbitmq	[administrator]
guest	[administrator]

#最后依次重启每个节点的rabbitmq
systemctl restart rabbitmq-server

3 验证服务是否恢复:

#查看日志,已无报错

[root@controller160 ~]# openstack network agent list
+--------------------------------------+--------------------+------------+-------------------+-------+-------+---------------------------+
| ID                                   | Agent Type         | Host       | Availability Zone | Alive | State | Binary                    |
+--------------------------------------+--------------------+------------+-------------------+-------+-------+---------------------------+
| 12f5026f-2a62-49a2-8d87-ee0ca71688ea | Metadata agent     | compute163 | None              | :-)   | UP    | neutron-metadata-agent    |
| 7014261e-6719-447e-882a-19f45033a6c9 | Metadata agent     | compute164 | None              | :-)   | UP    | neutron-metadata-agent    |
| 767a9db2-7b3a-4edc-833a-39f4c3812b70 | DHCP agent         | compute163 | nova              | :-)   | UP    | neutron-dhcp-agent        |
| 8df5c9ee-8086-4351-8c66-866b2d2577fa | L3 agent           | compute164 | nova              | :-)   | UP    | neutron-l3-agent          |
| aa234650-cf75-4fe4-af48-0ea14495b1a6 | DHCP agent         | compute164 | nova              | :-)   | UP    | neutron-dhcp-agent        |
| b7cf0b1d-4ff0-4314-9427-8cfda3419e15 | Linux bridge agent | compute164 | None              | XXX   | UP    | neutron-linuxbridge-agent |
| c510acb9-7123-4ae3-850c-5e5184b6542a | Linux bridge agent | compute163 | None              | XXX   | UP    | neutron-linuxbridge-agent |
| c6660e76-c613-4980-90ba-23c5585b79d5 | Open vSwitch agent | compute163 | None              | :-)   | UP    | neutron-openvswitch-agent |
| dadea42c-e242-494a-a876-098cd71bcfc4 | L3 agent           | compute163 | nova              | :-)   | UP    | neutron-l3-agent          |
| e611f91f-f162-47be-aebc-d6eaf824e0d0 | Open vSwitch agent | compute164 | None              | :-)   | UP    | neutron-openvswitch-agent |
+--------------------------------------+--------------------+------------+-------------------+-------+-------+---------------------------+
[root@controller160 ~]# openstack compute service list
+----+----------------+---------------+----------+---------+-------+----------------------------+
| ID | Binary         | Host          | Zone     | Status  | State | Updated At                 |
+----+----------------+---------------+----------+---------+-------+----------------------------+
| 13 | nova-conductor | controller160 | internal | enabled | up    | 2020-07-16T15:59:13.000000 |
| 19 | nova-scheduler | controller160 | internal | enabled | up    | 2020-07-16T15:59:08.000000 |
| 20 | nova-conductor | controller162 | internal | enabled | up    | 2020-07-16T15:59:01.000000 |
| 23 | nova-scheduler | controller162 | internal | enabled | up    | 2020-07-16T15:58:43.000000 |
| 25 | nova-conductor | controller161 | internal | enabled | up    | 2020-07-16T15:59:18.000000 |
| 26 | nova-scheduler | controller161 | internal | enabled | up    | 2020-07-16T15:59:17.000000 |
| 47 | nova-compute   | compute163    | nova     | enabled | up    | 2020-07-16T15:59:27.000000 |
| 48 | nova-compute   | compute164    | nova     | enabled | up    | 2020-07-16T15:59:27.000000 |
+----+----------------+---------------+----------+---------+-------+----------------------------+

你可能感兴趣的:(问题解决集锦,OpenStack)