One of the pitfalls of running RabbitMQ from Python: Stream connection lost: IndexError('pop from an empty deque')

Running the code below intermittently produced the error Stream connection lost: IndexError('pop from an empty deque'), yet the same code showed no problem when stepped through in a debugger.

import json
import threading
import time
from collections import defaultdict

import pika
from loguru import logger

from config import get_rabbitmq_connection  # make sure this config module is set up correctly


class SwarmAPI:
    def generate_reply(self, user_id, message, context=None):
        # In production this would call the Swarm framework API; kept as a mock here
        return f"Thanks for your message: '{message}'. We have received it and will get back to you shortly."


class MessageReassembler:
    def __init__(self, timeout=500):
        self.sessions = {}  # plain dict rather than defaultdict, to avoid implicit initialization
        self.lock = threading.Lock()
        self.timeout = timeout

    def add_fragment(self, user_id, fragment, is_final=False):
        with self.lock:
            current_time = time.time()

            # Initialize a session for this user if one does not exist yet
            if user_id not in self.sessions:
                self.sessions[user_id] = {'data': '', 'timestamp': current_time}

            session = self.sessions[user_id]

            # Reset the session if it has been idle longer than the timeout
            if current_time - session['timestamp'] > self.timeout:
                logger.warning(f"Session for user {user_id} timed out; resetting it")
                session['data'] = ''

            # Append the current fragment
            session['data'] += fragment
            session['timestamp'] = current_time

            # Check whether the message is complete
            if is_final or self._check_completeness(fragment):
                full_msg = session['data']
                # Drop the session data once the full message is returned
                del self.sessions[user_id]
                return full_msg
            return None

    def _check_completeness(self, fragment):
        # Heuristic: natural-language sentence terminators (or an unusually long fragment) signal a complete message
        try:
            # The completeness rules can be extended here
            return any(c in fragment for c in ('.', '?', '!', '。', '?', '!')) or len(fragment) > 100
        except Exception as e:
            logger.error(f"检查消息完整性时出错: {e}")
            return False



class ChatConsumer:
    def __init__(self, thread_num=4, timeout=5):
        try:
            self.reassembler = MessageReassembler(timeout)
            self.swarm = SwarmAPI()

            # RabbitMQ connection
            self.connection = pika.BlockingConnection(get_rabbitmq_connection())
            self.channel = self.connection.channel()

            # Declare both queues up front
            self.channel.queue_declare(queue='client_messages', durable=True)
            self.channel.queue_declare(queue='bot_responses', durable=True)

            self.channel.basic_qos(prefetch_count=100)
            self.channel.basic_consume(queue='client_messages', on_message_callback=self._consume_messages, auto_ack=True)
            self.thread_num = thread_num
            self.user_fragments = {}
        except Exception as e:
            logger.error(f"初始化失败: {e}")
            raise

    def _consume_messages(self, channel, method, properties, body):
        try:
            message = json.loads(body)
            user_id = message["user_id"]
            fragment = message["content"]
            is_final = message.get("is_final", False)

            # Get or initialize this user's fragment list
            if user_id not in self.user_fragments:
                self.user_fragments[user_id] = []

            # Append the current fragment
            self.user_fragments[user_id].append(fragment)

            # On the final fragment, reassemble the full message and process it
            if is_final:
                full_msg = ''.join(self.user_fragments[user_id])
                del self.user_fragments[user_id]  # clear this user's buffered fragments
                # Hand off reply generation to a background thread
                threading.Thread(
                    target=self._process_and_reply,
                    args=(user_id, full_msg),
                    daemon=True
                ).start()
                # self.channel.basic_ack(delivery_tag = method.delivery_tag)
        except json.JSONDecodeError:
            logger.error(f"无效JSON数据: {body}")
            self.channel.basic_reject(method.delivery_tag, requeue=False)
        except Exception as e:
            logger.error(f"处理消息异常: {e}")
            self.channel.basic_reject(method.delivery_tag, requeue=True)

    def _process_and_reply(self, user_id, full_msg):
        try:
            response = self.swarm.generate_reply(user_id, full_msg)
            self._send_response(user_id, response)
            logger.info(f"已回复用户 {user_id}")
        except Exception as e:
            logger.error(f"生成回复失败: {e}")
            self._send_response(user_id, "服务暂时不可用,请稍后重试")

    def _send_response(self, user_id, response):
        self.channel.basic_publish(
            exchange='',
            routing_key='bot_responses',
            body=json.dumps({
                "user_id": user_id,
                "response": response,
                "timestamp": time.time()
            }),
            properties=pika.BasicProperties(
                delivery_mode=2,
                content_type="application/json"
            )
        )


if __name__ == '__main__':
    consumer = ChatConsumer(thread_num=4)
    try:
        logger.info("Starting message consumer...")
        consumer.channel.start_consuming()
    except KeyboardInterrupt:
        logger.info("Shutting down gracefully...")
        consumer.channel.stop_consuming()
        consumer.connection.close()

Analysis points to the shared RabbitMQ channel being used from multiple threads. Channels obtained from a pika.BlockingConnection are not thread-safe: when several threads drive the same channel concurrently, pika's internal frame buffer (a deque) can be popped while empty, which surfaces as this IndexError. That also explains why the problem vanishes in the debugger, where single-stepping serializes the threads and hides the race. The fix is outlined below.

Step 1: Restructure channel management (one connection and channel per thread)

# Modify this method inside the ChatConsumer class
def _send_response(self, user_id, response):
    # Each worker thread opens its own connection and channel
    with pika.BlockingConnection(get_rabbitmq_connection()) as conn:
        channel = conn.channel()
        channel.queue_declare(queue='bot_responses', durable=True)
        channel.basic_publish(
            exchange='',
            routing_key='bot_responses',
            body=json.dumps({
                "user_id": user_id,
                "response": response,
                "timestamp": time.time()
            }),
            properties=pika.BasicProperties(
                delivery_mode=2,
                content_type="application/json"
            )
        )
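
Opening a new connection for every reply is safe but not free: each publish pays a TCP plus AMQP handshake. If reply volume is high, one possible optimization is to cache a publisher connection per worker thread with threading.local. The sketch below is only an illustration (it reuses get_rabbitmq_connection and the bot_responses queue from the code above); note that a BlockingConnection only services heartbeats while it is being used, so a cached connection that sits idle for a long time may still be dropped by the broker.

# Optional: reuse one publisher connection per thread (sketch, not part of the original fix)
import threading

import pika

_local = threading.local()

def _get_thread_channel():
    # Lazily create a connection and channel once per thread, and
    # recreate them if the broker has closed the old connection.
    conn = getattr(_local, "conn", None)
    if conn is None or conn.is_closed:
        _local.conn = pika.BlockingConnection(get_rabbitmq_connection())
        _local.channel = _local.conn.channel()
        _local.channel.queue_declare(queue='bot_responses', durable=True)
    return _local.channel

_send_response can then call _get_thread_channel() instead of opening a new connection per message.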

Step 2: Turn off automatic acknowledgement and manage ACKs manually

# Change the consumer registration in __init__
self.channel.basic_consume(
    queue='client_messages',
    on_message_callback=self._consume_messages,
    auto_ack=False  # disable automatic acknowledgement
)

# Change _consume_messages accordingly
def _consume_messages(self, channel, method, properties, body):
    try:
        # ...original logic...
        # Acknowledge manually once processing has succeeded
        self.channel.basic_ack(delivery_tag=method.delivery_tag)
    except Exception as e:
        # On failure, negatively acknowledge and requeue the message
        self.channel.basic_nack(delivery_tag=method.delivery_tag, requeue=True)
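
One detail worth noting: the basic_ack above runs inside the consumer callback, on the thread that called start_consuming, so it is safe. If you would rather acknowledge only after the worker thread has actually produced the reply, the acknowledgement must not be issued from the worker directly; pika provides BlockingConnection.add_callback_threadsafe for this. A minimal sketch (assuming pika >= 1.0, with the worker receiving method.delivery_tag as an extra argument):

def _process_and_reply(self, user_id, full_msg, delivery_tag):
    try:
        response = self.swarm.generate_reply(user_id, full_msg)
        self._send_response(user_id, response)
        # Ask the connection's own thread to perform the ack;
        # calling basic_ack directly from this worker thread would not be safe.
        self.connection.add_callback_threadsafe(
            lambda: self.channel.basic_ack(delivery_tag=delivery_tag)
        )
    except Exception as e:
        logger.error(f"Failed to generate reply: {e}")
        self.connection.add_callback_threadsafe(
            lambda: self.channel.basic_nack(delivery_tag=delivery_tag, requeue=True)
        )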

Step 3: Guard user_fragments with a thread lock

class ChatConsumer:
    def __init__(self, thread_num=4, timeout=5):
        # ...
        self.user_fragments = {}
        self.fragment_lock = threading.Lock()  # newly added lock

    def _consume_messages(self, channel, method, properties, body):
        # ...
        with self.fragment_lock:  # take the lock around all access to user_fragments
            if user_id not in self.user_fragments:
                self.user_fragments[user_id] = []
            self.user_fragments[user_id].append(fragment)
            if is_final:
                full_msg = ''.join(self.user_fragments[user_id])
                del self.user_fragments[user_id]
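
Putting steps 2 and 3 together, the revised _consume_messages could look like this (a consolidation of the snippets above; it acknowledges as soon as the fragment has been recorded, so a reply lost to a worker-thread crash will not be redelivered; use the add_callback_threadsafe variant from step 2 if that matters):

def _consume_messages(self, channel, method, properties, body):
    try:
        message = json.loads(body)
        user_id = message["user_id"]
        fragment = message["content"]
        is_final = message.get("is_final", False)

        full_msg = None
        with self.fragment_lock:
            # All access to the shared dict happens under the lock
            self.user_fragments.setdefault(user_id, []).append(fragment)
            if is_final:
                full_msg = ''.join(self.user_fragments.pop(user_id))

        if full_msg is not None:
            threading.Thread(
                target=self._process_and_reply,
                args=(user_id, full_msg),
                daemon=True
            ).start()

        # Manual ack once the fragment has been recorded
        channel.basic_ack(delivery_tag=method.delivery_tag)
    except json.JSONDecodeError:
        logger.error(f"Invalid JSON payload: {body}")
        channel.basic_reject(method.delivery_tag, requeue=False)
    except Exception as e:
        logger.error(f"Error while handling message: {e}")
        channel.basic_nack(delivery_tag=method.delivery_tag, requeue=True)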

Additional recommendations:

  1. Upgrade pika to the latest version: pip install -U pika

  2. Add a heartbeat to the connection parameters:

# In config.py, adjust get_rabbitmq_connection()
def get_rabbitmq_connection():
    return pika.ConnectionParameters(
        host='localhost',
        heartbeat=600  # heartbeat timeout in seconds, helps detect dead connections
    )

Root cause:

  • The shared channel in the original code was driven by multiple threads at once, corrupting pika's internal queue

  • Automatic acknowledgement meant messages could be lost whenever processing failed

  • The shared data structures had no thread synchronization

How to verify the fix:

  1. Send concurrent messages with a load-testing tool (see the sketch after this list)

  2. Watch whether the "pop from an empty deque" error still occurs

  3. Monitor whether the RabbitMQ connection stays stable
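
For item 1, a throwaway load generator along these lines is enough; it is only a sketch (the queue name and message shape are taken from the consumer code above, everything else is an assumption):

# load_test.py - send fragmented messages from many threads at once (sketch)
import json
import threading
import time

import pika

from config import get_rabbitmq_connection

def send_user_messages(user_id, n_messages=50):
    # One connection per publisher thread keeps the test itself thread-safe
    with pika.BlockingConnection(get_rabbitmq_connection()) as conn:
        channel = conn.channel()
        channel.queue_declare(queue='client_messages', durable=True)
        for i in range(n_messages):
            # Each message is sent as two fragments, the second marked final
            for part, is_final in ((f"hello {i}, first half ", False), ("second half.", True)):
                channel.basic_publish(
                    exchange='',
                    routing_key='client_messages',
                    body=json.dumps({
                        "user_id": user_id,
                        "content": part,
                        "is_final": is_final
                    }),
                    properties=pika.BasicProperties(delivery_mode=2)
                )
            time.sleep(0.01)

if __name__ == '__main__':
    threads = [threading.Thread(target=send_user_messages, args=(f"user-{i}",))
               for i in range(20)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()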

With these changes the error should disappear and overall stability should improve. If the problem persists, consider reusing pika.BlockingConnection instances through a small connection pool instead of opening a fresh connection for every reply.
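
A pool can be as small as a queue.Queue of pre-opened connections that publisher threads borrow and return. The sketch below is one possible shape, not an established pika API; it reuses get_rabbitmq_connection from the code above:

import queue

import pika

from config import get_rabbitmq_connection

class ConnectionPool:
    """A tiny blocking pool of pika connections (sketch)."""

    def __init__(self, size=4):
        self._pool = queue.Queue()
        for _ in range(size):
            self._pool.put(pika.BlockingConnection(get_rabbitmq_connection()))

    def publish(self, routing_key, body):
        conn = self._pool.get()            # borrow a connection (blocks if none is free)
        try:
            if conn.is_closed:             # replace connections the broker has dropped
                conn = pika.BlockingConnection(get_rabbitmq_connection())
            channel = conn.channel()
            channel.basic_publish(
                exchange='',
                routing_key=routing_key,
                body=body,
                properties=pika.BasicProperties(delivery_mode=2)
            )
            channel.close()
        finally:
            self._pool.put(conn)           # always return the connection

Worker threads would then call something like pool.publish('bot_responses', payload) instead of opening their own connections.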
