1.nsqd即可以作为producer的server,也可以作为consumer的server。
2.nsqd会将topic,channel的相关信息写到本地磁盘,且启动时会从文件加载到内存中来。
3.在内存中,in-flight(已投递、待确认)的消息存放在用数组实现的小根堆里,堆顶消息的过期时间最小。
4.每一个topic会启一个goroutine去消费topic的memoryMsgChan,每当这个topic下面收到一条消息时,则把这条消息(复制N份)发给topic下面的所有channel
5.当一个channel有多个consumer时,如何实现将消息随机分发给其中一个consumer:每个consumer都监听同一个chan,当有一个消息到来时,只会有一个consumer能拿到chan里面的消息,这个可以认为是随机的。
1.消息不可持久化(默认),默认情况下消息都在内存中。
2.消息最少会被投递一次
3.接收到的消息是无序的
4.消费者最终找出所有话题的生产者
"go-svc"
->兼容windows&&linux
->opts := nsqd.NewOptions()
->nsqd, err := nsqd.New(opts)
->p.nsqd.LoadMetadata()
->"从本地文件读取topic,channel信息"
->"nsqd.dat"
->topic1:[channel1,channel2,...]
->topic2:[channel3,channel4,...]
->IsValidTopicName()
->GetTopic(topicName)
->lookupdHTTPAddrs := n.lookupdHTTPAddrs()
->channelNames, err := n.ci.GetLookupdTopicChannels(t.name, lookupdHTTPAddrs)
->endpoint := fmt.Sprintf("http://%s/channels?topic=%s", addr, url.QueryEscape(topic))
->err := c.client.GETV1(endpoint, &resp)
->channels = append(channels, resp.Channels...)
->channelNames = ci.GetLookupdTopicChannels(t.name, lookupdHTTPAddrs)
->p.nsqd.PersistMetadata()
->持久化topic,channel信息到本地文件
-> go func p.nsqd.Main()
->n.waitGroup.Wrap(func() {
exitFunc(protocol.TCPServer(n.tcpListener, tcpServer, n.logf))
})
->tcpServer := &tcpServer{ctx: ctx}
->for {} ->clientConn, err := listener.Accept()
->go handler.Handle(clientConn) "每来一个连接,启一个goroutine"
->func (p *tcpServer) Handle()
->前4个字节表示协议protocolV2
->func (p *protocolV2) IOLoop()
->clientID := atomic.AddInt64(&p.ctx.nsqd.clientIDSequence, 1) "每来一个连接,自增1"
->client := newClientV2(clientID, conn, p.ctx)
->go p.messagePump(client, messagePumpStartedChan)
->subChannel.StartInFlightTimeout(msg, client.ID, msgTimeout)
->c.addToInFlightPQ(msg)
->"用数组实现的小根堆"
->"堆顶的过期时间最小"
->"比较域:消息的过期时间"
->err = p.SendMessage(client, msg)
->response, err = p.Exec(client, params)
->params[0] = 判断命令操作类型,"PUB" / "SUB" / "TOUCH" / ... / ...
->以"PUB"为例 -> "把消息存放在topic的memoryMsgChan"
->params[1] = topicName
->topic := p.ctx.nsqd.GetTopic(topicName)
->t = NewTopic(topicName, &context{n}, deleteCallback)
->"每一个topic会启一个goroutine去消费topic的memoryMsgChan"
->"将topic里面的信息分发给所有channel"
->t.waitGroup.Wrap(t.messagePump)
->func (t *Topic) messagePump()
->"获取这个topic下面所有的channel"
->"每当这个topic下面收到一条消息时,则把这条消息发给topic下面的所有channel"
->"一条message先是传递给topic的chan,然后这条消息再传递给channel的chan"
->for { case msg = <-memoryMsgChan:}
->for 每一个 channel
->chanMsg = NewMessage(msg.ID, msg.Body)
->err := channel.PutMessage(chanMsg)
->c.memoryMsgChan <- m:
->
->"那么这个channel里面的memoryMsgChan送到哪去了呢?"
->当然是发送给那些有sub行为的consumer
->"SUB"
->"每一个client只能sub一次" -> "每一个client只对应一个channel"
->func (p *protocolV2) SUB(client *clientV2, params [][]byte)
->topic := p.ctx.nsqd.GetTopic(topicName)
->channel = topic.GetChannel(channelName)
->client.Channel = channel
->client.SubEventChan <- channel
-> memoryMsgChan = subChannel.memoryMsgChan
->msg := <-memoryMsgChan
->"当一个channel有多个consumer,是如何实现随机将消息随机分发给一个consumer"
->"每一个consumer都会监听在这个memoryMsgChan"
->"当有一个消息到来时,只会有一个consumer能拿到chan里面的消息,这个可以认为是随机的。"
->subChannel.StartInFlightTimeout(msg, client.ID, msgTimeout)
->c.addToInFlightPQ(msg)
->c.inFlightPQ.Push(msg)
->err = p.SendMessage(client, msg)
->func (p *protocolV2) FIN(client *clientV2, params [][]byte)
->id, err := getMessageID(params[1])
->err = client.Channel.FinishMessage(client.ID, *id)
->c.removeFromInFlightPQ(msg)
->"精髓:区别topic的memoryMsgChan和channel的memoryMsgChan"
->msg := NewMessage(topic.GenerateID(), messageBody)
->err = topic.PutMessage(msg)
->err := t.put(m)
->t.memoryMsgChan <- m:
->如果 (内存channel)满了,则写磁盘
->client.PublishedMessage(topicName, 1)
->err = p.Send(client, frameTypeResponse, response)
->n.waitGroup.Wrap(n.queueScanLoop)
->channels := n.channels()
->returns a flat slice of all channels in all topics
->n.resizePool(len(channels), workCh, responseCh, closeCh)
->对于worker池中的每一个worker,启一个goroutine,一共4个
->queueScanWorker = 4
->n.waitGroup.Wrap(func() {
n.queueScanWorker(workCh, responseCh, closeCh)
})
->c := <-workCh
->c.processInFlightQueue(now)
->msg, _ := c.inFlightPQ.PeekAndShift(t)
->x := (*pq)[0]
->pq.Pop()
->c.popInFlightMessage(msg.clientID, msg.ID)
->c.put(msg)
->c.memoryMsgChan <- m:
->c.processDeferredQueue(now)
->n.waitGroup.Wrap(n.lookupLoop)
->对于每一个NSQLookupd
->lookupPeer := newLookupPeer(host, n.getOpts().MaxBodySize, n.logf,
connectCallback(n, hostname))
->&Command{[]byte("IDENTIFY"), nil, body}, nil
->err = json.Unmarshal(resp, &lp.Info)
->"peerInfo contains metadata for a lookupPeer instance "
->"tcp_port"
->"http_port"
->"broadcast_address"
->"broadcast_address:http_port" -> 拿到这个地址后,nsqd就可以去查topic,channel
->"是去广播地址而不是去TCPAddresses"
->for _, topic := range n.topicMap
->commands = append(commands, nsq.Register(channel.topicName, channel.name))
->&Command{[]byte("REGISTER"), params, nil}
->lookupPeers = append(lookupPeers, lookupPeer)
->n.lookupPeers.Store(lookupPeers)
->"这个地方为什么要Store"
->"因为nsqd要获得topic,channel的addr,会直接找lookupd查询。"
->case <-ticker:
->"对每一个lookupd,发送一次ping"
->&Command{[]byte("PING"), nil, nil}
->"在nsqLookupd那边如果收到PING,会更新活跃时间。"
->case val := <-n.notifyChan:
->"如果退出"
->&Command{[]byte("UNREGISTER"), params, nil}
->"如果注册"
-> &Command{[]byte("REGISTER"), params, nil}
->func (c *Channel) put(m *Message) error
->case c.memoryMsgChan <- m:
->default
->b := bufferPoolGet()
->sync.Pool
->"内存池"
->"用来保存和复用临时对象,以减少内存分配,降低GC压力。"
->err := writeMessageToBackend(b, m, c.backend)
"站在客户端角度"
"to_nsq"
->producer, err := nsq.NewProducer(addr, cfg)
->producer.Publish(*topic, line)
->w.sendCommand(Publish(topic, body))
->params = [][]byte{[]byte(topic)}
->&Command{[]byte("PUB"), params, body}
->doneChan := make(chan *ProducerTransaction)
->err := w.sendCommandAsync(cmd, doneChan, nil)
->"理解何为同步"
->"阻塞在doneChan上面,直到有数据"
->if atomic.LoadInt32(&w.state) != StateConnected
->"如果当前还未连接,那么先建立TCP连接"
->"读监听,写监听"
->"路由监听"
->err := w.connect()
->w.conn = NewConn(w.addr, &w.config, &producerConnDelegate{w})
->w.conn.Connect()
->conn, err := dialer.Dial("tcp", c.addr)
->c.conn = conn.(*net.TCPConn)
->c.r = conn
->c.w = conn
->c.identify()
->ci["client_id"] = c.config.ClientID
->ci["msg_timeout"] = int64(c.config.MsgTimeout / time.Millisecond)
->cmd, err := Identify(ci)
->&Command{[]byte("IDENTIFY"), nil, body}
->err = c.WriteCommand(cmd)
->c.maxRdyCount = resp.MaxRdyCount
->go c.readLoop()
->delegate := &connMessageDelegate{c}
->for{}
->frameType, data, err := ReadUnpackedResponse(c)
->"前4个byte是frame ID,后面N个byte是data"
->FrameTypeResponse
->c.delegate.OnResponse(c, data)
->w.responseChan <- data
->FrameTypeError
->FrameTypeMessage
->msg, err := DecodeMessage(data)
->msg.Timestamp = int64(binary.BigEndian.Uint64(b[:8]))
->msg.Attempts = binary.BigEndian.Uint16(b[8:10])
->copy(msg.ID[:], b[10:10+MsgIDLength])
->msg.Body = b[10+MsgIDLength:]
->msg.Delegate = delegate
->msg.NSQDAddress = c.String()
->go c.writeLoop()
->for{}
->select
->case cmd := <-c.cmdChan:
->c.WriteCommand(cmd)
->case resp := <-c.msgResponseChan:
->atomic.StoreInt32(&w.state, StateConnected)
->go w.router()
->for{}
->select
->case t := <-w.transactionChan:
->w.transactions = append(w.transactions, t)
->err := w.conn.WriteCommand(t.cmd)
->case data := <-w.responseChan:
->w.popTransaction(FrameTypeResponse, data)
->t.doneChan <- t
->"t.doneChan <- t"
->"这个时候上面阻塞在doneChan上的客户端才结束"
->t := &ProducerTransaction{
cmd: cmd,
doneChan: doneChan,
Args: args,
}
->w.transactionChan <- t:
->在 router()函数中
->w.transactions = append(w.transactions, t)
->"当publish之后收到response时会用"
->"理解何为同步"
->"也就是说,同步发送,必须等到返回;一次完整的事务,是发完数据并收到反馈。"
->func (w *Producer) onConnResponse(c *Conn, data []byte) { w.responseChan <- data }
->case data := <-w.responseChan:
->w.popTransaction(FrameTypeResponse, data)
->t := w.transactions[0]
->"这个地方同步我理解有问题,后提交的cmd必须要等先提交的cmd"
->"为什么叫事务,前面的cmd执行结果出来之后才能执行后面的cmd"
->w.transactions = w.transactions[1:]
->t.finish()
->t.doneChan <- t
->case data := <-w.errorChan:
->err := w.conn.WriteCommand(t.cmd)
->_, err := cmd.WriteTo(c)
->t := <-doneChan
"nsq_to_nsq"
->consumer, err := nsq.NewConsumer(topic, *channel, cCfg)
->go r.rdyLoop()
->
->"把准备接收消息数量分发给所有连接"
->"redistributing max-in-flight to connections"
->以下两种情况需要重新分配
->len(conns) > int(maxInFlight)
->r.inBackoff() && len(conns) > 1
->"让那些不活跃的连接滚蛋"
->"上一次消息到来距现在已经太久,让这个连接关闭,rdy置0"
->"上一次非0的rdy更新太久,让这个连接关闭,rdy置0"
->"随机挑conn,置rdy为1"
->consumer.AddConcurrentHandlers(topicHandler, len(destNsqdTCPAddrs))
->"第一个参数:业务方自己实现处理message"
->"第二个参数:并发度"
for i := 0; i < concurrency; i++ {
go r.handlerLoop(handler)
}
->for{}
->message, ok := <-r.incomingMessages
->err := handler.HandleMessage(message) -> if err != nil
->"业务方如果处理消息失败,可以重新入队"
->"sends a REQ command to the nsqd"
->message.Requeue(-1)
->m.doRequeue(delay, true)
->m.Delegate.OnRequeue(m, delay, backoff)
->c.msgResponseChan <- &msgResponse{msg: m, cmd: Requeue(m.ID, delay), success: false, backoff: backoff}
->&Command{[]byte("REQ"), params, nil}
-> if err == nil
->"业务方如果消息处理成功,则回馈"
->message.Finish()
->c.msgResponseChan <- &msgResponse{msg: m, cmd: Finish(m.ID), success: true}
->&Command{[]byte("FIN"), params, nil}
->msgsInFlight := atomic.AddInt64(&c.messagesInFlight, -1)
->err := consumer.ConnectToNSQDs(nsqdTCPAddrs)
->"指定详细addr"
->func (r *Consumer) ConnectToNSQD(addr string)
->conn := NewConn(addr, &r.config, &consumerConnDelegate{r})
->resp, err := conn.Connect()
->conn, err := dialer.Dial("tcp", c.addr)
->c.conn = conn.(*net.TCPConn)
->c.r = conn
->c.w = conn
->go c.readLoop()
->"前4个byte为frameID,后N个byte为Data"
->如果消息是_heartbeat_,返回nop
->&Command{[]byte("NOP"), nil, nil}
->如果是CLOSE_WAIT,则是对StartClose的应答
->c.delegate.OnMessage(c, msg)
->r.incomingMessages <- msg "是不是回到最初的起点"
->atomic.AddInt64(&c.messagesInFlight, 1) "表明待处理的消息"
->go c.writeLoop()
->case resp := <-c.msgResponseChan: "写完之后,有消息反馈"
-> "FIN" ->"如果反馈成功" -> resumeFlag
->r.startStopContinueBackoff(c, resumeFlag)
->backoffCounter--
-> "REQ" -> "如果反馈失败"
->"返回backoff" -> backoffFlag
->r.startStopContinueBackoff(c, backoffFlag)
->backoffCounter++
->nextBackoff := math.Pow(2, float64(attempt))
->"否则返回continue"
->啥都不做
->backoffCounter == 0
-> "退出backoff"
->backoffCounter > 0 -> "在这段期间,停止接收信息"
->"send RDY 0 immediately (to *all* connections)"
->r.updateRDY(c, 0)
->"停止接收消息"
->backoffDuration := r.config.BackoffStrategy.Calculate(int(backoffCounter))
->"在backoffDuration时间后执行"
->time.AfterFunc(d, r.resume)
->r.updateRDY(choice, 1)
->cmd := Subscribe(r.topic, r.channel)
->&Command{[]byte("SUB"), params, nil}
->for _, c := range r.conns() {}
->r.maybeUpdateRDY(c)
->count := r.perConnMaxInFlight()
->r.updateRDY(conn, count)
->c.maxRdyCount = resp.MaxRdyCount
->"client和nsqd协商好的最大能接收的N条消息"
->"最大能接受"
->c.rdyCount
->"当前能接受"
->maxPossibleRdy := int64(r.getMaxInFlight()) - atomic.LoadInt64(&r.totalRdyCount) + rdyCount
->"理解这个公式?"
->"因为 atomic.LoadInt64(&r.totalRdyCount) + (maxPossibleRdy - rdyCount) <= getMaxInFlight "
->r.sendRDY(c, count)
->atomic.AddInt64(&r.totalRdyCount, count-c.RDY())
->""
->c.SetRDY(count)
->err := c.WriteCommand(Ready(int(count)))
->&Command{[]byte("RDY"), params, nil}
->err := consumer.ConnectToNSQLookupds(lookupdHTTPAddrs)
->"自动发现addr"
->go r.lookupdLoop()
->r.queryLookupd()
->"make an HTTP req to one of the configured nsqlookupd instances to discover"
->"which nsqd's provide the topic we are consuming"
->"http://.../lookup?topic=..."
->err := apiRequestNegotiateV1("GET", endpoint, nil, &data)
->broadcastAddress := producer.BroadcastAddress
->port := producer.TCPPort
->joined := "broadcastAddress:port"
"nsqlookupd"
->for{}->clientConn, err := listener.Accept()
->go handler.Handle(clientConn)
->func (p *LookupProtocolV1) IOLoop()
->response, err = p.Exec(client, reader, params)
->"PING"
->"更新每个nsqd节点的上一次活跃时间"
->"节点的活跃时间,用来过滤那些不活跃的节点"
->InactiveProducerTimeout: 300 * time.Second, "默认300s"
->"也就是说,即使nsqd挂了,nsqd忘记发送UNREGISTER了,300s过后nsqlookupd也会将其删除"
->Handle("GET", "/lookup")
->"IDENTIFY"
->peerInfo.RemoteAddress = client.RemoteAddr().String()
->client.peerInfo = &peerInfo
->p.ctx.nsqlookupd.DB.AddProducer(Registration{"client", "", ""}, &Producer{peerInfo: client.peerInfo})
->response = tcp_port ...
->"REGISTER"
->"client must IDENTIFY"
->topic, channel = params[0],params[1]
->if channel != ""
->key := Registration{"channel", topic, channel}
->p.ctx.nsqlookupd.DB.AddProducer(key, &Producer{peerInfo: client.peerInfo})
->key := Registration{"topic", topic, ""}
->p.ctx.nsqlookupd.DB.AddProducer(key, &Producer{peerInfo: client.peerInfo})
->"如何做到高可用,"
->"UNREGISTER"
->topic, channel, err := getTopicChan("UNREGISTER", params)
->"当有故障节点产生时,nsqlookupd会自动删除,那么是如何做到的呢?"
->"如何做到故障容错?当nsqd挂掉时,nsqd有逻辑会发送unregister的cmd"
->以下是nsqd的逻辑
->func (c *Channel) exit()
->func (t *Topic) exit()
->router.Handle("GET", "/topics", http_api.Decorate(s.doTopics, log, http_api.V1))
->router.Handle("POST", "/topic/create", http_api.Decorate(s.doCreateTopic, log, http_api.V1))
->topicName, err := reqParams.Get("topic")
->key := Registration{"topic", topicName, ""}
->s.ctx.nsqlookupd.DB.AddRegistration(key)
->router.Handle("POST", "/topic/delete", http_api.Decorate(s.doDeleteTopic, log, http_api.V1))
->registrations := s.ctx.nsqlookupd.DB.FindRegistrations("channel", topicName, "*")
->"先删除topic下面的所有channel"
->"再删除topic"
->router.Handle("POST", "/channel/create", http_api.Decorate(s.doCreateChannel, log, http_api.V1))
->key := Registration{"channel", topicName, channelName}
->s.ctx.nsqlookupd.DB.AddRegistration(key)
->key = Registration{"topic", topicName, ""}
->s.ctx.nsqlookupd.DB.AddRegistration(key)