NSQ源码笔记


1.nsqd既可以作为producer的server,也可以作为consumer的server。

2.nsqd会将topic,channel的相关信息写到本地磁盘,且启动时会从文件加载到内存中来。

3.已投递但尚未确认(in-flight)的消息在内存中用数组实现的小根堆存储,堆顶消息的过期时间最小。

4.每一个topic会启一个goroutine去消费topic的memoryMsgChan,每当这个topic下面收到一条消息时,则把这条消息(复制N份)发给topic下面的所有channel

5.当一个channel有多个consumer时,是如何实现将消息随机分发给其中一个consumer的?每一个consumer都监听在这个channel的memoryMsgChan上,当有一个消息到来时,只会有一个consumer能拿到chan里面的消息,这个可以认为是随机的。

 

1.消息默认不持久化:默认情况下消息都在内存中,只有内存chan满了才会写磁盘。
2.消息最少会被投递一次
3.接收到的消息是无序的
4.消费者最终能(通过nsqlookupd)找出某个topic的所有生产者(nsqd)

"go-svc"
	->兼容windows&&linux

->opts := nsqd.NewOptions()
->nsqd, err := nsqd.New(opts)
->p.nsqd.LoadMetadata()
	->"从本地文件读取topic,channel信息"
	->"nsqd.dat"
		->topic1:[channel1,channel2,...]
		->topic2:[channel3,channel4,...]
	->IsValidTopicName()
	->GetTopic(topicName)
		->lookupdHTTPAddrs := n.lookupdHTTPAddrs()
			->channelNames, err := n.ci.GetLookupdTopicChannels(t.name, lookupdHTTPAddrs)
				->endpoint := fmt.Sprintf("http://%s/channels?topic=%s", addr, url.QueryEscape(topic))
				->err := c.client.GETV1(endpoint, &resp)
				->channels = append(channels, resp.Channels...)
		->channelNames = ci.GetLookupdTopicChannels(t.name, lookupdHTTPAddrs)
->p.nsqd.PersistMetadata()
	->持久化topic,channel信息到本地文件
-> go func p.nsqd.Main()
	->n.waitGroup.Wrap(func() {
		exitFunc(protocol.TCPServer(n.tcpListener, tcpServer, n.logf))
	})
		->tcpServer := &tcpServer{ctx: ctx}
		->for {} ->clientConn, err := listener.Accept()
		->go handler.Handle(clientConn) "每来一个链接,启一个goroutine"
		->func (p *tcpServer) Handle()
			->前4个字节表示协议protocolV2
			->func (p *protocolV2) IOLoop()
				->clientID := atomic.AddInt64(&p.ctx.nsqd.clientIDSequence, 1) "每来一个链接,自增1"
				->client := newClientV2(clientID, conn, p.ctx)
				->go p.messagePump(client, messagePumpStartedChan)
					->subChannel.StartInFlightTimeout(msg, client.ID, msgTimeout)
						->c.addToInFlightPQ(msg)
							->"用数组实现的小根堆"
							->"堆顶的过期时间最小"
							->"比较域:消息的过期时间"

					->err = p.SendMessage(client, msg)
				->response, err = p.Exec(client, params)
					->params[0] = 判断命令操作类型,"PUB" / "SUB" / "TOUCH" / ... / ... 
					->以"PUB"为例 -> "把消息存放在topic的memoryMsgChan"
						->params[1] = topicName
						->topic := p.ctx.nsqd.GetTopic(topicName)
							->t = NewTopic(topicName, &context{n}, deleteCallback)
								->"每一个topic会启一个goroutine去消费topic的memoryMsgChan"
								->"将topic里面的信息分发给所有channel"
								->t.waitGroup.Wrap(t.messagePump)
								->func (t *Topic) messagePump()
									->"获取这个topic下面所有的channel"
									->"每当这个topic下面收到一条消息时,则把这条消息发给topic下面的所有channel"
									->"一条message先是传递给topic的chan,然后这条消息再传递给channel的chan"
									->for { case msg = <-memoryMsgChan:}
										->for 每一个 channel
											->chanMsg = NewMessage(msg.ID, msg.Body)
											->err := channel.PutMessage(chanMsg)
											->c.memoryMsgChan <- m:

											->

											->"那么这个channel里面的memoryMsgChan送到哪去了呢?"
											->当然是发送给那些有sub行为的consumer
											->"SUB"
											->"每一个client只能sub一次" -> "每一个client只对应一个channel"
											->func (p *protocolV2) SUB(client *clientV2, params [][]byte)
												->topic := p.ctx.nsqd.GetTopic(topicName)
												->channel = topic.GetChannel(channelName)
												->client.Channel = channel
												->client.SubEventChan <- channel
											-> memoryMsgChan = subChannel.memoryMsgChan
											->msg := <-memoryMsgChan
											->"当一个channel有多个consumer,是如何实现随机将消息随机分发给一个consumer"
											->"每一个consumer都会监听在这个memoryMsgChan"
											->"当有一个消息到来时,只会有一个consumer能拿到chan里面的消息,这个可以认为是随机的。"

												->subChannel.StartInFlightTimeout(msg, client.ID, msgTimeout)
													->c.addToInFlightPQ(msg)
														->c.inFlightPQ.Push(msg)
												->err = p.SendMessage(client, msg)

											->func (p *protocolV2) FIN(client *clientV2, params [][]byte)
												->id, err := getMessageID(params[1])
												->err = client.Channel.FinishMessage(client.ID, *id)
												->c.removeFromInFlightPQ(msg)



									->"精髓:区别topic的memoryMsgChan和channel的memoryMsgChan"
									
						->msg := NewMessage(topic.GenerateID(), messageBody)
						->err = topic.PutMessage(msg)
							->err := t.put(m)
								->t.memoryMsgChan <- m:
								->如果 (内存channel)满了,则写磁盘
						->client.PublishedMessage(topicName, 1)
				->err = p.Send(client, frameTypeResponse, response)

	->n.waitGroup.Wrap(n.queueScanLoop)
		->channels := n.channels()
			->returns a flat slice of all channels in all topics
		->n.resizePool(len(channels), workCh, responseCh, closeCh)
			->对于worker池中的每一个worker,启一个goroutine,一共4个
				->queueScanWorker = 4
			->n.waitGroup.Wrap(func() {
				n.queueScanWorker(workCh, responseCh, closeCh)
			})
				->c := <-workCh
				->c.processInFlightQueue(now)
					->msg, _ := c.inFlightPQ.PeekAndShift(t)
						->x := (*pq)[0]
						->pq.Pop()
					->c.popInFlightMessage(msg.clientID, msg.ID)
					->c.put(msg)
						->c.memoryMsgChan <- m:
				->c.processDeferredQueue(now)
	->n.waitGroup.Wrap(n.lookupLoop)
		->对于每一个NSQLookupd
		->lookupPeer := newLookupPeer(host, n.getOpts().MaxBodySize, n.logf,
					connectCallback(n, hostname))
					->&Command{[]byte("IDENTIFY"), nil, body}, nil
						->err = json.Unmarshal(resp, &lp.Info)
							->"peerInfo contains metadata for a lookupPeer instance "
							->"tcp_port"
							->"http_port"
							->"broadcast_address"
							->"broadcast_address:http_port" -> 拿到这个地址后,nsqd就可以去查topic,channel
							->"是去广播地址而不是去TCPAddresses"
					->for _, topic := range n.topicMap
						->commands = append(commands, nsq.Register(channel.topicName, channel.name))
						->&Command{[]byte("REGISTER"), params, nil}
		->lookupPeers = append(lookupPeers, lookupPeer)
		->n.lookupPeers.Store(lookupPeers)
			->"这个地方为什么要Store"
			->"因为nsqd要获得topic,channel的addr,会直接找lookupd查询。"
		->case <-ticker:
			->"对每一个lookupd,发送一次ping"
			->&Command{[]byte("PING"), nil, nil}
			->"在nsqLookupd那边如果收到PING,会更新活跃时间。"
		->case val := <-n.notifyChan:
			->"如果退出"
				->&Command{[]byte("UNREGISTER"), params, nil}
			->"如果注册"
				-> &Command{[]byte("REGISTER"), params, nil}



->func (c *Channel) put(m *Message) error
	->case c.memoryMsgChan <- m:
	->default
	->b := bufferPoolGet()
		->sync.Pool
		->"内存池"
		->"用来保存和复用临时对象,以减少内存分配,降低GC压力。"
	->err := writeMessageToBackend(b, m, c.backend)











"站在客户端角度"
"to_nsq"
->producer, err := nsq.NewProducer(addr, cfg)
->producer.Publish(*topic, line)
	->w.sendCommand(Publish(topic, body))
			->params = [][]byte{[]byte(topic)}
			->&Command{[]byte("PUB"), params, body}
		->doneChan := make(chan *ProducerTransaction)
		->err := w.sendCommandAsync(cmd, doneChan, nil)
			->"理解何为同步"
			->"阻塞在doneChan上面,直到有数据"
			->if atomic.LoadInt32(&w.state) != StateConnected 
				->"如果当前还未连接,那么先建立TCP连接"
				->"读监听,写监听"
				->"路由监听"
			->err := w.connect()
				->w.conn = NewConn(w.addr, &w.config, &producerConnDelegate{w})
				->w.conn.Connect()
					->conn, err := dialer.Dial("tcp", c.addr)
					->c.conn = conn.(*net.TCPConn)
					->c.r = conn
					->c.w = conn
					->c.identify()
						->ci["client_id"] = c.config.ClientID
						->ci["msg_timeout"] = int64(c.config.MsgTimeout / time.Millisecond)
						->cmd, err := Identify(ci) 
							->&Command{[]byte("IDENTIFY"), nil, body}
						->err = c.WriteCommand(cmd)
						->c.maxRdyCount = resp.MaxRdyCount
					->go c.readLoop()
						->delegate := &connMessageDelegate{c}
						->for{}
						->frameType, data, err := ReadUnpackedResponse(c)
							->"前4个byte是frame ID,后面N个byte是data"
						->FrameTypeResponse
							->c.delegate.OnResponse(c, data)
								->w.responseChan <- data
						->FrameTypeError
						->FrameTypeMessage
							->msg, err := DecodeMessage(data)
								->msg.Timestamp = int64(binary.BigEndian.Uint64(b[:8]))
								->msg.Attempts = binary.BigEndian.Uint16(b[8:10])
								->copy(msg.ID[:], b[10:10+MsgIDLength])
								->msg.Body = b[10+MsgIDLength:]
							->msg.Delegate = delegate
							->msg.NSQDAddress = c.String()
					->go c.writeLoop()
						->for{}
						->select 
						->case cmd := <-c.cmdChan:
							->c.WriteCommand(cmd)
						->case resp := <-c.msgResponseChan:
				->atomic.StoreInt32(&w.state, StateConnected)
				->go w.router()
					->for{}
					->select
					->case t := <-w.transactionChan:
						->w.transactions = append(w.transactions, t)
						->err := w.conn.WriteCommand(t.cmd)
					->case data := <-w.responseChan:
						->w.popTransaction(FrameTypeResponse, data)
							->t.doneChan <- t
							->"t.doneChan <- t"
							->"这个时候上面阻塞在doneChan上的客户端才结束"
			->t := &ProducerTransaction{
				cmd:      cmd,
				doneChan: doneChan,
				Args:     args,
				}
			->w.transactionChan <- t:
				->在 router()函数中
					->w.transactions = append(w.transactions, t)
						->"当publish之后收到response时会用"
						->"理解何为同步"
						->"也就是说,同步发送,必须等到返回,一次事务的完整,是发完数据并收到反馈。"
						->func (w *Producer) onConnResponse(c *Conn, data []byte) { w.responseChan <- data }
						->case data := <-w.responseChan:
							->w.popTransaction(FrameTypeResponse, data)
								->t := w.transactions[0]
								->"这个地方同步我理解有问题,后提交的cmd必须要等先提交的cmd"
								->"为什么叫事务,前面的cmd执行结果出来之后才能执行后面的cmd"
								->w.transactions = w.transactions[1:]
								->t.finish()
									->t.doneChan <- t
						->case data := <-w.errorChan:
					->err := w.conn.WriteCommand(t.cmd)
						->_, err := cmd.WriteTo(c)
		->t := <-doneChan


"nsq_to_nsq"
->consumer, err := nsq.NewConsumer(topic, *channel, cCfg)
	->go r.rdyLoop()
		->
		->"把准备接收消息数量分发给所有连接"
		->"redistributing max-in-flight to connections"
		->以下两种情况需要重新分配
			->len(conns) > int(maxInFlight)
			->r.inBackoff() && len(conns) > 1
		->"让那些不活跃的连接滚蛋"
			->"上一次消息到来距现在已经太久,让这个连接关闭,rdy置0"
			->"上一次非0的rdy更新太久,让这个连接关闭,rdy置0"
		->"随机挑conn,置rdy为1"

->consumer.AddConcurrentHandlers(topicHandler, len(destNsqdTCPAddrs))
	->"第一个参数:业务方自己实现处理message"
	->"第二个参数:并发度"
	for i := 0; i < concurrency; i++ {
		go r.handlerLoop(handler)
	}
	->for{}
	->message, ok := <-r.incomingMessages
	->err := handler.HandleMessage(message) -> if err != nil
		->"业务方如果处理消息失败,可以重新入队"
		->"sends a REQ command to the nsqd"
		->message.Requeue(-1)
			->m.doRequeue(delay, true)
				->m.Delegate.OnRequeue(m, delay, backoff)
				->c.msgResponseChan <- &msgResponse{msg: m, cmd: Requeue(m.ID, delay), success: false, backoff: backoff}
				->&Command{[]byte("REQ"), params, nil}
	-> if err == nil
		->"业务方如果消息处理成功,则回馈"
		->message.Finish()
			->c.msgResponseChan <- &msgResponse{msg: m, cmd: Finish(m.ID), success: true}
			->&Command{[]byte("FIN"), params, nil}
			->msgsInFlight := atomic.AddInt64(&c.messagesInFlight, -1)

->err := consumer.ConnectToNSQDs(nsqdTCPAddrs)
	->"指定详细addr"
	->func (r *Consumer) ConnectToNSQD(addr string)
		->conn := NewConn(addr, &r.config, &consumerConnDelegate{r})
		->resp, err := conn.Connect()
			->conn, err := dialer.Dial("tcp", c.addr)
			->c.conn = conn.(*net.TCPConn)
			->c.r = conn
			->c.w = conn
			->go c.readLoop()
				->"前4个byte为frameID,后N个byte为Data"
				->如果消息是_heartbeat_,返回nop
					->&Command{[]byte("NOP"), nil, nil}
				->如果是CLOSE_WAIT,则是对StartClose的应答
				->c.delegate.OnMessage(c, msg)
					->r.incomingMessages <- msg "是不是回到最初的起点"
					->atomic.AddInt64(&c.messagesInFlight, 1) "表明待处理的消息"
			->go c.writeLoop()
				->case resp := <-c.msgResponseChan: "写完之后,有消息反馈"
					-> "FIN" ->"如果反馈成功" -> resumeFlag
					->r.startStopContinueBackoff(c, resumeFlag)
						->backoffCounter--
					-> "REQ" -> "如果反馈失败" 
						->"返回backoff" -> backoffFlag
							->r.startStopContinueBackoff(c, backoffFlag)
								->backoffCounter++
								->nextBackoff := math.Pow(2, float64(attempt))
						->"否则返回continue" 
							->啥都不做
					->backoffCounter == 0 
						-> "退出backoff"
					->backoffCounter > 0 -> "在这段期间,停止接收信息"
						->"send RDY 0 immediately (to *all* connections)"
							->r.updateRDY(c, 0)
							->"停止接收消息"
						->backoffDuration := r.config.BackoffStrategy.Calculate(int(backoffCounter))
						->"在backoffDuration时间后执行"
						->time.AfterFunc(d, r.resume)
						->r.updateRDY(choice, 1)

		->cmd := Subscribe(r.topic, r.channel)
			->&Command{[]byte("SUB"), params, nil}
		->for _, c := range r.conns() {}
			->r.maybeUpdateRDY(c)
				->count := r.perConnMaxInFlight()
				->r.updateRDY(conn, count)
					->c.maxRdyCount = resp.maxRdyCount 
						->"client和nsqd协商好的最大能接收的N条消息"
						->"最大能接受"
					->c.rdyCount
						->"当前能接受"
					->maxPossibleRdy := int64(r.getMaxInFlight()) - atomic.LoadInt64(&r.totalRdyCount) + rdyCount
						->"理解这个公式?"
						->"因为 atomic.LoadInt64(&r.totalRdyCount) + (maxPossibleRdy - rdyCount) <= getMaxInFlight "
					->r.sendRDY(c, count)
						->atomic.AddInt64(&r.totalRdyCount, count-c.RDY())
							->""
						->c.SetRDY(count)
						->err := c.WriteCommand(Ready(int(count)))
						->&Command{[]byte("RDY"), params, nil}

->err := consumer.ConnectToNSQLookupds(lookupdHTTPAddrs)
	->"自动发现addr"
	->go r.lookupdLoop()
	->r.queryLookupd()
		->"make an HTTP req to one of the configured nsqlookupd instances to discover"
		->"which nsqd's provide the topic we are consuming"
		->"http://...//topic"
		->err := apiRequestNegotiateV1("GET", endpoint, nil, &data)
		->broadcastAddress := producer.BroadcastAddress
		->port := producer.TCPPort
		->joined := "broadcastAddress:port"



"nsqlookupd"
->for{}->clientConn, err := listener.Accept()
->go handler.Handle(clientConn)
->func (p *LookupProtocolV1) IOLoop()
	->response, err = p.Exec(client, reader, params)
		->"PING"
			->"更新每个nsqd节点的上一次活跃时间"
			->"节点的活跃时间,用来过滤那些不活跃的节点"
			->InactiveProducerTimeout: 300 * time.Second, "默认300s"
			->"也就是说,即使nsqd挂了,nsqd忘记发送UNREGISTER了,300s过后nsqlookupd也会将其删除"
			->Handle("GET", "/lookup")

		->"IDENTIFY"
			->peerInfo.RemoteAddress = client.RemoteAddr().String()
			->client.peerInfo = &peerInfo
			->p.ctx.nsqlookupd.DB.AddProducer(Registration{"client", "", ""}, &Producer{peerInfo: client.peerInfo})
			->response = tcp_port ... 
		->"REGISTER"
			->"client must IDENTIFY"
			->topic, channel = params[0],params[1]
			->if channel != ""
				->key := Registration{"channel", topic, channel}
				->p.ctx.nsqlookupd.DB.AddProducer(key, &Producer{peerInfo: client.peerInfo})
			->key := Registration{"topic", topic, ""}
				->p.ctx.nsqlookupd.DB.AddProducer(key, &Producer{peerInfo: client.peerInfo})
			->"如何做到高可用,"
		->"UNREGISTER"
			->topic, channel, err := getTopicChan("UNREGISTER", params)
			->"当有故障节点产生时,nsqlookupd会自动删除,那么是如何做到的呢?"
			->"如何做到故障容错?当nsqd挂掉时,nsqd有逻辑会发送unregister的cmd"
				->以下是nsqd的逻辑
				->func (c *Channel) exit()
				->func (t *Topic) exit()
->router.Handle("GET", "/topics", http_api.Decorate(s.doTopics, log, http_api.V1))
->router.Handle("POST", "/topic/create", http_api.Decorate(s.doCreateTopic, log, http_api.V1))
	->topicName, err := reqParams.Get("topic")
	->key := Registration{"topic", topicName, ""}
	->s.ctx.nsqlookupd.DB.AddRegistration(key)
->router.Handle("POST", "/topic/delete", http_api.Decorate(s.doDeleteTopic, log, http_api.V1))
	->registrations := s.ctx.nsqlookupd.DB.FindRegistrations("channel", topicName, "*")
	->"先删除topic下面的所有channel"
	->"再删除topic"
->router.Handle("POST", "/channel/create", http_api.Decorate(s.doCreateChannel, log, http_api.V1))
	->key := Registration{"channel", topicName, channelName}
	->s.ctx.nsqlookupd.DB.AddRegistration(key)
	->key = Registration{"topic", topicName, ""}
	->s.ctx.nsqlookupd.DB.AddRegistration(key)





 

你可能感兴趣的:(Go开源框架源码走读)