在看nsqlookupd的时候一开始发现svn包里面的东西,不知道这是什么,就再网上查了一下
nsqd为了优雅的关闭退出,使用了svc包来管理程序的运行。先不用管svc,svc.run之后会执行Init和Start函数。
所以我们直接从它的start方法开始看
func (p *program) Start() error {
opts := nsqlookupd.NewOptions()
flagSet := nsqlookupdFlagSet(opts)
flagSet.Parse(os.Args[1:])
if flagSet.Lookup("version").Value.(flag.Getter).Get().(bool) {
fmt.Println(version.String("nsqlookupd"))
os.Exit(0)
}
var cfg map[string]interface{}
configFile := flagSet.Lookup("config").Value.String()
if configFile != "" {
_, err := toml.DecodeFile(configFile, &cfg)
if err != nil {
logFatal("failed to load config file %s - %s", configFile, err)
}
}
options.Resolve(opts, flagSet, cfg)
nsqlookupd, err := nsqlookupd.New(opts)
if err != nil {
logFatal("failed to instantiate nsqlookupd", err)
}
p.nsqlookupd = nsqlookupd
go func() {
err := p.nsqlookupd.Main()
if err != nil {
p.Stop()
os.Exit(1)
}
}()
return nil
}
在这个方法里面主要就是nsqlookupd启动的配置 ,我们都采用默认的,最终我们所关注的就是nsqlookupd.Main()方法,我们先来简单看一下默认的配置
return &Options{
LogPrefix: "[nsqlookupd] ",
LogLevel: lg.INFO,
TCPAddress: "0.0.0.0:4160",
HTTPAddress: "0.0.0.0:4161",
BroadcastAddress: hostname,
InactiveProducerTimeout: 300 * time.Second,
TombstoneLifetime: 45 * time.Second,
}
接下来就是我们最关注的方法了
func (l *NSQLookupd) Main() error {
ctx := &Context{l}
exitCh := make(chan error)
var once sync.Once
exitFunc := func(err error) {
once.Do(func() {
if err != nil {
l.logf(LOG_FATAL, "%s", err)
}
exitCh <- err
})
}
tcpServer := &tcpServer{ctx: ctx}
l.waitGroup.Wrap(func() {
exitFunc(protocol.TCPServer(l.tcpListener, tcpServer, l.logf))
})
httpServer := newHTTPServer(ctx)
l.waitGroup.Wrap(func() {
exitFunc(http_api.Serve(l.httpListener, httpServer, "HTTP", l.logf))
})
err := <-exitCh
return err
}
我们可以看到它的主要工作就是启动了TCP和HTTP对指定端口的监听,我们现在所关注的也就变成对客户端传来的信息的处理
先来看一下TCP的处理过程,nsqd和nsqloolupd建立的就是TCP连接
func TCPServer(listener net.Listener, handler TCPHandler, logf lg.AppLogFunc) error {
logf(lg.INFO, "TCP: listening on %s", listener.Addr())
for {
clientConn, err := listener.Accept()
if err != nil {
if nerr, ok := err.(net.Error); ok && nerr.Temporary() {
logf(lg.WARN, "temporary Accept() failure - %s", err)
runtime.Gosched()
continue
}
// theres no direct way to detect this error because it is not exposed
if !strings.Contains(err.Error(), "use of closed network connection") {
return fmt.Errorf("listener.Accept() error - %s", err)
}
break
}
go handler.Handle(clientConn)
}
logf(lg.INFO, "TCP: closing %s", listener.Addr())
return nil
}
nsqlookupd监听指定的端口,每当与一个新用户建立连接就启动一个goroutine来处理
func (p *tcpServer) Handle(clientConn net.Conn) {
p.ctx.nsqlookupd.logf(LOG_INFO, "TCP: new client(%s)", clientConn.RemoteAddr())
// The client should initialize itself by sending a 4 byte sequence indicating
// the version of the protocol that it intends to communicate, this will allow us
// to gracefully upgrade the protocol away from text/line oriented to whatever...
buf := make([]byte, 4)
_, err := io.ReadFull(clientConn, buf)
if err != nil {
p.ctx.nsqlookupd.logf(LOG_ERROR, "failed to read protocol version - %s", err)
clientConn.Close()
return
}
protocolMagic := string(buf)
p.ctx.nsqlookupd.logf(LOG_INFO, "CLIENT(%s): desired protocol magic '%s'",
clientConn.RemoteAddr(), protocolMagic)
var prot protocol.Protocol
switch protocolMagic {
case " V1":
prot = &LookupProtocolV1{ctx: p.ctx}
default:
protocol.SendResponse(clientConn, []byte("E_BAD_PROTOCOL"))
clientConn.Close()
p.ctx.nsqlookupd.logf(LOG_ERROR, "client(%s) bad protocol magic '%s'",
clientConn.RemoteAddr(), protocolMagic)
return
}
err = prot.IOLoop(clientConn)
if err != nil {
p.ctx.nsqlookupd.logf(LOG_ERROR, "client(%s) - %s", clientConn.RemoteAddr(), err)
return
}
}
我们发现,nsqd和nsqlookupd在建立起连接的第一次通信之后会商定通信的协议,只能是V1,之后获取协议对应的实例,执行IOLoop方法
func (p *LookupProtocolV1) IOLoop(conn net.Conn) error {
var err error
var line string
client := NewClientV1(conn)
reader := bufio.NewReader(client)
for {
line, err = reader.ReadString('\n')
if err != nil {
break
}
line = strings.TrimSpace(line)
params := strings.Split(line, " ")
var response []byte
response, err = p.Exec(client, reader, params)
if err != nil {
ctx := ""
if parentErr := err.(protocol.ChildErr).Parent(); parentErr != nil {
ctx = " - " + parentErr.Error()
}
p.ctx.nsqlookupd.logf(LOG_ERROR, "[%s] - %s%s", client, err, ctx)
_, sendErr := protocol.SendResponse(client, []byte(err.Error()))
if sendErr != nil {
p.ctx.nsqlookupd.logf(LOG_ERROR, "[%s] - %s%s", client, sendErr, ctx)
break
}
// errors of type FatalClientErr should forceably close the connection
if _, ok := err.(*protocol.FatalClientErr); ok {
break
}
continue
}
if response != nil {
_, err = protocol.SendResponse(client, response)
if err != nil {
break
}
}
}
conn.Close()
p.ctx.nsqlookupd.logf(LOG_INFO, "CLIENT(%s): closing", client)
if client.peerInfo != nil {
registrations := p.ctx.nsqlookupd.DB.LookupRegistrations(client.peerInfo.id)
for _, r := range registrations {
if removed, _ := p.ctx.nsqlookupd.DB.RemoveProducer(r, client.peerInfo.id); removed {
p.ctx.nsqlookupd.logf(LOG_INFO, "DB: client(%s) UNREGISTER category:%s key:%s subkey:%s",
client, r.Category, r.Key, r.SubKey)
}
}
}
return err
}
首先对客户端的连接包装了一写,并且获取连接的带缓冲区的reader,接下来就是不断从reader里面读取客户端发来的信息,解析完之后传入exec方法进行响应,最后将处理结果response返回给客户端。
在我们介绍nsqd的时候讲到过,nsqd和nsqlookupd连接之后首先会发送一个IDENTIFY命令交换信息,然后成功后发送REGISTER命令注册自己的所有topic,当topic或者channel发生变化的时候发送REGISTER或者UNREGISTER命令,并且定时发送PING命令保持心跳,接下来我们就看一下这几种命令处理
IDENTIFY
func (p *LookupProtocolV1) IDENTIFY(client *ClientV1, reader *bufio.Reader, params []string) ([]byte, error) {
var err error
if client.peerInfo != nil {
return nil, protocol.NewFatalClientErr(err, "E_INVALID", "cannot IDENTIFY again")
}
var bodyLen int32
err = binary.Read(reader, binary.BigEndian, &bodyLen)
if err != nil {
return nil, protocol.NewFatalClientErr(err, "E_BAD_BODY", "IDENTIFY failed to read body size")
}
body := make([]byte, bodyLen)
_, err = io.ReadFull(reader, body)
if err != nil {
return nil, protocol.NewFatalClientErr(err, "E_BAD_BODY", "IDENTIFY failed to read body")
}
// body is a json structure with producer information
peerInfo := PeerInfo{id: client.RemoteAddr().String()}
err = json.Unmarshal(body, &peerInfo)
if err != nil {
return nil, protocol.NewFatalClientErr(err, "E_BAD_BODY", "IDENTIFY failed to decode JSON body")
}
peerInfo.RemoteAddress = client.RemoteAddr().String()
// require all fields
if peerInfo.BroadcastAddress == "" || peerInfo.TCPPort == 0 || peerInfo.HTTPPort == 0 || peerInfo.Version == "" {
return nil, protocol.NewFatalClientErr(nil, "E_BAD_BODY", "IDENTIFY missing fields")
}
atomic.StoreInt64(&peerInfo.lastUpdate, time.Now().UnixNano())
p.ctx.nsqlookupd.logf(LOG_INFO, "CLIENT(%s): IDENTIFY Address:%s TCP:%d HTTP:%d Version:%s",
client, peerInfo.BroadcastAddress, peerInfo.TCPPort, peerInfo.HTTPPort, peerInfo.Version)
client.peerInfo = &peerInfo
if p.ctx.nsqlookupd.DB.AddProducer(Registration{"client", "", ""}, &Producer{peerInfo: client.peerInfo}) {
p.ctx.nsqlookupd.logf(LOG_INFO, "DB: client(%s) REGISTER category:%s key:%s subkey:%s", client, "client", "", "")
}
// build a response
data := make(map[string]interface{})
data["tcp_port"] = p.ctx.nsqlookupd.RealTCPAddr().Port
data["http_port"] = p.ctx.nsqlookupd.RealHTTPAddr().Port
data["version"] = version.Binary
hostname, err := os.Hostname()
if err != nil {
log.Fatalf("ERROR: unable to get hostname %s", err)
}
data["broadcast_address"] = p.ctx.nsqlookupd.opts.BroadcastAddress
data["hostname"] = hostname
response, err := json.Marshal(data)
if err != nil {
p.ctx.nsqlookupd.logf(LOG_ERROR, "marshaling %v", data)
return []byte("OK"), nil
}
return response, nil
}
我们可以看到大体上的逻辑就是先读取客户端传过来的信息组装成peerInfo保存到client的peerInfo字段中接着将自己的信息封装好放到response中返回给客户端。
PING
func (p *LookupProtocolV1) PING(client *ClientV1, params []string) ([]byte, error) {
if client.peerInfo != nil {
// we could get a PING before other commands on the same client connection
cur := time.Unix(0, atomic.LoadInt64(&client.peerInfo.lastUpdate))
now := time.Now()
p.ctx.nsqlookupd.logf(LOG_INFO, "CLIENT(%s): pinged (last ping %s)", client.peerInfo.id,
now.Sub(cur))
atomic.StoreInt64(&client.peerInfo.lastUpdate, now.UnixNano())
}
return []byte("OK"), nil
}
心跳更新就是简单的就是更新当前client的最后更新时间
func (p *LookupProtocolV1) REGISTER(client *ClientV1, reader *bufio.Reader, params []string) ([]byte, error) {
if client.peerInfo == nil {
return nil, protocol.NewFatalClientErr(nil, "E_INVALID", "client must IDENTIFY")
}
topic, channel, err := getTopicChan("REGISTER", params)
if err != nil {
return nil, err
}
if channel != "" {
key := Registration{"channel", topic, channel}
if p.ctx.nsqlookupd.DB.AddProducer(key, &Producer{peerInfo: client.peerInfo}) {
p.ctx.nsqlookupd.logf(LOG_INFO, "DB: client(%s) REGISTER category:%s key:%s subkey:%s",
client, "channel", topic, channel)
}
}
key := Registration{"topic", topic, ""}
if p.ctx.nsqlookupd.DB.AddProducer(key, &Producer{peerInfo: client.peerInfo}) {
p.ctx.nsqlookupd.logf(LOG_INFO, "DB: client(%s) REGISTER category:%s key:%s subkey:%s",
client, "topic", topic, "")
}
return []byte("OK"), nil
}
nsqlookupd首先从nsqd传来的params中提取topic和channel,然后根据分别创建channel和topic的key,最后将key和peerInfo保存起来:
func (r *RegistrationDB) AddProducer(k Registration, p *Producer) bool {
r.Lock()
defer r.Unlock()
_, ok := r.registrationMap[k]
if !ok {
r.registrationMap[k] = make(map[string]*Producer)
}
producers := r.registrationMap[k]
_, found := producers[p.peerInfo.id]
if found == false {
producers[p.peerInfo.id] = p
}
return !found
}
一个topic和channel可能会有多个nsqd提供所以registrationMap中根据key又存储了一个map,第二个map的key是client的id,存储的value是这个client的peerInfo
UNREGISTER
func (p *LookupProtocolV1) UNREGISTER(client *ClientV1, reader *bufio.Reader, params []string) ([]byte, error) {
if client.peerInfo == nil {
return nil, protocol.NewFatalClientErr(nil, "E_INVALID", "client must IDENTIFY")
}
topic, channel, err := getTopicChan("UNREGISTER", params)
if err != nil {
return nil, err
}
if channel != "" {
key := Registration{"channel", topic, channel}
removed, left := p.ctx.nsqlookupd.DB.RemoveProducer(key, client.peerInfo.id)
if removed {
p.ctx.nsqlookupd.logf(LOG_INFO, "DB: client(%s) UNREGISTER category:%s key:%s subkey:%s",
client, "channel", topic, channel)
}
// for ephemeral channels, remove the channel as well if it has no producers
if left == 0 && strings.HasSuffix(channel, "#ephemeral") {
p.ctx.nsqlookupd.DB.RemoveRegistration(key)
}
} else {
// no channel was specified so this is a topic unregistration
// remove all of the channel registrations...
// normally this shouldn't happen which is why we print a warning message
// if anything is actually removed
registrations := p.ctx.nsqlookupd.DB.FindRegistrations("channel", topic, "*")
for _, r := range registrations {
removed, _ := p.ctx.nsqlookupd.DB.RemoveProducer(r, client.peerInfo.id)
if removed {
p.ctx.nsqlookupd.logf(LOG_WARN, "client(%s) unexpected UNREGISTER category:%s key:%s subkey:%s",
client, "channel", topic, r.SubKey)
}
}
key := Registration{"topic", topic, ""}
removed, left := p.ctx.nsqlookupd.DB.RemoveProducer(key, client.peerInfo.id)
if removed {
p.ctx.nsqlookupd.logf(LOG_INFO, "DB: client(%s) UNREGISTER category:%s key:%s subkey:%s",
client, "topic", topic, "")
}
if left == 0 && strings.HasSuffix(topic, "#ephemeral") {
p.ctx.nsqlookupd.DB.RemoveRegistration(key)
}
}
return []byte("OK"), nil
}
取消注册其实就是注册的逆操作,当channel不为空的时候直接拼装key删除当前client对应的信息,而当channel为空的时候就要删除全部要删除的topic的当前client的信息
nsqd和nsqlookupd的交互大体上就这些了,还记得当消费者要消费消息的时候是先向nsqlookupd发送http请求获取要获取的topic和channel对应的nsqd的,那么接下来我们来看一下这部分内容
func newHTTPServer(ctx *Context) *httpServer {
log := http_api.Log(ctx.nsqlookupd.logf)
router := httprouter.New()
router.HandleMethodNotAllowed = true
router.PanicHandler = http_api.LogPanicHandler(ctx.nsqlookupd.logf)
router.NotFound = http_api.LogNotFoundHandler(ctx.nsqlookupd.logf)
router.MethodNotAllowed = http_api.LogMethodNotAllowedHandler(ctx.nsqlookupd.logf)
s := &httpServer{
ctx: ctx,
router: router,
}
router.Handle("GET", "/ping", http_api.Decorate(s.pingHandler, log, http_api.PlainText))
router.Handle("GET", "/info", http_api.Decorate(s.doInfo, log, http_api.V1))
// v1 negotiate
router.Handle("GET", "/debug", http_api.Decorate(s.doDebug, log, http_api.V1))
router.Handle("GET", "/lookup", http_api.Decorate(s.doLookup, log, http_api.V1))
router.Handle("GET", "/topics", http_api.Decorate(s.doTopics, log, http_api.V1))
router.Handle("GET", "/channels", http_api.Decorate(s.doChannels, log, http_api.V1))
router.Handle("GET", "/nodes", http_api.Decorate(s.doNodes, log, http_api.V1))
// only v1
router.Handle("POST", "/topic/create", http_api.Decorate(s.doCreateTopic, log, http_api.V1))
router.Handle("POST", "/topic/delete", http_api.Decorate(s.doDeleteTopic, log, http_api.V1))
router.Handle("POST", "/channel/create", http_api.Decorate(s.doCreateChannel, log, http_api.V1))
router.Handle("POST", "/channel/delete", http_api.Decorate(s.doDeleteChannel, log, http_api.V1))
router.Handle("POST", "/topic/tombstone", http_api.Decorate(s.doTombstoneTopicProducer, log, http_api.V1))
// debug
router.HandlerFunc("GET", "/debug/pprof", pprof.Index)
router.HandlerFunc("GET", "/debug/pprof/cmdline", pprof.Cmdline)
router.HandlerFunc("GET", "/debug/pprof/symbol", pprof.Symbol)
router.HandlerFunc("POST", "/debug/pprof/symbol", pprof.Symbol)
router.HandlerFunc("GET", "/debug/pprof/profile", pprof.Profile)
router.Handler("GET", "/debug/pprof/heap", pprof.Handler("heap"))
router.Handler("GET", "/debug/pprof/goroutine", pprof.Handler("goroutine"))
router.Handler("GET", "/debug/pprof/block", pprof.Handler("block"))
router.Handler("GET", "/debug/pprof/threadcreate", pprof.Handler("threadcreate"))
return s
}
nsqlookupd启动httpServer的时候为它注册了上面这么多根据不同的method,url对应的handle,我们来看一下消费者请求的url“/lookup”
func (s *httpServer) doLookup(w http.ResponseWriter, req *http.Request, ps httprouter.Params) (interface{}, error) {
reqParams, err := http_api.NewReqParams(req)
if err != nil {
return nil, http_api.Err{400, "INVALID_REQUEST"}
}
topicName, err := reqParams.Get("topic")
if err != nil {
return nil, http_api.Err{400, "MISSING_ARG_TOPIC"}
}
registration := s.ctx.nsqlookupd.DB.FindRegistrations("topic", topicName, "")
if len(registration) == 0 {
return nil, http_api.Err{404, "TOPIC_NOT_FOUND"}
}
channels := s.ctx.nsqlookupd.DB.FindRegistrations("channel", topicName, "*").SubKeys()
producers := s.ctx.nsqlookupd.DB.FindProducers("topic", topicName, "")
producers = producers.FilterByActive(s.ctx.nsqlookupd.opts.InactiveProducerTimeout,
s.ctx.nsqlookupd.opts.TombstoneLifetime)
return map[string]interface{}{
"channels": channels,
"producers": producers.PeerInfo(),
}, nil
}
根据用户传过来的topic找到对应的所有当前topic对应的client返回给用户。
总之,nsqlookupd不是很复杂,主要是负责管理,保存信息。