grpc实现一个较复杂的聊天室

为什么80%的码农都做不了架构师?>>>   hot3.png

背景

接上文 https://my.oschina.net/tuxpy/blog/1631953

之前通过grpc实现了一个双向流的方式来实现聊天室. 所有的stream都是存在一个sync.Map里。如果这时候聊天室的压力大了,如何做扩展? 说得简单点就是怎么样来做负载均衡。之前在https://segmentfault.com/a/1190000008672912 看到过一篇借助etcd来实现负载均衡的文章,就借来实现了哈.

问题

  1. grpc的服务发现如何做?
  2. 某一个client发送的聊天内容,如何让分布在不同的节点上的client都收到
  3. 当某些client连接的service节点down了,如何让client重新发现并连接健康的节点? 如果所有的rpc方法都是request <-> response模式, 倒没什么问题, 大不了丢失一次当前正在处理的请求. 但是像聊天室这类应用使用的是双向流模式, 一旦一个节点down了, 客户端与该节点之间的stream在后续读写时就会得到codes.Unavailable错误

解决

  1. 参考https://segmentfault.com/a/1190000008672912 。服务端注册key, 定期keepalive, 客户端watch
  2. 利用etcd的put + watch实现一个remote channel,所有服务节点监听某一个key的PUT行为,根据value内容调用相应的方法(如广播消息给连接到自己的所有客户端). 之前还想过,每一个service同时还是一个grpc client,每次自己有广播行为时, 远程调用其他节点广播操作。天呐,还要维护一份所有节点的清单,杀了我吧.
  3. 将身份验证操作和聊天操作分离出来,先进行身份验证,取出token, 后续聊天的rpc调用时带上. token验证借助etcd实现的session, 每次断线重连,只影响聊天的stream, 不会重复登录.

实现

service

/*
 *
 *     Author        : tuxpy
 *     Email         : [email protected]
 *     Create time   : 3/7/18 9:18 AM
 *     Filename      : service.go
 *     Description   :
 *
 *
 */

package main

import (
	"bytes"
	"context"
	"crypto/rand"
	"encoding/gob"
	"encoding/hex"
	"encoding/json"
	"flag"
	"fmt"
	grpclb "grpclb/etcdv3"
	pb "grpclb/helloword"
	"io"
	"log"
	"net"
	"os"
	"os/signal"
	"strings"
	"sync"
	"syscall"
	"time"
	"utils"

	"github.com/coreos/etcd/clientv3"
	"github.com/coreos/etcd/mvcc/mvccpb"
	"github.com/golang/protobuf/ptypes/timestamp"
	"github.com/pkg/errors"
	"google.golang.org/grpc"
	"google.golang.org/grpc/metadata"
	"google.golang.org/grpc/peer"
)

// Service implements the chat RPC handlers (Login and SayHello) that main
// registers with the gRPC server. It has no fields; shared state lives in the
// package-level connect_pool, remote_channel and session_manger variables.
type Service struct{}

// ConnectPool keeps the chat streams attached to this node. Entries live in
// the embedded sync.Map, keyed by the user name passed to Add.
type ConnectPool struct {
	sync.Map
}

// RemoteCommand is the message exchanged between service nodes through the
// remote channel. Command names the action (main handles "broadcast") and
// Args carries its string parameters.
type RemoteCommand struct {
	Command string
	Args    map[string]string
}
// RemoteChannel is an etcd-backed command channel shared by all service nodes.
// Commands written to Out are published to etcd; commands observed through the
// etcd watch are delivered on In (see NewRemoteChannel).
type RemoteChannel struct {
	In  chan RemoteCommand
	Out chan RemoteCommand
	cli *clientv3.Client
}

// SessionManager stores and looks up login sessions in etcd through cli.
type SessionManager struct {
	cli *clientv3.Client
}

// Session is the JSON-serialized record stored in etcd for a logged-in user:
// the user's name and the token that identifies the session.
type Session struct {
	Name  string
	Token string
}

// connect_pool holds the chat streams connected to this node; assigned in main.
var connect_pool *ConnectPool

// remote_channel carries commands between service nodes; assigned in main.
var remote_channel *RemoteChannel

// session_manger creates and resolves login sessions; assigned in main.
var session_manger *SessionManager

// ReadyBroadCast queues a "broadcast" command on the remote channel so that
// every service node sees it. Because of load balancing, client streams may be
// attached to different nodes, so the broadcast has to reach all of them.
func ReadyBroadCast(from, message string) {
	args := map[string]string{
		"from":    from,
		"message": message,
	}
	remote_channel.Out <- RemoteCommand{Command: "broadcast", Args: args}
}

// Get loads the session stored in etcd for token and refreshes its lease.
//
// It returns an error when the etcd lookup fails, when no session exists for
// the token (unknown token or expired lease), when the stored value cannot be
// decoded, or when the lease cannot be refreshed.
func (sm *SessionManager) Get(token string) (*Session, error) {
	key := fmt.Sprintf("%s/%s/session/%s", grpclb.Prefix, *srv, token)
	resp, err := sm.cli.Get(context.Background(), key)
	if err != nil {
		return nil, err
	}
	// An unknown or expired token yields an empty result; indexing Kvs[0]
	// unconditionally would panic the whole node.
	if len(resp.Kvs) == 0 {
		return nil, errors.New("session not found or expired")
	}

	kv := resp.Kvs[0]
	session := &Session{}
	if err = json.Unmarshal(kv.Value, session); err != nil {
		return nil, errors.Wrap(err, "failed to unmarshal session data")
	}

	// Extend the session's lifetime on every successful lookup. A failure here
	// is reported to the caller instead of crashing the process.
	if _, err = sm.cli.KeepAliveOnce(context.Background(), clientv3.LeaseID(kv.Lease)); err != nil {
		return nil, errors.Wrap(err, "refresh session lease")
	}

	return session, nil
}

// GetFromContext resolves the session identified by the first "token" value in
// the incoming gRPC metadata of ctx. It returns an error when no token is
// present.
func (sm *SessionManager) GetFromContext(ctx context.Context) (*Session, error) {
	md, _ := metadata.FromIncomingContext(ctx)
	if tokens := md["token"]; len(tokens) > 0 {
		return sm.Get(tokens[0])
	}

	return nil, errors.New("Miss token")
}

// New creates a session for name, stores it in etcd under a freshly generated
// random token, and returns it. The etcd key is bound to a 5-minute lease.
//
// It returns an error when random token generation, lease creation, JSON
// encoding or the etcd write fails.
func (sm *SessionManager) New(name string) (*Session, error) {
	raw := make([]byte, 16)
	// A short or failed read would produce a predictable token, so the error
	// must not be ignored.
	if _, err := io.ReadFull(rand.Reader, raw); err != nil {
		return nil, errors.Wrap(err, "generate session token")
	}
	token := hex.EncodeToString(raw)

	key := fmt.Sprintf("%s/%s/session/%s", grpclb.Prefix, *srv, token)
	grant, err := sm.cli.Grant(context.Background(), 60*5) // token is valid for 5 minutes
	if err != nil {
		return nil, errors.Wrap(err, "grant etcd lease ")
	}

	session := &Session{
		Name:  name,
		Token: token,
	}
	payload, err := json.Marshal(session)
	if err != nil {
		return nil, errors.Wrap(err, "marshal session data")
	}

	_, err = sm.cli.Put(context.Background(), key, string(payload), clientv3.WithLease(grant.ID))
	if err != nil {
		return nil, errors.Wrap(err, "etcd3 put")
	}

	return session, nil
}

// Get returns the stream stored under name, or nil when the pool has no entry
// for that name.
func (p *ConnectPool) Get(name string) pb.Greeter_SayHelloServer {
	value, found := p.Load(name)
	if !found {
		return nil
	}

	return value.(pb.Greeter_SayHelloServer)
}

// Add stores stream under name, replacing any stream already stored for it.
func (p *ConnectPool) Add(name string, stream pb.Greeter_SayHelloServer) {
	p.Store(name, stream)
}

// Del removes the stream stored under name, if any.
func (p *ConnectPool) Del(name string) {
	p.Delete(name)
}

func (p *ConnectPool) BroadCast(from, message string) {
	log.Printf("BroadCast from: %s, message: %s\n", from, message)
	p.Range(func(username_i, stream_i interface{}) bool {
		username := username_i.(string)
		stream := stream_i.(pb.Greeter_SayHelloServer)
		if username == from {
			return true

		} else {
			log.Printf("From %s to %s\n", from, username)
			utils.CheckErrorPanic(stream.Send(&pb.HelloReply{
				Message:     message,
				MessageType: pb.HelloReply_NORMAL_MESSAGE,
				TS:          ×tamp.Timestamp{Seconds: time.Now().Unix()},
			}))
		}
		return true
	})
}

// Login creates a session for the requested user name and returns its token.
// It fails when a stream with that name is already in this node's connection
// pool or when the session cannot be created. On success a welcome message is
// queued for broadcast.
func (s *Service) Login(ctx context.Context, in *pb.LoginRequest) (*pb.LoginReply, error) {
	username := in.GetUsername()
	if existing := connect_pool.Get(username); existing != nil {
		return nil, errors.Errorf("username %s already exists", username)
	}

	session, err := session_manger.New(username)
	if err != nil {
		return nil, err
	}

	ReadyBroadCast(username, fmt.Sprintf("Welcome %s!", username))

	reply := &pb.LoginReply{
		Token:   session.Token,
		Success: true,
		Message: "success",
	}
	return reply, nil
}

// SayHello handles one bidirectional chat stream.
//
// The session is resolved from the "token" metadata of the stream context. If
// that fails, a CONNECT_FAILED reply carrying the error text is sent and the
// stream ends without an RPC error. Otherwise a CONNECT_SUCCESS reply is sent,
// the stream is registered in the connection pool under the session's user
// name, and every received message is queued for broadcast until the client
// closes the stream (returns nil) or the stream fails (returns the error).
func (s *Service) SayHello(stream pb.Greeter_SayHelloServer) error {
	// Peer information may be absent; only log it when present.
	if p, ok := peer.FromContext(stream.Context()); ok {
		log.Printf("Received new connection.  %s", p.Addr.String())
	}

	// The context carries the token; use it to look up the session.
	session, err := session_manger.GetFromContext(stream.Context())
	if err != nil {
		sendErr := stream.Send(&pb.HelloReply{
			Message:     err.Error(),
			MessageType: pb.HelloReply_CONNECT_FAILED,
		})
		if sendErr != nil {
			log.Printf("send connect failure notice: %s\n", sendErr.Error())
		}
		return nil
	}
	username := session.Name

	// Send the connect-success notice BEFORE registering the stream: once the
	// stream is in the pool the broadcast goroutine may call Send on it, and a
	// stream must not be sent on from two goroutines at the same time.
	err = stream.Send(&pb.HelloReply{
		Message:     "Connect success!",
		MessageType: pb.HelloReply_CONNECT_SUCCESS,
	})
	if err != nil {
		return err
	}

	connect_pool.Add(username, stream)
	defer func() {
		// When the user leaves the chat room, remove them from the pool and
		// tell everyone else.
		connect_pool.Del(username)
		ReadyBroadCast(username, fmt.Sprintf("%s leave room", username))
	}()

	for {
		req, err := stream.Recv()
		if err == io.EOF {
			// The client closed its side normally.
			return nil
		}
		if err != nil {
			return err
		}
		ReadyBroadCast(username, fmt.Sprintf("%s: %s", username, req.Message))
	}
}

// Command-line flags of the service node: srv is the service name used in the
// etcd keys and in registration, port is the TCP port to listen on, and reg is
// the comma-separated list of etcd endpoints.
var (
	srv  = flag.String("service", "chat_service", "service name")
	port = flag.Int("port", 8880, "listening port")
	reg  = flag.String("reg", "http://127.0.0.1:2479", "register etcd address")
)

// GetListen returns the address the server listens on: all interfaces
// (0.0.0.0) on the port given by the -port flag.
func GetListen() string {
	return net.JoinHostPort("0.0.0.0", fmt.Sprint(*port))
}

// NewSessionManager returns a SessionManager that stores sessions through cli.
func NewSessionManager(cli *clientv3.Client) *SessionManager {
	manager := new(SessionManager)
	manager.cli = cli
	return manager
}

// NewRemoteChannel builds a RemoteChannel on top of etcd and starts its two
// worker goroutines. It uses the etcd3 watch to receive actions published by
// other nodes.
//
// The receiver goroutine watches the channel key and, for every PUT event,
// gob-decodes the value into a RemoteCommand and delivers it on In. The sender
// goroutine gob-encodes every command taken from Out and PUTs it to the same
// key, which all nodes (including this one) observe through their watch.
// Decode, encode and PUT failures are logged and the command is dropped.
func NewRemoteChannel(cli *clientv3.Client) *RemoteChannel {
	qc := &RemoteChannel{
		cli: cli,
		In:  make(chan RemoteCommand, 1),
		Out: make(chan RemoteCommand, 1),
	}
	channel := fmt.Sprintf("%s/%s/channel", grpclb.Prefix, *srv)

	go func() {
		rch := qc.cli.Watch(context.Background(), channel)
		for wresp := range rch {
			for _, ev := range wresp.Events {
				if ev.Type != mvccpb.PUT {
					continue
				}

				// Decode into a fresh value for every event. gob fills an
				// existing non-nil map in place, so reusing one variable would
				// leak keys from earlier commands and mutate the Args map that
				// the consumer of In may still be reading.
				var command RemoteCommand
				dec := gob.NewDecoder(bytes.NewReader(ev.Kv.Value))
				if err := dec.Decode(&command); err != nil {
					log.Printf("recv an error message. %s\n", err.Error())
					continue
				}
				qc.In <- command
			}
		}
	}()

	go func() {
		var buf bytes.Buffer

		for command := range qc.Out {
			buf.Reset()
			// A new encoder per message makes each value self-describing, which
			// matches the per-message decoder on the receiving side.
			if err := gob.NewEncoder(&buf).Encode(command); err != nil {
				log.Printf("encode remote command failed. %s\n", err.Error())
				continue
			}
			if _, err := qc.cli.Put(context.Background(), channel, buf.String()); err != nil {
				log.Printf("publish remote command failed. %s\n", err.Error())
			}
		}
	}()

	return qc
}

// NewEtcd3Client creates an etcd v3 client for the comma-separated endpoints
// given by the -reg flag. It returns a wrapped error when the client cannot be
// created.
func NewEtcd3Client() (*clientv3.Client, error) {
	cli, err := clientv3.New(clientv3.Config{
		Endpoints: strings.Split(*reg, ","),
	})
	if err != nil {
		// errors.Wrap already appends the cause; formatting err into the
		// message as well would print it twice.
		return nil, errors.Wrap(err, "Create etcd3 client failed")
	}

	return cli, nil
}

// main starts a chat service node: it connects to etcd, wires up the remote
// channel and session manager, dispatches incoming remote commands to the
// local connection pool, registers the node in etcd, installs a signal handler
// that unregisters on termination, and then serves gRPC until failure.
func main() {
	var err error
	flag.Parse()
	connect_pool = &ConnectPool{}

	etcd_cli, err := NewEtcd3Client()
	utils.CheckErrorPanic(err)
	remote_channel = NewRemoteChannel(etcd_cli)
	session_manger = NewSessionManager(etcd_cli)

	// Apply commands published by any node to the clients connected here.
	go func() {
		var command RemoteCommand
		for command = range remote_channel.In {
			switch command.Command {
			case "broadcast":
				connect_pool.BroadCast(command.Args["from"], command.Args["message"])
			}
		}
	}()

	lis, err := net.Listen("tcp", GetListen())
	utils.CheckErrorPanic(err)
	fmt.Println("Listen on", GetListen())

	err = grpclb.Register(*srv, "127.0.0.1", *port, *reg, time.Second*3, 15) // register this node in etcd
	utils.CheckErrorPanic(err)

	// signal.Notify does not block when delivering, so the channel must be
	// buffered or a signal arriving before the receiver is ready is dropped.
	ch := make(chan os.Signal, 1)
	signal.Notify(ch, syscall.SIGTERM, syscall.SIGINT, syscall.SIGHUP, syscall.SIGQUIT)
	go func() {
		s := <-ch
		log.Printf("receive signal '%v'\n", s)

		grpclb.UnRegister() // unregister explicitly when the program is asked to exit
		signal.Stop(ch)

		// With our handler removed, re-send the signal to this process so the
		// default action for it takes effect.
		switch s := s.(type) {
		case syscall.Signal:
			syscall.Kill(os.Getpid(), s)

		default:
			os.Exit(1)
		}
	}()

	s := grpc.NewServer(grpc.RPCCompressor(grpc.NewGZIPCompressor()),
		grpc.RPCDecompressor(grpc.NewGZIPDecompressor()))
	pb.RegisterGreeterServer(s, &Service{})

	utils.CheckErrorPanic(s.Serve(lis))
}

client

package main

import (
	"bufio"
	"context"
	"flag"
	"fmt"
	grpclb "grpclb/etcdv3"
	"io"
	"log"
	"os"
	"sync"
	"time"
	"utils"

	"github.com/pkg/errors"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/metadata"

	pb "grpclb/helloword"
)

// Command-line flags of the chat client: name is the user name to log in with,
// reg is the etcd address passed to the dialer, and serv is the service name
// handed to the resolver.
var name *string = flag.String("name", "guess", "what's your name?")
var reg *string = flag.String("reg", "http://127.0.0.1:2479", "register etcd address")
var serv *string = flag.String("service", "chat_service", "service name")

// mutex serializes console output written by ConsoleLog.
var mutex sync.Mutex

// ConsoleLog prints message under a header line with the current time,
// followed by a fresh "> " prompt. Output is serialized with the package mutex
// so concurrent callers do not interleave.
func ConsoleLog(message string) {
	mutex.Lock()
	defer mutex.Unlock()

	now := time.Now()
	fmt.Fprintf(os.Stdout, "\n------ %s -----\n%s\n> ", now, message)
}

// stdinReader is the single buffered reader over standard input. It must be
// shared by all Input calls: a reader created per call would read ahead and
// then throw away whatever it had buffered beyond the first line.
var stdinReader = bufio.NewReader(os.Stdin)

// Input prints prompt and returns the next line read from standard input,
// without its line terminator. It returns whatever was read so far (possibly
// "") at end of input and panics on any other read error.
func Input(prompt string) string {
	fmt.Print(prompt)

	var line []byte
	for {
		part, more, err := stdinReader.ReadLine()
		if err != nil {
			if err == io.EOF {
				return string(line)
			}
			panic(errors.Wrap(err, "Input"))
		}
		line = append(line, part...)
		// more is set when the line did not fit in the reader's buffer; keep
		// reading so a long line is returned whole rather than in pieces.
		if !more {
			return string(line)
		}
	}
}

// Robot is the chat client's connection state. The embedded mutex guards the
// methods that replace or use the connection, client and stream.
type Robot struct {
	sync.Mutex
	conn        *grpc.ClientConn          // current gRPC connection, replaced by Connect
	client      pb.GreeterClient          // RPC client bound to conn
	chat_stream pb.Greeter_SayHelloClient // cached chat stream; nil until GetChatStream opens one
	ctx         context.Context           // context set by Connect; its Done channel is exposed by Done
	cancel      context.CancelFunc        // cancels ctx; invoked by Cancel
	token       string                    // session token obtained by Login
}

// Cancel cancels the robot's context, which closes the channel returned by
// Done.
func (robot *Robot) Cancel() {
	robot.cancel()
}

// Done returns the Done channel of the robot's current context.
func (robot *Robot) Done() <-chan struct{} {
	return robot.ctx.Done()
}

// Connect (re)establishes the gRPC connection. Any existing connection is
// closed first, then a new one is dialed with round-robin balancing over the
// resolver for the -service name, blocking until it is ready. On success the
// connection and client are replaced and the cached chat stream is cleared so
// the next GetChatStream opens a fresh one.
//
// The robot's context is created only on the first call and kept across
// reconnects, so the channel returned by Done before a reconnect is still the
// one that Cancel closes afterwards.
func (robot *Robot) Connect() error {
	robot.Lock()
	defer robot.Unlock()

	if robot.conn != nil {
		robot.conn.Close()
	}

	// Creating a new context on every reconnect would orphan the Done channel
	// that callers are already waiting on and leak the previous context.
	if robot.ctx == nil {
		robot.ctx, robot.cancel = context.WithCancel(context.Background())
	}

	r := grpclb.NewResolver(*serv)
	lb := grpc.RoundRobin(r)

	conn, err := grpc.DialContext(robot.ctx, *reg, grpc.WithInsecure(),
		grpc.WithDecompressor(grpc.NewGZIPDecompressor()),
		grpc.WithCompressor(grpc.NewGZIPCompressor()),
		grpc.WithBalancer(lb), grpc.WithBlock())

	if err != nil {
		return errors.Wrap(err, "Client Connect")
	}

	client := pb.NewGreeterClient(conn)

	robot.conn = conn
	robot.client = client
	robot.chat_stream = nil
	return nil
}

// GetChatStream returns the cached chat stream, opening one first if none is
// cached. The stream is opened with the session token attached as "token"
// metadata; a failed attempt is reported on stdout and retried after one
// second, indefinitely. The robot's lock is held for the whole call.
func (robot *Robot) GetChatStream() pb.Greeter_SayHelloClient {
	robot.Lock()
	defer robot.Unlock()

	if robot.chat_stream == nil {
		md := metadata.Pairs("token", robot.token)
		ctx := metadata.NewOutgoingContext(context.Background(), md)
		for {
			stream, err := robot.client.SayHello(ctx)
			if err == nil {
				robot.chat_stream = stream
				break
			}
			fmt.Printf("get chat stream failed. %s", err.Error())
			time.Sleep(1 * time.Second)
		}
	}

	return robot.chat_stream
}

// Login calls the Login RPC with username and stores the returned token on the
// robot. It returns a wrapped error when the RPC fails.
func (robot *Robot) Login(username string) error {
	robot.Lock()
	defer robot.Unlock()

	request := &pb.LoginRequest{Username: username}
	reply, err := robot.client.Login(context.Background(), request)
	if err != nil {
		return errors.Wrap(err, "Login")
	}

	robot.token = reply.GetToken()
	return nil
}

// NewRobot returns a Robot that is already connected. The connection error, if
// any, is passed to utils.CheckErrorPanic.
func NewRobot() *Robot {
	robot := new(Robot)
	err := robot.Connect()
	utils.CheckErrorPanic(err)

	return robot
}

// main runs the chat client: it connects, logs in with the -name flag, then
// runs one goroutine that prints server messages (reconnecting when the server
// becomes unavailable) and one that reads lines from stdin and sends them.
// It exits when the robot's context is cancelled, either by typing "exit" or
// by a CONNECT_FAILED reply.
func main() {
	flag.Parse()

	robot := NewRobot()
	utils.CheckErrorPanic(robot.Login(*name))
	ConsoleLog("登录成功")

	// Listen for notifications from the server.
	go func() {
		var (
			reply *pb.HelloReply
			err   error
		)
		for {
			reply, err = robot.GetChatStream().Recv()
			if err != nil && grpc.Code(err) == codes.Unavailable {
				ConsoleLog("与服务器的连接被断开, 进行重试")
				// Keep trying until a connection is actually established;
				// announcing success after a failed Connect would be wrong and
				// the next Recv would fail on the closed connection.
				for {
					connErr := robot.Connect()
					if connErr == nil {
						break
					}
					ConsoleLog(fmt.Sprintf("reconnect failed, retrying. %s", connErr.Error()))
					time.Sleep(time.Second)
				}
				ConsoleLog("重连成功")
				time.Sleep(time.Second)
				continue
			}
			utils.CheckErrorPanic(err)
			ConsoleLog(reply.Message)
			if reply.MessageType == pb.HelloReply_CONNECT_FAILED {
				log.Println("Connect failed.")
				robot.Cancel()
				break
			}
		}
	}()

	// Read chat input from the console and send it to the server.
	go func() {
		var (
			line string
			err  error
		)
		for {
			line = Input("")
			if line == "exit" {
				robot.Cancel()
				break
			}
			err = robot.GetChatStream().Send(&pb.HelloRequest{
				Message: line,
			})
			fmt.Print("> ")
			if err != nil {
				ConsoleLog(fmt.Sprintf("there was error sending data. %s", err.Error()))
				continue
			}
		}
	}()
	<-robot.Done()

	fmt.Println("Bye")
}

grpclb/etcdv3

具体实现见参考链接, 我只做了小小的修改。

github

https://github.com/lujinda/grpc-chat

转载于:https://my.oschina.net/tuxpy/blog/1645030

你可能感兴趣的:(grpc实现一个较复杂的聊天室)