为什么80%的码农都做不了架构师?>>>
背景
接上文 https://my.oschina.net/tuxpy/blog/1631953
之前通过grpc的双向流实现了一个聊天室, 所有的stream都存在一个sync.Map里。如果这时候聊天室的压力大了, 如何做扩展? 说得简单点就是怎么样来做负载均衡。之前在 https://segmentfault.com/a/1190000008672912 看到过一篇借助etcd来实现负载均衡的文章, 就借来实现了。
问题
- grpc的服务发现如何做?
- 某一个client发送的聊天内容,如何让分布在不同的节点上的client都收到
- 当某些client连接的service节点down了, 如何让client重新发现并连接健康的节点? 如果所有的rpc方法都是request <-> response模式倒没什么问题, 大不了丢失当前正在处理的那次请求. 但是像聊天室这类应用使用的是双向流模式, 一旦一个节点down了, 连接到该节点的stream后续就会得到 `codes.Unavailable` 错误
解决
- 参考https://segmentfault.com/a/1190000008672912 。服务端注册key, 定期keepalive, 客户端watch
- 利用etcd的put + watch实现一个remote channel,所有服务节点监听某一个key的PUT行为,根据value内容调用相应的方法(如广播消息给连接到自己的所有客户端). 之前还想过,每一个service同时还是一个grpc client,每次自己有广播行为时, 远程调用其他节点广播操作。天呐,还要维护一份所有节点的清单,杀了我吧.
- 将身份验证操作和聊天操作分离出来,先进行身份验证,取出token, 后续聊天的rpc调用时带上. token验证借助etcd实现的session, 每次断线重连,只影响聊天的stream, 不会重复登录.
实现
service
/*
*
* Author : tuxpy
* Email : [email protected]
* Create time : 3/7/18 9:18 AM
* Filename : service.go
* Description :
*
*
*/
package main
import (
"bytes"
"context"
"crypto/rand"
"encoding/gob"
"encoding/hex"
"encoding/json"
"flag"
"fmt"
grpclb "grpclb/etcdv3"
pb "grpclb/helloword"
"io"
"log"
"net"
"os"
"os/signal"
"strings"
"sync"
"syscall"
"time"
"utils"
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/mvcc/mvccpb"
"github.com/golang/protobuf/ptypes/timestamp"
"github.com/pkg/errors"
"google.golang.org/grpc"
"google.golang.org/grpc/metadata"
"google.golang.org/grpc/peer"
)
// Service is the receiver type for the chat RPC handlers (Login and SayHello)
// and is the value registered with pb.RegisterGreeterServer in main.
// It has no fields.
type Service struct {
}
// ConnectPool tracks the chat streams attached to this node.
// It embeds sync.Map; the methods in this file store entries keyed by
// username (string) with a pb.Greeter_SayHelloServer stream as the value.
type ConnectPool struct {
	sync.Map // username -> stream
}
// RemoteCommand is the message exchanged between service nodes through the
// remote channel. It is gob-encoded before being written to etcd.
type RemoteCommand struct {
	// Command names the action to run; main dispatches on "broadcast".
	Command string
	// Args carries the named string arguments of the command
	// (for "broadcast": "from" and "message").
	Args map[string]string
}
// RemoteChannel is a pair of command queues backed by an etcd key.
// Commands sent on Out are written to etcd; commands observed on the watched
// key are delivered on In (see NewRemoteChannel).
type RemoteChannel struct {
	In  chan RemoteCommand // commands received from the watched etcd key
	Out chan RemoteCommand // commands waiting to be published to etcd

	cli *clientv3.Client // etcd client used for both Watch and Put
}
// SessionManager stores and looks up login sessions in etcd.
type SessionManager struct {
	cli *clientv3.Client // etcd client holding the session keys
}
// Session is the login record stored (JSON-encoded) in etcd for one user.
type Session struct {
	Name  string // username the session was created for
	Token string // hex token the client presents on later calls
}
// Process-wide singletons; all three are assigned in main before the gRPC
// server starts serving.
var (
	// connect_pool holds the chat streams connected to this node.
	connect_pool *ConnectPool
	// remote_channel carries commands between service nodes via etcd.
	remote_channel *RemoteChannel
	// session_manger (sic) creates and resolves login sessions.
	session_manger *SessionManager
)
// 将消息广播给其他service. 因为做了负载均衡,一个client stream有可能落在不同节点,需要将行为广播给所有的节点
func ReadyBroadCast(from, message string) {
remote_channel.Out <- RemoteCommand{
Command: "broadcast",
Args: map[string]string{
"from": from,
"message": message,
},
}
}
// Get loads the session stored under token and refreshes its etcd lease.
//
// The key is "<grpclb.Prefix>/<service>/session/<token>". An error is
// returned when the etcd read fails, when no session exists for the token
// (for example because its lease expired), when the stored value is not valid
// JSON, or when the lease cannot be refreshed.
func (sm *SessionManager) Get(token string) (*Session, error) {
	key := fmt.Sprintf("%s/%s/session/%s", grpclb.Prefix, *srv, token)
	resp, err := sm.cli.Get(context.Background(), key)
	if err != nil {
		return nil, err
	}
	// A missing key yields an empty result rather than an error, so guard the
	// index below; an unknown token must not panic the server.
	if len(resp.Kvs) == 0 {
		return nil, errors.New("session not found or expired")
	}
	kv := resp.Kvs[0]

	session := &Session{}
	if err = json.Unmarshal(kv.Value, session); err != nil {
		return nil, errors.Wrap(err, "failed to unmarshal session data")
	}

	// Each successful lookup extends the session's lifetime. The lease can
	// expire between the read above and this call, so report the failure to
	// the caller instead of panicking.
	if _, err = sm.cli.KeepAliveOnce(context.Background(), clientv3.LeaseID(kv.Lease)); err != nil {
		return nil, errors.Wrap(err, "refresh session lease")
	}
	return session, nil
}
// GetFromContext resolves the session named by the "token" entry of the
// incoming gRPC metadata in ctx. It returns a "Miss token" error when the
// metadata carries no token; otherwise the first token value is passed to Get.
func (sm *SessionManager) GetFromContext(ctx context.Context) (*Session, error) {
	// When no metadata is attached md is nil; indexing a nil map is safe and
	// simply yields no tokens.
	md, _ := metadata.FromIncomingContext(ctx)
	if tokens := md["token"]; len(tokens) > 0 {
		return sm.Get(tokens[0])
	}
	return nil, errors.New("Miss token")
}
// New creates a session for name, stores it in etcd under a freshly generated
// random token and returns it.
//
// The token is 16 random bytes, hex-encoded. The session is written as JSON
// to "<grpclb.Prefix>/<service>/session/<token>" and attached to a lease, so
// etcd drops it once the lease runs out. An error is returned when random
// bytes cannot be read, the lease cannot be granted, the session cannot be
// encoded, or the write fails.
func (sm *SessionManager) New(name string) (*Session, error) {
	raw := make([]byte, 16)
	// A short read would yield a predictable token, so treat it as fatal for
	// this request.
	if _, err := io.ReadFull(rand.Reader, raw); err != nil {
		return nil, errors.Wrap(err, "generate session token")
	}
	token := hex.EncodeToString(raw)
	key := fmt.Sprintf("%s/%s/session/%s", grpclb.Prefix, *srv, token)

	grant, err := sm.cli.Grant(context.Background(), 60*5) // token is valid for 5 minutes
	if err != nil {
		return nil, errors.Wrap(err, "grant etcd lease ")
	}

	session := &Session{
		Name:  name,
		Token: token,
	}
	payload, err := json.Marshal(session)
	if err != nil {
		return nil, errors.Wrap(err, "marshal session data")
	}
	if _, err = sm.cli.Put(context.Background(), key, string(payload), clientv3.WithLease(grant.ID)); err != nil {
		return nil, errors.Wrap(err, "etcd3 put")
	}
	return session, nil
}
// Get returns the stream stored for name, or nil when the pool has no entry
// for that name.
func (p *ConnectPool) Get(name string) pb.Greeter_SayHelloServer {
	value, found := p.Load(name)
	if !found {
		return nil
	}
	return value.(pb.Greeter_SayHelloServer)
}
// Add stores stream under name, replacing any stream already stored for it.
func (p *ConnectPool) Add(name string, stream pb.Greeter_SayHelloServer) {
	p.Map.Store(name, stream)
}
// Del removes the entry stored for name; it is a no-op when none exists.
func (p *ConnectPool) Del(name string) {
	p.Map.Delete(name)
}
func (p *ConnectPool) BroadCast(from, message string) {
log.Printf("BroadCast from: %s, message: %s\n", from, message)
p.Range(func(username_i, stream_i interface{}) bool {
username := username_i.(string)
stream := stream_i.(pb.Greeter_SayHelloServer)
if username == from {
return true
} else {
log.Printf("From %s to %s\n", from, username)
utils.CheckErrorPanic(stream.Send(&pb.HelloReply{
Message: message,
MessageType: pb.HelloReply_NORMAL_MESSAGE,
TS: ×tamp.Timestamp{Seconds: time.Now().Unix()},
}))
}
return true
})
}
// Login creates a session for the requested username and returns its token.
//
// The request is rejected when a stream is already registered under that
// username in this node's connect_pool. On success a "Welcome <name>!"
// broadcast is queued and the reply carries the new session token.
func (s *Service) Login(ctx context.Context, in *pb.LoginRequest) (*pb.LoginReply, error) {
	username := in.GetUsername()
	if existing := connect_pool.Get(username); existing != nil {
		return nil, errors.Errorf("username %s already exists", username)
	}

	session, err := session_manger.New(username)
	if err != nil {
		return nil, err
	}

	ReadyBroadCast(username, fmt.Sprintf("Welcome %s!", username))

	reply := &pb.LoginReply{
		Token:   session.Token,
		Success: true,
		Message: "success",
	}
	return reply, nil
}
// SayHello serves one bidirectional chat stream.
//
// The session is resolved from the token in the stream's metadata. If that
// fails, a CONNECT_FAILED reply carrying the error text is sent and the call
// ends without an RPC error. Otherwise the stream is registered in
// connect_pool under the session's username, a CONNECT_SUCCESS reply is sent,
// and every message received afterwards is queued for broadcast as
// "<username>: <message>". When the stream's context ends, the user is
// removed from the pool and a leave notice is queued.
//
// It returns nil when the client closes its sending side (io.EOF) and the
// receive error otherwise.
func (s *Service) SayHello(stream pb.Greeter_SayHelloServer) error {
	ctx := stream.Context()

	// peer.FromContext reports ok == false when the context carries no peer
	// information; only dereference the result when it is present.
	remote := "unknown"
	if p, ok := peer.FromContext(ctx); ok && p.Addr != nil {
		remote = p.Addr.String()
	}
	log.Printf("Received new connection. %s", remote)

	// The context carries the token; resolve the session from it.
	session, err := session_manger.GetFromContext(ctx)
	if err != nil {
		sendErr := stream.Send(&pb.HelloReply{
			Message:     err.Error(),
			MessageType: pb.HelloReply_CONNECT_FAILED,
		})
		if sendErr != nil {
			log.Printf("send connect failed reply to %s: %s", remote, sendErr.Error())
		}
		return nil
	}

	username := session.Name
	connect_pool.Add(username, stream)

	// Tell the client that the connection succeeded. A failure here is only
	// logged: a broken stream also fails the Recv below, which ends the call.
	sendErr := stream.Send(&pb.HelloReply{
		Message:     "Connect success!",
		MessageType: pb.HelloReply_CONNECT_SUCCESS,
	})
	if sendErr != nil {
		log.Printf("send connect success reply to %s: %s", username, sendErr.Error())
	}

	go func() {
		<-ctx.Done()
		connect_pool.Del(username) // the user left the chat room; drop it from the pool
		ReadyBroadCast(username, fmt.Sprintf("%s leval room", username))
	}()

	for {
		req, err := stream.Recv()
		if err == io.EOF {
			// The client finished sending; this is a normal end of stream.
			return nil
		}
		if err != nil {
			return err
		}
		ReadyBroadCast(username, fmt.Sprintf("%s: %s", username, req.Message))
	}
}
var (
srv = flag.String("service", "chat_service", "service name")
port = flag.Int("port", 8880, "listening port")
reg = flag.String("reg", "http://127.0.0.1:2479", "register etcd address")
)
// GetListen returns the address the server binds to: all interfaces
// (0.0.0.0) on the port given by the -port flag.
func GetListen() string {
	const host = "0.0.0.0"
	addr := fmt.Sprintf("%s:%d", host, *port)
	return addr
}
// NewSessionManager returns a SessionManager that uses cli for all of its
// etcd operations.
func NewSessionManager(cli *clientv3.Client) *SessionManager {
	sm := new(SessionManager)
	sm.cli = cli
	return sm
}
// NewRemoteChannel builds a RemoteChannel on top of cli and starts its two
// worker goroutines. It uses etcd3's watch to receive actions issued by other
// nodes.
//
// All nodes share the key "<grpclb.Prefix>/<service>/channel":
//   - the receiver goroutine watches that key and delivers every PUT whose
//     value gob-decodes into a RemoteCommand on In; undecodable values are
//     logged and skipped;
//   - the sender goroutine gob-encodes each command taken from Out and PUTs
//     it to the key; a failed PUT is logged and the command is dropped.
//
// Both goroutines run for the life of the process; the sender stops if Out is
// closed.
func NewRemoteChannel(cli *clientv3.Client) *RemoteChannel {
	qc := &RemoteChannel{
		cli: cli,
		In:  make(chan RemoteCommand, 1),
		Out: make(chan RemoteCommand, 1),
	}
	channel := fmt.Sprintf("%s/%s/channel", grpclb.Prefix, *srv)

	// Receiver: etcd watch -> In.
	go func() {
		rch := qc.cli.Watch(context.Background(), channel)
		for wresp := range rch {
			for _, ev := range wresp.Events {
				if ev.Type != mvccpb.PUT {
					continue
				}
				// Decode into a fresh value for every event. gob reuses a
				// non-nil destination map, so sharing one RemoteCommand would
				// make every delivered command alias the same Args map while
				// the consumer may still be reading it.
				var command RemoteCommand
				dec := gob.NewDecoder(bytes.NewReader(ev.Kv.Value))
				if err := dec.Decode(&command); err != nil {
					log.Printf("recv an error message. %s\n", err.Error())
					continue
				}
				qc.In <- command
			}
		}
	}()

	// Sender: Out -> etcd put.
	go func() {
		var buf bytes.Buffer
		for command := range qc.Out {
			buf.Reset()
			// A new encoder per message makes every value self-describing, so
			// any node can decode it without having seen earlier messages.
			// utils.CheckErrorPanic is defined outside this file; presumably
			// it panics on a non-nil error.
			utils.CheckErrorPanic(gob.NewEncoder(&buf).Encode(command))
			if _, err := qc.cli.Put(context.Background(), channel, buf.String()); err != nil {
				log.Printf("publish remote command failed. %s\n", err.Error())
			}
		}
	}()

	return qc
}
// NewEtcd3Client creates an etcd v3 client for the endpoints listed in the
// -reg flag (comma-separated). A creation failure is returned wrapped with a
// "Create etcd3 client failed" message.
func NewEtcd3Client() (*clientv3.Client, error) {
	endpoints := strings.Split(*reg, ",")
	cfg := clientv3.Config{Endpoints: endpoints}

	cli, err := clientv3.New(cfg)
	if err == nil {
		return cli, nil
	}
	return nil, errors.Wrapf(err, "Create etcd3 client failed: %s", err.Error())
}
// main starts one chat service node:
//  1. parse flags and create the connection pool, the etcd client, the remote
//     channel and the session manager;
//  2. start a goroutine that executes commands arriving from other nodes;
//  3. listen on the configured port and register the node in etcd;
//  4. install a signal handler that unregisters the node before the process
//     terminates;
//  5. serve gRPC with gzip compression until the server stops.
//
// Setup errors go through utils.CheckErrorPanic, which is defined outside
// this file (presumably it panics on a non-nil error).
func main() {
	flag.Parse()
	connect_pool = &ConnectPool{}
	etcd_cli, err := NewEtcd3Client()
	utils.CheckErrorPanic(err)
	remote_channel = NewRemoteChannel(etcd_cli)
	session_manger = NewSessionManager(etcd_cli)

	// Execute commands published by any node (including this one).
	go func() {
		for command := range remote_channel.In {
			switch command.Command {
			case "broadcast":
				connect_pool.BroadCast(command.Args["from"], command.Args["message"])
			}
		}
	}()

	lis, err := net.Listen("tcp", GetListen())
	utils.CheckErrorPanic(err)
	fmt.Println("Listen on", GetListen())

	// Register this node in etcd.
	err = grpclb.Register(*srv, "127.0.0.1", *port, *reg, time.Second*3, 15)
	utils.CheckErrorPanic(err)

	// signal.Notify never blocks when delivering, so the channel needs a
	// buffer; with an unbuffered channel a signal that arrives before the
	// goroutine below is receiving would be lost.
	ch := make(chan os.Signal, 1)
	signal.Notify(ch, syscall.SIGTERM, syscall.SIGINT, syscall.SIGHUP, syscall.SIGQUIT)
	go func() {
		s := <-ch
		log.Printf("receive signal '%v'\n", s)
		grpclb.UnRegister() // unregister proactively once the program is asked to exit
		// Stop relaying signals to ch, then re-send the same signal to this
		// process so that it is no longer intercepted here.
		signal.Stop(ch)
		switch s := s.(type) {
		case syscall.Signal:
			syscall.Kill(os.Getpid(), s)
		default:
			os.Exit(1)
		}
	}()

	s := grpc.NewServer(grpc.RPCCompressor(grpc.NewGZIPCompressor()),
		grpc.RPCDecompressor(grpc.NewGZIPDecompressor()))
	pb.RegisterGreeterServer(s, &Service{})
	utils.CheckErrorPanic(s.Serve(lis))
}
client
package main
import (
"bufio"
"context"
"flag"
"fmt"
grpclb "grpclb/etcdv3"
"io"
"log"
"os"
"sync"
"time"
"utils"
"github.com/pkg/errors"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/metadata"
pb "grpclb/helloword"
)
var name *string = flag.String("name", "guess", "what's your name?")
var reg *string = flag.String("reg", "http://127.0.0.1:2479", "register etcd address")
var serv *string = flag.String("service", "chat_service", "service name")
var mutex sync.Mutex
// ConsoleLog prints message to stdout under a header line holding the current
// time, followed by a fresh "> " prompt. Output is serialized with the
// package-level mutex so concurrent callers do not interleave.
func ConsoleLog(message string) {
	const layout = "\n------ %s -----\n%s\n> "

	mutex.Lock()
	defer mutex.Unlock()

	now := time.Now()
	fmt.Printf(layout, now, message)
}
// stdinReader is the single buffered reader over standard input shared by all
// Input calls. A reader created per call would throw away whatever it had
// buffered beyond the first line (e.g. when input is piped or pasted).
var stdinReader = bufio.NewReader(os.Stdin)

// Input prints prompt and returns the next line read from standard input,
// without its line terminator.
//
// Lines longer than the reader's buffer are reassembled and returned whole.
// At end of input it returns whatever was read so far (the empty string when
// nothing was). Any other read error causes a panic with the error wrapped
// as "Input".
func Input(prompt string) string {
	fmt.Print(prompt)

	var line []byte
	for {
		chunk, more, err := stdinReader.ReadLine()
		if err != nil {
			if err == io.EOF {
				return string(line)
			}
			panic(errors.Wrap(err, "Input"))
		}
		// chunk is only valid until the next read, so copy it out.
		line = append(line, chunk...)
		if !more {
			return string(line)
		}
	}
}
// Robot is the chat client: it owns the gRPC connection, the login token and
// the current chat stream. The embedded mutex guards the fields in Connect,
// GetChatStream and Login.
type Robot struct {
	sync.Mutex

	conn        *grpc.ClientConn         // current connection; replaced by Connect
	client      pb.GreeterClient         // RPC client bound to conn
	chat_stream pb.Greeter_SayHelloClient // cached chat stream; nil until GetChatStream opens one

	ctx    context.Context    // its Done channel is what Done returns
	cancel context.CancelFunc // cancels ctx; invoked by Cancel

	token string // session token obtained by Login
}
// Cancel invokes the robot's stored cancel function, which cancels the
// context whose Done channel is exposed by Done.
func (robot *Robot) Cancel() {
	stop := robot.cancel
	stop()
}
// Done returns the Done channel of the robot's context; it is closed once the
// context is cancelled.
func (robot *Robot) Done() <-chan struct{} {
	done := robot.ctx.Done()
	return done
}
// Connect (re)establishes the gRPC connection and resets the cached chat
// stream so that the next GetChatStream call opens a new one.
//
// Any existing connection is closed first. The dial uses a round-robin
// balancer over the resolver for the -service name, gzip compression, and
// blocks until the connection is up. A dial failure is returned wrapped as
// "Client Connect".
//
// The robot's context is created on the first call only and kept across
// reconnects, so a channel previously returned by Done still closes when
// Cancel is called after a reconnect.
func (robot *Robot) Connect() error {
	robot.Lock()
	defer robot.Unlock()

	if robot.conn != nil {
		robot.conn.Close()
	}
	if robot.ctx == nil {
		robot.ctx, robot.cancel = context.WithCancel(context.Background())
	}

	r := grpclb.NewResolver(*serv)
	lb := grpc.RoundRobin(r)
	conn, err := grpc.DialContext(robot.ctx, *reg, grpc.WithInsecure(),
		grpc.WithDecompressor(grpc.NewGZIPDecompressor()),
		grpc.WithCompressor(grpc.NewGZIPCompressor()),
		grpc.WithBalancer(lb), grpc.WithBlock())
	if err != nil {
		return errors.Wrap(err, "Client Connect")
	}

	robot.conn = conn
	robot.client = pb.NewGreeterClient(conn)
	robot.chat_stream = nil // force a new stream on the new connection
	return nil
}
// GetChatStream returns the cached chat stream, opening one first if none is
// cached. The stream is opened with the session token attached as "token"
// metadata. Opening is retried once per second until it succeeds, so this
// call blocks for as long as the server is unreachable.
//
// The lock is released while waiting between attempts so that Connect can
// replace the connection in the meantime.
func (robot *Robot) GetChatStream() pb.Greeter_SayHelloClient {
	robot.Lock()
	defer robot.Unlock()

	ctx := metadata.NewOutgoingContext(context.Background(), metadata.Pairs("token", robot.token))
	for {
		// Re-checked on every pass: another goroutine may have opened the
		// stream while this one was waiting.
		if robot.chat_stream != nil {
			return robot.chat_stream
		}
		stream, err := robot.client.SayHello(ctx)
		if err == nil {
			robot.chat_stream = stream
			return stream
		}
		fmt.Printf("get chat stream failed. %s", err.Error())

		robot.Unlock()
		time.Sleep(1 * time.Second)
		robot.Lock()
	}
}
// Login calls the Login RPC for username and stores the token from the reply
// on the robot. An RPC failure is returned wrapped as "Login".
func (robot *Robot) Login(username string) error {
	robot.Lock()
	defer robot.Unlock()

	request := &pb.LoginRequest{Username: username}
	reply, err := robot.client.Login(context.Background(), request)
	if err == nil {
		robot.token = reply.GetToken()
		return nil
	}
	return errors.Wrap(err, "Login")
}
// NewRobot returns a Robot that is already connected. A connection error is
// handed to utils.CheckErrorPanic (defined outside this file; presumably it
// panics on a non-nil error).
func NewRobot() *Robot {
	robot := new(Robot)
	err := robot.Connect()
	utils.CheckErrorPanic(err)
	return robot
}
// main runs the interactive chat client: it connects, logs in with the -name
// flag, then runs two goroutines, one printing messages from the server and
// one sending lines typed by the user, until the robot's context is
// cancelled (the user typed "exit", the server rejected the connection, or a
// reconnect failed).
func main() {
	flag.Parse()
	robot := NewRobot()
	utils.CheckErrorPanic(robot.Login(*name))
	ConsoleLog("登录成功")

	// Listen for notifications from the server.
	go func() {
		for {
			reply, err := robot.GetChatStream().Recv()
			if err != nil && grpc.Code(err) == codes.Unavailable {
				// The node serving this stream went away: reconnect so that
				// the next GetChatStream call opens a stream on a healthy one.
				ConsoleLog("与服务器的连接被断开, 进行重试")
				if connErr := robot.Connect(); connErr != nil {
					// Without a connection there is nothing left to do;
					// report the failure instead of claiming success.
					ConsoleLog(fmt.Sprintf("重连失败. %s", connErr.Error()))
					robot.Cancel()
					return
				}
				ConsoleLog("重连成功")
				time.Sleep(time.Second)
				continue
			}
			// utils.CheckErrorPanic is defined outside this file; presumably
			// it panics on any other non-nil error.
			utils.CheckErrorPanic(err)
			ConsoleLog(reply.Message)
			if reply.MessageType == pb.HelloReply_CONNECT_FAILED {
				log.Println("Connect failed.")
				robot.Cancel()
				return
			}
		}
	}()

	// Read chat input from the user and send it to the server.
	go func() {
		for {
			line := Input("")
			if line == "exit" {
				robot.Cancel()
				return
			}
			err := robot.GetChatStream().Send(&pb.HelloRequest{
				Message: line,
			})
			fmt.Print("> ")
			if err != nil {
				ConsoleLog(fmt.Sprintf("there was error sending data. %s", err.Error()))
			}
		}
	}()

	<-robot.Done()
	fmt.Println("Bye")
}
grpclb/etcdv3
具体实现见参考链接 我只做了小小的修改
github
https://github.com/lujinda/grpc-chat