

(金庆的专栏 2020.4)

grpc 对每个请求进行负载均衡。负载均衡的方式有:

  • 代理模式
  • 客户端实现
  • 外部负载均衡

参考:gRPC LB

gRPC 中负载均衡的主要机制是外部负载均衡。

gRPC 定义了外部负载均衡服务的接口:

  • load_balancer.proto 客户端向 lb 服查询后端列表
  • load_reporter.proto lb 服向后端服查询负载

bsm/grpclb 实现了一个 grpc 的外部负载均衡服。
因为其实现早于 grpc 负载均衡服的接口规范,所以其接口定义与 grpc 规范不同
(见 bsm/grpclb 的 issue#26)。
bsm/grpclb 目前仅支持 consul 服务发现。

标准的 grpclb 实现目前好像只有 jawlb。
jawlb 通过 Kubernetes API 来发现服务。

以下测试 grpc 客户端从 jawlb 服查询服务器列表,然后请求服务。
首先在本机开了多个 greeter 服实例,端口不同。
然后更改 greeter 客户端,不要直接连 greeter 服地址,而是配一个 jawlb 服地址。
同时更改 jawlb, 删除服务发现,改为固定输出本机服务列表,定时切换。

greeter 是指 grpc-go 中的例子:grpc-go\examples\helloworld\greeter

greeter 服更改


package main

import (

	pb ""

// GreeterServer is used to implement helloworld.GreeterServer.
type GreeterServer struct {

// SayHello implements helloworld.GreeterServer
func (s *GreeterServer) SayHello(ctx context.Context, in *pb.HelloRequest) (*pb.HelloReply, error) {
	msg := fmt.Sprintf("Hello %s from server-%d", in.Name, viper.GetInt("port"))
	return &pb.HelloReply{Message: msg}, nil

func main() {
	pflag.Int("port", 8000, "server bind port")
	port := viper.GetInt("port")

	addr := fmt.Sprintf(":%d", port)
	lis, err := net.Listen("tcp", addr)
	if err != nil {
		log.Fatalf("failed to listen: %v", err)
	s := grpc.NewServer()
	pb.RegisterGreeterServer(s, &GreeterServer{})

greeter 客户端更改

package main

import (

	_ ""
	pb ""

const (
	defaultName = "world"

func init() {

func main() {
	rb := manual.NewBuilderWithScheme("whatever")
	rb.InitialState(resolver.State{Addresses: []resolver.Address{
		{Addr: "", Type: resolver.GRPCLB},

	conn, err := grpc.Dial("whatever:///this-gets-overwritten", grpc.WithInsecure(), grpc.WithBlock(),
	if err != nil {
		log.Fatalf("did not connect: %v", err)
	defer conn.Close()
	c := pb.NewGreeterClient(conn)

	name := defaultName
	if len(os.Args) > 1 {
		name = os.Args[1]

	for {
		ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
		r, err := c.SayHello(ctx, &pb.HelloRequest{Name: name})

		if err != nil {
			log.Fatalf("could not greet: %v", err)

		log.Printf("Greeting: %s", r.GetMessage())


  • import _ "google.golang.org/grpc/balancer/grpclb"
    • 匿名导入 grpclb 包,用来注册 grpclb 负载均衡器
  • grpc.Dial("whatever:///this-gets-overwritten", grpc.WithResolvers(rb))
    • 采用一个自定义解析器,用来获取 jawlb 地址
    • Scheme("whatever") 可以任意,用作解析器名字
    • 目标 this-gets-overwritten 可以任意,因为 jawlb 忽略了该名字
    • 解析器返回的 Addr 是 jawlb 地址
  • 改为每秒请求一次

正常的 grpclb 用法是在 DNS 中为服务设置 SRV 记录,由 DNS 解析器返回 grpclb 地址。
此处测试为了避免设置 DNS, 采用了一个自定义解析器。
用 DNS 设置的好处是:域名既可以直接解析为后端 IP, 也可以添加 grpclb 记录,而代码上都如同直接连接后端:

	conn, err := grpc.Dial("dns:///", grpc.WithInsecure())

jawlb 更改


删除所有配置,改为固定本机 8888 端口监听。

  • 删除 envconfig.MustProcess("JAWLB", &cfg)
  • listen() 改为
    func listen() (conn net.Listener, err error) {
    	conn, err = net.Listen("tcp", ":8888")


package main

import (

func watchService(ctx context.Context) (_ <-chan ServerList, err error) {
	ch := make(chan ServerList)

	go func() {
		ticker := time.NewTicker(10 * time.Second)
		i := 0
		for {
			select {
			case <-ctx.Done():
			case <-ticker.C:
				i += 1
				fmt.Printf("i = %d\n", i)
				ports := []int32{8010, 8020}
				var servers []Server
				for _, port := range ports {
					servers = append(servers, Server{IP: net.ParseIP(""), Port: port + int32(i%2)})
				ch <- servers
			} // select
		} // for

	return ch, nil

删除所有服务发现代码,改为每10秒切换端口:8010,8020 <-> 8011,8021



λ jawlb.exe
2020/04/16 15:35:17 waiting for TERM
i = 1
2020/04/16 15:35:27 endpoints:
2020/04/16 15:35:27
2020/04/16 15:35:27
i = 2
2020/04/16 15:35:37 endpoints:
2020/04/16 15:35:37
2020/04/16 15:35:37


运行 4 个实例:

server --port 8010
server --port 8020
server --port 8011
server --port 8021


λ client
INFO[0002] lbBalancer: handle SubConn state change: 0xc00008a590, CONNECTING
INFO[0002] Channel Connectivity change to CONNECTING
INFO[0002] lbBalancer: handle SubConn state change: 0xc00008a5f0, CONNECTING
INFO[0002] Subchannel picks a new address "" to connect
INFO[0002] Subchannel Connectivity change to READY
INFO[0002] lbBalancer: handle SubConn state change: 0xc00008a590, READY
INFO[0002] Channel Connectivity change to READY
INFO[0002] Subchannel Connectivity change to READY
INFO[0002] lbBalancer: handle SubConn state change: 0xc00008a5f0, READY
2020/04/16 15:37:47 Greeting: Hello world from server-8021
2020/04/16 15:37:48 Greeting: Hello world from server-8011
2020/04/16 15:37:49 Greeting: Hello world from server-8021
2020/04/16 15:37:50 Greeting: Hello world from server-8011
2020/04/16 15:37:51 Greeting: Hello world from server-8021
2020/04/16 15:37:52 Greeting: Hello world from server-8011
2020/04/16 15:37:53 Greeting: Hello world from server-8021
2020/04/16 15:37:54 Greeting: Hello world from server-8011
2020/04/16 15:37:55 Greeting: Hello world from server-8021
2020/04/16 15:37:56 Greeting: Hello world from server-8011
INFO[0012] lbBalancer: processing server list: servers: servers:
INFO[0012] lbBalancer: server list entry[0]: ipStr:||, port:|8020|, load balancer token:||
INFO[0012] lbBalancer: server list entry[1]: ipStr:||, port:|8010|, load balancer token:||
2020/04/16 15:37:57 Greeting: Hello world from server-8020
2020/04/16 15:37:58 Greeting: Hello world from server-8010
2020/04/16 15:37:59 Greeting: Hello world from server-8020
2020/04/16 15:38:00 Greeting: Hello world from server-8010
2020/04/16 15:38:01 Greeting: Hello world from server-8020
2020/04/16 15:38:02 Greeting: Hello world from server-8010
2020/04/16 15:38:03 Greeting: Hello world from server-8020
2020/04/16 15:38:04 Greeting: Hello world from server-8010
2020/04/16 15:38:05 Greeting: Hello world from server-8020
2020/04/16 15:38:06 Greeting: Hello world from server-8010
INFO[0022] lbBalancer: processing server list: servers: servers:
INFO[0022] lbBalancer: server list entry[0]: ipStr:||, port:|8021|, load balancer token:||
INFO[0022] lbBalancer: server list entry[1]: ipStr:||, port:|8011|, load balancer token:||
2020/04/16 15:38:07 Greeting: Hello world from server-8011
2020/04/16 15:38:08 Greeting: Hello world from server-8021
2020/04/16 15:38:09 Greeting: Hello world from server-8011


客户端应用一个自定义 resolver 解析 "whatever:///this-gets-overwritten",
获取到 {Addr: <jawlb 地址>, Type: resolver.GRPCLB},
知道这是一个 grpclb,于是按 load_balancer.proto 的定义查询 jawlb 来获取后端地址列表。

jawlb 每 10s 更新一次服务器列表,每次输出多个地址。客户端在多个地址间轮换请求。


  • 不开 jawlb,客户端将无法成功请求,直到 jawlb 开启才成功
  • 中途关闭 jawlb, 请求仍会成功,但是保持为最后的服务器列表
    • 同时客户端会不断尝试重连 jawlb, 但是重连成功后服务器列表没有随之切换,应该是个错误
  • Dial() 不加 grpc.WithBlock() 参数, 报错:all SubConns are in TransientFailure
λ client
INFO[0000] parsed scheme: "whatever"
INFO[0000] ccResolverWrapper: sending update to cc: {[{   1 }]  }
INFO[0000] ClientConn switching balancer to "grpclb"
INFO[0000] Channel switches to new LB policy "grpclb"
INFO[0000] lbBalancer: UpdateClientConnState: {ResolverState:{Addresses:[{Addr: ServerName: Attributes: Type:1 Metadata:}] ServiceConfig: Attributes:} BalancerConfig:}
INFO[0000] parsed scheme: "grpclb-internal"
INFO[0000] ccResolverWrapper: sending update to cc: {[{   0 }]  }
INFO[0000] ClientConn switching balancer to "pick_first"
INFO[0000] Channel switches to new LB policy "pick_first"
INFO[0000] Subchannel Connectivity change to CONNECTING
INFO[0000] blockingPicker: the picked transport is not ready, loop back to repick
INFO[0000] pickfirstBalancer: HandleSubConnStateChange: 0xc00003fb10, {CONNECTING }
INFO[0000] Channel Connectivity change to CONNECTING
INFO[0000] Subchannel picks a new address "" to connect
INFO[0000] CPU time info is unavailable on non-linux or appengine environment.
INFO[0000] Subchannel Connectivity change to READY
INFO[0000] pickfirstBalancer: HandleSubConnStateChange: 0xc00003fb10, {READY }
INFO[0000] Channel Connectivity change to READY
INFO[0000] lbBalancer: processing server list: servers: servers:
INFO[0000] lbBalancer: server list entry[0]: ipStr:||, port:|8010|, load balancer token:||
INFO[0000] lbBalancer: server list entry[1]: ipStr:||, port:|8020|, load balancer token:||
INFO[0000] Subchannel Connectivity change to CONNECTING
INFO[0000] Subchannel Connectivity change to CONNECTING
INFO[0000] Channel Connectivity change to TRANSIENT_FAILURE
INFO[0000] lbBalancer: handle SubConn state change: 0xc00008a220, CONNECTING
INFO[0000] Channel Connectivity change to CONNECTING
INFO[0000] lbBalancer: handle SubConn state change: 0xc00008a280, CONNECTING
2020/04/16 16:40:06 could not greet: rpc error: code = Unavailable desc = all SubConns are in TransientFailure
