grpc python 源码分析（1）：server 的创建和启动

grpc python 的源码分为三层：python → cython → c++。本系列文章只分析 python 部分的代码，其它部分不涉及（其实是我看不懂）。

版本：1.24.3

helloworld
先来看官方的一个例子

from concurrent import futures
import logging
import grpc
import helloworld_pb2
import helloworld_pb2_grpc

class Greeter(helloworld_pb2_grpc.GreeterServicer):
    """Servicer whose SayHello RPC answers with a greeting for the caller."""

    def SayHello(self, request, context):
        """Return a HelloReply whose message greets ``request.name``.

        ``context`` is accepted but not used.
        """
        greeting = 'Hello, %s!' % request.name
        return helloworld_pb2.HelloReply(message=greeting)

def serve():
    """Create, configure and start the Greeter server, then wait for termination."""
    worker_pool = futures.ThreadPoolExecutor(max_workers=10)
    server = grpc.server(worker_pool)  # 1️⃣ create the server
    greeter = Greeter()
    # 2️⃣ register the interface methods
    helloworld_pb2_grpc.add_GreeterServicer_to_server(greeter, server)
    server.add_insecure_port('[::]:50051')  # 3️⃣ bind the listening port
    server.start()  # 4️⃣ start the server
    server.wait_for_termination()  # 5️⃣ wait for the termination signal

# Script entry point: configure default logging, then run serve().
if __name__ == '__main__':
    logging.basicConfig()
    serve()

1️⃣ 创建 server
这里我们传了一个线程池给 grpc 的 server ，这个线程池用来处理请求。
经过重重调用，最后我们得到的 server 是 _Server 的实例

class _Server(grpc.Server):
    """grpc.Server implementation that keeps everything it needs in a _ServerState."""

    # pylint: disable=too-many-arguments
    def __init__(self, thread_pool, generic_handlers, interceptors, options,
                 maximum_concurrent_rpcs, compression):
        """Create the cygrpc queue and server and bundle them into ``self._state``."""
        queue = cygrpc.CompletionQueue()
        core_options = _augment_options(options, compression)
        core_server = cygrpc.Server(core_options)
        core_server.register_completion_queue(queue)
        # Built last, exactly where the original evaluated it as an argument.
        pipeline = _interceptor.service_pipeline(interceptors)
        self._state = _ServerState(queue, core_server, generic_handlers,
                                   pipeline, thread_pool,
                                   maximum_concurrent_rpcs)

cygrpc.CompletionQueue 和 cygrpc.Server 都是调用底层的 c++ core ，我们不去管它。
再来看看这个 _ServerState 的代码

class _ServerState(object):
    """Plain holder for the objects and bookkeeping shared by the server code."""

    # pylint: disable=too-many-arguments
    def __init__(self, completion_queue, server, generic_handlers,
                 interceptor_pipeline, thread_pool, maximum_concurrent_rpcs):
        """Store the given collaborators and set all bookkeeping to its initial value."""
        # Objects handed in by the caller.
        self.completion_queue = completion_queue
        self.server = server
        self.thread_pool = thread_pool
        self.interceptor_pipeline = interceptor_pipeline
        self.maximum_concurrent_rpcs = maximum_concurrent_rpcs
        # Copied into a new list rather than stored by reference.
        self.generic_handlers = list(generic_handlers)

        # Locking and lifecycle.
        self.lock = threading.RLock()
        self.stage = _ServerStage.STOPPED
        termination = threading.Event()
        self.termination_event = termination
        # The termination event is the first entry of the shutdown events.
        self.shutdown_events = [termination]

        # RPC bookkeeping.
        self.active_rpc_count = 0
        # TODO(https://github.com/grpc/grpc/issues/6597): eliminate these fields.
        self.rpc_states = set()
        self.due = set()

        # A "volatile" flag to interrupt the daemon serving thread
        self.server_deallocated = False

从这里我们可以看到，python 的 server 只是对底层的简单封装，关于网络IO的处理完全是底层的 c++ core 负责，python 主要负责调用开发者的接口处理请求。

2️⃣ 注册接口方法
这步负责将我们开发好的接口注册到服务器上，调用的是编译 proto 文件生成的 _pb2_grpc 后缀文件的函数。

def add_GreeterServicer_to_server(servicer, server):
  """Register ``servicer.SayHello`` on ``server`` under the 'helloworld.Greeter' service."""
  say_hello_handler = grpc.unary_unary_rpc_method_handler(
      servicer.SayHello,  # the interface method
      request_deserializer=helloworld__pb2.HelloRequest.FromString,  # deserializer for the request
      response_serializer=helloworld__pb2.HelloReply.SerializeToString,  # serializer for the response
  )
  # Routing table: RPC method name -> handler.
  handlers_by_method = {'SayHello': say_hello_handler}
  service_handler = grpc.method_handlers_generic_handler(
      'helloworld.Greeter', handlers_by_method)
  server.add_generic_rpc_handlers((service_handler,))

请求的路由分发使用的是字典：key 是我们定义的接口名，value 则是一个命名元组，里面保存的是我们的接口方法、反序列化方法和序列化方法。

3️⃣ 绑定监听端口
这个最后是调用 c++ core 的代码，直接忽略

4️⃣ 服务启动
server 的 start 方法只是调用 _start 函数

class _Server(grpc.Server):
    def start(self):
        """Start the server by delegating to _start() with this server's state."""
        _start(self._state)

def _start(state):
    """Start the server held in ``state`` and launch the thread that runs _serve.

    Everything happens while ``state.lock`` is held.

    Raises:
        ValueError: if ``state.stage`` is not ``_ServerStage.STOPPED``.
    """
    with state.lock:
        if state.stage is not _ServerStage.STOPPED:
            raise ValueError('Cannot start already-started server!')
        state.server.start()  # calls into C++
        state.stage = _ServerStage.STARTED
        _request_call(state)  # calls into C++ (author's note; helper not shown here)

        # The serving loop runs on its own daemon thread.
        thread = threading.Thread(target=_serve, args=(state,))
        thread.daemon = True
        thread.start()

这里拉起了一个线程调用 _serve 函数，下面则是 _serve 的代码

def _serve(state):
    """Poll the completion queue in a loop and pass each real event on for processing.

    Returns once _process_event_and_continue() gives a falsy result for an event.
    """
    while True:
        # Absolute deadline: now plus the check period, so poll() is bounded and
        # the server_deallocated flag below is looked at again after each poll.
        timeout = time.time() + _DEALLOCATED_SERVER_CHECK_PERIOD_S
        event = state.completion_queue.poll(timeout)
        if state.server_deallocated:
            _begin_shutdown_once(state)
        # Events of type queue_timeout are skipped; everything else is processed.
        if event.completion_type != cygrpc.CompletionType.queue_timeout:
            if not _process_event_and_continue(state, event):
                return
        # We want to force the deletion of the previous event
        # ~before~ we poll again; if the event has a reference
        # to a shutdown Call object, this can induce spinlock.
        event = None

服务器便是在这里接受请求，并调用接口方法处理请求的。

5️⃣ 接受终止信号
这里是为了阻塞主线程，防止它直接退出（上面的 _serve 线程是守护线程，主线程一旦退出，整个进程也就结束了）。以前的写法是这样的

        # Older idiom: block the calling thread by sleeping in a loop until Ctrl+C.
        try:
          while True:
            time.sleep(86400)  # 86400 s = 24 h per iteration
        except KeyboardInterrupt:
            # Ctrl+C raised KeyboardInterrupt: call stop(0) on the server.
            self.grpc_server.stop(0)

当时还奇怪为什么不封装成一个方法，这次最新版的则是调用一个方法。
不过这个方法跟之前的逻辑不一样了，具体看代码

class _Server(grpc.Server):

    def wait_for_termination(self, timeout=None):
        """Wait on the state's termination event through _common.wait().

        ``timeout`` is forwarded unchanged and the result of _common.wait()
        is returned.
        """
        # NOTE(https://bugs.python.org/issue35935)
        # Remove this workaround once threading.Event.wait() is working with
        # CTRL+C across platforms.
        termination = self._state.termination_event
        return _common.wait(
            termination.wait, termination.is_set, timeout=timeout)

顺着链接看了一下，说是在一些版本的 python 调用 Event.wait 在 win10 无法用 Ctrl+C 中断。
我用 python2.7.17 和 python3.7.4 试了一下，是可以用 Ctrl+C 中断的，看来问题在新版解决了(o_ _)ﾉ
python2 的 Event.wait 还有另外一个问题：如果调用 Event.wait 时没有传超时时间（timeout），那么信号处理函数就无法被触发，如下所示

import threading
import signal


def main():
    """Install a SIGINT handler that sets an Event, then wait on that Event."""
    interrupted = threading.Event()

    def on_sigint(sig, stack):
        # Print the handler's arguments, then set the event being waited on.
        print(sig, stack)
        interrupted.set()

    signal.signal(signal.SIGINT, on_sigint)
    # Deliberately called without a timeout: this is the case being demonstrated.
    interrupted.wait()


# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()

关于这点，在 grpc 的 _common.wait 代码注释里也有说到

# Excerpt only: the signature and the opening of the docstring are quoted here;
# the remainder is elided (the "......" line) and no function body is shown.
def wait(wait_fn, wait_complete_fn, timeout=None, spin_cb=None):
    """Blocks waiting for an event without blocking the thread indefinitely.

    See https://github.com/grpc/grpc/issues/19464 for full context. CPython's
    `threading.Event.wait` and `threading.Condition.wait` methods, if invoked
    without a timeout kwarg, may block the calling thread indefinitely. If the
    call is made from the main thread, this means that signal handlers may not
    run for an arbitrarily long period of time.

    ......
    """

令我感到奇怪的是，为什么要另起一个线程负责接收请求，而不是在主线程进行。
为此，我准备改写一下代码，将接收请求的逻辑写在主线程中，看看会有什么问题；同时还要记得将 _start 方法中起线程的代码注释掉（推荐在 virtualenv 的环境下尝试，免得搞错）

from concurrent import futures
import logging

import grpc
import grpc._server

import helloworld_pb2
import helloworld_pb2_grpc

class Greeter(helloworld_pb2_grpc.GreeterServicer):
    """Servicer whose SayHello RPC answers with a greeting for the caller."""

    def SayHello(self, request, context):
        """Return a HelloReply whose message greets ``request.name``.

        ``context`` is accepted but not used.
        """
        greeting = 'Hello, %s!' % request.name
        return helloworld_pb2.HelloReply(message=greeting)

def serve():
    """Start the Greeter server, then run grpc's _serve loop on the calling thread.

    Experiment only: it relies on the private ``grpc._server._serve`` function
    and the private ``server._state`` attribute.
    """
    worker_pool = futures.ThreadPoolExecutor(max_workers=10)
    server = grpc.server(worker_pool)
    greeter = Greeter()
    helloworld_pb2_grpc.add_GreeterServicer_to_server(greeter, server)
    server.add_insecure_port('[::]:50051')
    server.start()
    # Takes the place of server.wait_for_termination() for this experiment.
    serve_loop = grpc._server._serve
    serve_loop(server._state)

# Script entry point: configure default logging, then run serve().
if __name__ == '__main__':
    logging.basicConfig()
    serve()

如果像上面这样运行的话，按下 CTRL+C 后进程并不会马上停止，而是要按好几次 CTRL+C，
或者对这个服务发起一次调用才会停止。
这样看来，这个主线程主要是用来接收控制信号的。
感觉跟 _serve 函数里的 state.completion_queue.poll 有关，前面我们分析过这个 completion_queue 是属于 c++ 部分的

def _serve(state):
    """Poll the completion queue in a loop and pass each real event on for processing.

    Returns once _process_event_and_continue() gives a falsy result for an event.
    """
    while True:
        # Absolute deadline: now plus the check period, so poll() is bounded and
        # the server_deallocated flag below is looked at again after each poll.
        timeout = time.time() + _DEALLOCATED_SERVER_CHECK_PERIOD_S
        event = state.completion_queue.poll(timeout)
        if state.server_deallocated:
            _begin_shutdown_once(state)
        # Events of type queue_timeout are skipped; everything else is processed.
        if event.completion_type != cygrpc.CompletionType.queue_timeout:
            if not _process_event_and_continue(state, event):
                return
        # We want to force the deletion of the previous event
        # ~before~ we poll again; if the event has a reference
        # to a shutdown Call object, this can induce spinlock.
        event = None

总结

temp.png

grpc python 源码分析（1）：server 的创建和启动

你可能感兴趣的:(grpc python 源码分析（1）：server 的创建和启动)