Spyglass 代码阅读

最近读了《理解Unix进程》，感觉挺不错。前两天又把书里的Spyglass源码读了下，有所收获，就顺便想动手写写笔记，记录下读代码的感受，加深下理解，顺便推荐下这本书。
Spyglass是一个非常简单的preforking的Web服务器，只适用于研究和学习Unix进程的工作方式。
进入代码部分。作者的注释非常清晰，所以阅读起来很顺畅。下面的理解（中文注释部分）大多在作者的代码注释里已有说明，所以谈不上分析，只是根据代码再理解一遍，并梳理从本书学到的内容。

bin/spyglass.rb

#!/usr/bin/env ruby
# Command-line launcher for Spyglass: puts lib/ on the load path, parses the
# command-line options and starts the server.

# Load path setup and requires. When this file is reached through a symlink
# the link target is used, so that lib/ is located relative to the real file.
THIS_FILE = File.symlink?(__FILE__) ? File.readlink(__FILE__) : __FILE__
$LOAD_PATH << File.dirname(THIS_FILE) + '/../lib'
require 'rubygems'
require 'spyglass'
require 'optparse'

# Command-line parsing. (The gli gem, recommended by some in the community, is
# a good alternative to OptionParser.)
opts = OptionParser.new do |parser|
  parser.banner = "Usage: spyglass [options]"

  parser.separator ""
  parser.separator "Ruby options:"

  # Each -e snippet is evaluated as soon as it is parsed; lineno keeps
  # backtraces pointing at the right snippet.
  lineno = 1
  parser.on("-e", "--eval LINE", "evaluate a LINE of code") do |line|
    # NOTE(review): this evals arbitrary code taken from the command line,
    # mirroring `ruby -e`. Never feed it input the operator does not control.
    eval line, TOPLEVEL_BINDING, "-e", lineno
    lineno += 1
  end

  parser.on("-d", "--debug", "set debugging flags (set $DEBUG to true)") do
    $DEBUG = true
  end

  # Long form only: "-w" is the short switch for --workers below. Declaring
  # "-w" here as well made the help text list the same flag twice with two
  # different meanings.
  parser.on("--warn", "turn warnings on for your script") do
    $-w = true
  end

  parser.on("-I", "--include PATH",
            "specify $LOAD_PATH (may be used more than once)") do |path|
    # Splat the entries: unshifting the Array itself would put a nested Array
    # on $LOAD_PATH instead of the individual directories.
    $LOAD_PATH.unshift(*path.split(":"))
  end

  parser.on("-r", "--require LIBRARY",
            "require the library, before executing your script") do |library|
    require library
  end

  parser.separator ""
  parser.separator "Spyglass options:"

  # Each of the following switches hands its value to Spyglass::Config.
  parser.on("-p", "--port PORT", "use PORT (default: 4222)") do |port|
    Spyglass::Config.port port
  end

  parser.on("-o", "--host HOST", "listen on HOST (default: 0.0.0.0)") do |host|
    Spyglass::Config.host host
  end

  parser.on("-c", "--configru FILE", "Load the rackup file at FILE (default: config.ru in current directory)") do |path|
    Spyglass::Config.config_ru_path path
  end

  parser.on("-w", "--workers COUNT", "Prefork COUNT workers when handling requests (default: 2)") do |count|
    Spyglass::Config.workers count.to_i
  end

  parser.on("-t", "--timeout SEC", "Time out the master process after SEC seconds (default: 30)") do |sec|
    Spyglass::Config.timeout sec.to_i
  end

  parser.on("-v", "--verbose", "Enable verbose output") do
    Spyglass::Config.verbose true
  end

  parser.on("--vverbose", "Enable very verbose output") do
    Spyglass::Config.vverbose true
  end

  # Asking for help is not a failure, so leave with a success status
  # (`abort` would exit with status 1).
  parser.on_tail("-h", "--help", "Show this message") do
    puts parser
    exit
  end

  # Another typical switch to print the version.
  parser.on_tail("--version", "Show version") do
    puts Spyglass::Version
    exit
  end
end

opts.parse!(ARGV)
# Start the server through the Spyglass::Server singleton.
Spyglass::Server.instance.start

lib/spyglass/server.rb

#---
# Excerpted from "Working with Unix Processes",
# published by The Pragmatic Bookshelf.
# Copyrights apply to this code. It may not be used to create training material, 
# courses, books, articles, and the like. Contact us if you are in doubt.
# We make no guarantees that this code is fit for any purpose. 
# Visit http://www.pragmaticprogrammer.com/titles/jsunix for more book information.
#---
module Spyglass

  # Singleton that opens the server's listening socket and hands it to the
  # Lookout.
  class Server
    include Singleton
    include Logging

    # Opens the main listening socket on the configured host and port; from
    # this point on the server is responsive to incoming connections. The
    # socket is then passed to the Lookout singleton's `start` method.
    def start
      listening_socket = TCPServer.new(Config.host, Config.port)
      out "Listening on port #{Config.host}:#{Config.port}"

      Lookout.instance.start(listening_socket)
    end
  end
end

lib/spyglass/lookout.rb

#---
# Excerpted from "Working with Unix Processes",
# published by The Pragmatic Bookshelf.
# Copyrights apply to this code. It may not be used to create training material, 
# courses, books, articles, and the like. Contact us if you are in doubt.
# We make no guarantees that this code is fit for any purpose. 
# Visit http://www.pragmaticprogrammer.com/titles/jsunix for more book information.
#---
module Spyglass
  # The Lookout is the lean parent process: it only holds the listening
  # socket, forks a Master for each burst of traffic and waits for that Master
  # to exit before accepting again.
  class Lookout
    include Singleton, Logging

    # This method is the main entry point for the Lookout class. It takes
    # the listening socket object and never returns: it loops forever,
    # accepting a connection, forking a Master for it and waiting for that
    # Master to exit.
    def start(socket)
      # Install the INT/QUIT handlers.
      trap_signals

      # The Lookout doesn't know anything about the app itself, so there's
      # no app related setup to do here.
      # Accept socket connections in an endless loop.
      loop do
        # Accepts a new connection on our socket. This class won't actually
        # do anything interesting with this connection, it will pass it down
        # to the `Master` class created below to do the actual request handling.
        conn = socket.accept
        out "Received incoming connection"

        # In this block the Lookout forks a new process and invokes a Master,
        # passing along the socket it received and the connection it accepted
        # above. The child's pid is remembered in @master_pid and the Master's
        # `start` method runs inside the child.
        @master_pid = fork do
          master = Master.new(conn, socket)
          master.start
        end

        # The Lookout can now close its handle on the client socket. This doesn't
        # translate to the socket being closed on the clients end because the
        # forked Master process also has a handle on the same socket. Since this
        # handle is now cleaned up it's up to the Master process to ensure that
        # its handle gets cleaned up. The connection is only really closed once
        # every process holding a handle on it has closed that handle.
        conn.close

        # Now this process blocks until the Master process exits. The Master process
        # will only exit once traffic is slow enough that it has reached its timeout
        # without receiving any new connections.
        Process.waitpid(@master_pid)

        # The Master has been reaped, so its pid no longer identifies a child
        # of ours and may be recycled by the kernel for an unrelated process.
        # Forget it so that the signal handlers never signal a stale pid.
        @master_pid = nil

        # The interaction of fork(2)/waitpid(2) above deserve some explanation.

        # ### Why fork(2)? Why not just spin up the Master?
        # The whole point of the Lookout process is to be very lean. The only resource
        # that it initializes is the listening socket for the server. It doesn't load
        # any of your application into memory, so its resource footprint is very small.

        # The reason that it does a fork(2) before invoking the Master is because once
        # the Master times out we want the Lookout process to remain lean when accepting
        # the next connection.

        # If it were to load the application code without forking
        # then there would be no (simple) way for it to later unload the application code.

        # By doing a fork(2), then waiting for the Master process to exit, that guarantees
        # that all resources (notably memory usage) that were in use by the Master process
        # will be reclaimed by the kernel.

        # ### Who knows what your app will demand!
        # While handling requests your app may require lots of memory. Containing this in a
        # child process, and exiting that process, is the easiest way to ensure that memory
        # bloat isn't shared with our simple parent process.

        # This allows our Lookout process to go back around
        # the loop with nothing more than it started with, just a listening socket.

        # The fork(2)/waitpid(2) approach requires little code to implement, and pushes
        # responsibility down to the kernel to track resource usage and nicely clean up
        # the Master process when it's finished.
      end
    end

    # On INT or QUIT, forwards the same signal to the current Master process
    # (if there is one) and then exits the Lookout.
    def trap_signals
      [:INT, :QUIT].each do |sig|
        trap(sig) {
          begin
            Process.kill(sig, @master_pid) if @master_pid
          rescue Errno::ESRCH
            # The Master is already gone; nothing left to signal.
          end
          exit
        }
      end
    end
  end
end

作者在这里对Lookout主进程中fork出Master进程做了几点解释：
1，因为Lookout很简单，只是有一个socket连接用于监听请求，所以forked Master进程共享的内存也很少。
2，另外使用fork原因是，当Master进程time out或者其他原因关闭时，Lookout接收到新请求，可以再次fork出新的Master。
3，如果不使用fork，Master进程通过time out的方式退出时，就无法借助Unix系统来释放application code占用的资源。
4，先调用fork(2)，再等待Master进程退出，可以保证Master进程占用的所有资源（尤其是内存）在其退出后由内核回收。
5，确保看顾进程可以使用非常少的内存。Master进程处理实际的请求，会消耗比较多内存，退出后系统可以对Master的内存进行回收。
6，比较简单的使Lookout进程作为守护进程。

回忆下守护进程的创建方式：第一次exit if fork，退出当前进程；然后在fork出的子进程中调用Process.setsid，使子进程成为新会话的领导和新进程组的组长，并且脱离终端控制；然后再次使用exit if fork，使再次fork出的进程既不是进程组组长，也不是会话领导，同时没有控制终端，变成守护进程。1.9以后直接使用Process.daemon即可。

lib/spyglass/master.rb

#---
# Excerpted from "Working with Unix Processes",
# published by The Pragmatic Bookshelf.
# Copyrights apply to this code. It may not be used to create training material, 
# courses, books, articles, and the like. Contact us if you are in doubt.
# We make no guarantees that this code is fit for any purpose. 
# Visit http://www.pragmaticprogrammer.com/titles/jsunix for more book information.
#---
module Spyglass
  # The Master loads the Rack app, preforks the workers and babysits them. It
  # exits once no worker has reported a connection for `Config.timeout`
  # seconds.
  class Master
    include Logging

    # connection - the client connection already accepted by the Lookout.
    # socket     - the listening socket shared with every worker.
    def initialize(connection, socket)
      @connection, @socket = connection, socket
      @worker_pids = []

      # The Master shares this pipe with each of its worker processes. It
      # passes the writable end down to each spawned worker while it listens
      # on the readable end. Each worker will write to the pipe each time
      # it accepts a new connection. If The Master doesn't get anything on
      # the pipe before `Config.timeout` elapses then it kills its workers
      # and exits.
      # IO.pipe returns a connected pair of IO objects (read end, write end).
      @readable_pipe, @writable_pipe = IO.pipe
    end

    # This method starts the Master. It enters an infinite loop where it creates
    # processes to handle web requests and ensures that they stay active. It
    # works on the connection handed over by the Lookout instance. A Master will
    # only be started when a connection is received by the Lookout.
    def start
      trap_signals

      load_app
      out "Loaded the app"

      # The first worker we spawn has to handle the connection that was already
      # passed to us.
      spawn_worker(@connection)
      # The Master can now close its handle on the client socket since the
      # forked worker also got a handle on the same socket. Since this one
      # is now closed it's up to the Worker process to close its handle when
      # it's done. At that point the client connection will perceive that
      # it's been closed on their end.
      @connection.close

      # We spawn the rest of the workers.
      (Config.workers - 1).times { spawn_worker }
      out "Spawned #{Config.workers} workers. Babysitting now..."

      loop do
        if timed_out?(IO.select([@readable_pipe], nil, nil, Config.timeout))
          out "Timed out after #{Config.timeout} s. Exiting."

          kill_workers(:QUIT)
          exit
        else
          # Clear the data on the pipe so it doesn't appear to be readable
          # next time around the loop.
          @readable_pipe.read_nonblock 1
        end
      end
    end

    # IO.select returns nil when the timeout elapses with nothing to read.
    def timed_out?(select_result)
      !select_result
    end

    # Forks a worker process and records its pid. When `connection` is given
    # the worker handles it before accepting connections of its own.
    def spawn_worker(connection = nil)
      @worker_pids << fork { Worker.new(@socket, @app, @writable_pipe, connection).start }
    end

    # Installs the QUIT and CHLD handlers.
    def trap_signals
      # The QUIT signal triggers a graceful shutdown. The master shuts down
      # immediately and lets each worker finish the request they are currently
      # processing.
      trap(:QUIT) do
        verbose "Received QUIT"

        kill_workers(:QUIT)
        exit
      end

      # A worker exited: reap every dead child and replace each dead worker.
      trap(:CHLD) do
        reaped = 0

        begin
          # Signals are not queued, so one CHLD may stand for several dead
          # children, or for one that an earlier run of this handler already
          # reaped. Waiting with WNOHANG in a loop collects all of them and
          # never blocks the Master when nothing is left to reap.
          while (dead_worker = Process.wait(-1, Process::WNOHANG))
            reaped += 1 if @worker_pids.delete(dead_worker)
          end
        rescue Errno::ECHILD
          # No child processes left at all.
        end

        # Keep the pool at its size: one new worker per worker that died.
        reaped.times { spawn_worker }
      end
    end

    # Sends `sig` to every known worker. Workers that have already exited are
    # skipped instead of aborting the shutdown with Errno::ESRCH.
    def kill_workers(sig)
      # Iterate over a copy: the CHLD handler may modify @worker_pids while
      # the signals are being delivered.
      @worker_pids.dup.each do |wpid|
        begin
          Process.kill(sig, wpid)
        rescue Errno::ESRCH
          # Worker already gone.
        end
      end
    end

    # Builds the Rack app from the configured rackup file. Only the first
    # element of what parse_file returns (the app) is kept.
    def load_app
      @app, _options = Rack::Builder.parse_file(Config.config_ru_path)
    end
  end
end

lib/spyglass/worker.rb

#---
# Excerpted from "Working with Unix Processes",
# published by The Pragmatic Bookshelf.
# Copyrights apply to this code. It may not be used to create training material, 
# courses, books, articles, and the like. Contact us if you are in doubt.
# We make no guarantees that this code is fit for any purpose. 
# Visit http://www.pragmaticprogrammer.com/titles/jsunix for more book information.
#---
require 'time'
require 'rack/utils'

# Worker
# ======
#
module Spyglass
  # A Worker accepts connections on the shared listening socket, runs each
  # request through the Rack app and writes the response back to the client.
  class Worker
    include Logging

    # socket        - the shared listening socket to accept connections on.
    # app           - the Rack app to call for each request.
    # writable_pipe - write end of the pipe used to tell the Master that a
    #                 connection was received.
    # connection    - optional, already accepted client connection; it is
    #                 handled right away.
    def initialize(socket, app, writable_pipe, connection = nil)
      @socket, @app, @writable_pipe = socket, app, writable_pipe
      @parser = Spyglass::HttpParser.new

      handle_connection(connection) if connection
    end

    # Installs the signal handler and then accepts and handles connections
    # forever.
    def start
      trap_signals

      loop do
        handle_connection @socket.accept
      end
    end

    # Reads one request from `conn`, calls the Rack app and writes the
    # response. The connection is always closed before returning, also when
    # the client sent nothing or an error is raised along the way.
    def handle_connection(conn)
      verbose "Received connection"
      # This notifies our Master that we have received a connection, expiring
      # it's `IO.select` and preventing it from timing out.
      @writable_pipe.write_nonblock('.')

      # This clears any state that the http parser has lying around
      # from the last connection that was handled.
      @parser.reset

      # The Rack spec requires that 'rack.input' be encoded as ASCII-8BIT.
      empty_body = ''
      empty_body.encode!(Encoding::ASCII_8BIT) if empty_body.respond_to?(:encode!)

      # The Rack spec requires that the env contain certain keys before being
      # passed to the app. These are the keys that aren't provided by each
      # incoming request, server-specific stuff.
      env = {
        'rack.input' => StringIO.new(empty_body),
        'rack.multithread' => false,
        'rack.multiprocess' => true,
        'rack.run_once' => false,
        'rack.errors' => STDERR,
        'rack.version' => [1, 0]
      }

      # This reads data in from the client connection. We'll read up to
      # 10000 bytes at the moment.
      begin
        data = conn.readpartial(10000)
      rescue EOFError
        # The client hung up without sending a request. That is not a reason
        # to take the whole worker down; just drop the connection.
        verbose "Client disconnected before sending a request"
        return
      end

      # Here we pass the data and the env into the http parser. It parses
      # the raw http request data and updates the env with all of the data
      # it can withdraw.
      @parser.execute(env, data, 0)

      # Call the Rack app, goes all the way down the rabbit hole and back again.
      status, headers, body = @app.call(env)

      begin
        conn.write "#{response_head(status, headers)}\r\n"
        body.each { |chunk| conn.write chunk }
      ensure
        # The body must be closed even when writing to the client fails.
        body.close if body.respond_to?(:close)
      end
    ensure
      # Since keepalive is not supported we can close the client connection
      # immediately after writing the body. Doing it here also closes it on
      # every early exit or error.
      conn.close unless conn.closed?
      verbose "Closed connection"
    end

    # On QUIT the worker logs the signal and exits.
    def trap_signals
      trap(:QUIT) do
        out "Received QUIT"
        exit
      end
    end

    private

    # Builds the status line and header lines of the response (without the
    # blank line that ends the head).
    #
    # These are the default headers we always include in a response. We
    # only speak HTTP 1.1 and we always close the client connection. At
    # the moment keepalive is not supported.
    def response_head(status, headers)
      # The status is converted with to_i so that the reason phrase lookup
      # also works when the app returns e.g. "200".
      code = status.to_i
      reason = Rack::Utils::HTTP_STATUS_CODES[code]

      # The status line is "HTTP-version SP status-code SP reason-phrase"; the
      # second space is required even when the reason phrase is empty.
      head = "HTTP/1.1 #{code} #{reason}\r\n" \
      "Date: #{Time.now.httpdate}\r\n" \
      "Status: #{reason}\r\n" \
      "Connection: close\r\n"

      headers.each do |name, value|
        # A header value may hold several values separated by "\n" (e.g.
        # multiple Set-Cookie values); each one needs its own header line.
        lines = value.to_s.split("\n")
        lines = [''] if lines.empty?
        lines.each { |line| head << "#{name}: #{line}\r\n" }
      end

      head
    end
  end
end

Spyglass 代码阅读

你可能感兴趣的:(Spyglass 代码阅读)