1. Starting the RPC service.
worker_start() launches a worker process as a child of the current process. Only a single worker is supported at the moment, so make sure it is called at most once, and call worker_run() and worker_wait() from the client program's main loop. worker_start() is defined in lib/worker.c and works as follows: xsocketpair() (a wrapper around socketpair(), which in practice only supports Unix domain sockets) creates a connected stream pair work_fds[0]/work_fds[1], and both ends are made non-blocking via fcntl(); then fork_and_clean_up() forks and does some housekeeping in the child. In the child, daemonize_post_detach() honors any daemon-related configuration, i.e. if --detach and --no-chdir were given then detach=true and chdir_=false (these flags are defined in daemon.c), and closes the standard file descriptors. From then on the child acts as the RPC server and talks to the main process over the socket pair (child: work_fds[1] <-> parent: work_fds[0]).
void
worker_start(void)
{
    int work_fds[2];

    assert(client_sock < 0);

    xsocketpair(AF_UNIX, SOCK_STREAM, 0, work_fds);  /* --> socketpair() */
    xset_nonblocking(work_fds[0]);                   /* --> fcntl() */
    xset_nonblocking(work_fds[1]);

    if (!fork_and_clean_up()) {
        /* In child (worker) process. */
        daemonize_post_detach();
        close(work_fds[0]);
        worker_main(work_fds[1]);
        NOT_REACHED();
    }

    /* In parent (main) process. */
    close(work_fds[1]);
    client_sock = work_fds[0];
    rxbuf_init(&client_rx);
}
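For orientation, the intended call pattern in the daemon's main loop looks roughly like this. This is a sketch based on the worker_run()/worker_wait() declarations in lib/worker.h, not code from the tree; main_loop_sketch is a made-up name.

static void
main_loop_sketch(void)
{
    worker_start();             /* Fork the single worker subprocess (once). */
    for (;;) {
        worker_run();           /* Dispatch any RPC replies that arrived. */
        /* ... the daemon's other per-iteration work ... */
        worker_wait();          /* Register interest in the RPC socket. */
        poll_block();           /* Sleep until a registered event fires. */
    }
}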
fork_and_clean_up() is defined in lib/daemon.c. It calls fork() and, in the child: time_postfork() starts the internal interval timer that keeps time advancing even when time_refresh() is not called (a child created by fork() does not inherit the parent's timer, so it must be re-armed after fork()); lockfile_postfork() releases any lockfiles currently held (closing the corresponding file descriptors and removing the entries from the lock_table hmap), which makes sense after a fork because the child does not actually hold the parent's locks.
/* Post-fork, but before returning, this function calls a few other functions
 * that are generally useful if the child isn't planning to exec a new
 * process. */
pid_t
fork_and_clean_up(void)
{
    pid_t pid = fork();
    if (pid > 0) {
        /* Running in parent process.  fatal_signal_fork() clears the fatal
         * signal hooks registered so far (see lib/fatal-signal.c), so that
         * this process can exit without running cleanup that the child may
         * still depend on. */
        fatal_signal_fork();
    } else if (!pid) {
        /* Running in child process. */
        time_postfork();
        lockfile_postfork();
    } else {
        VLOG_FATAL("fork failed (%s)", strerror(errno));
    }
    return pid;
}
The RPC request/reply headers and the functions defined in worker.c are effectively the communication protocol between the RPC server and client (header and payload are carried in ofpbuf structures, the same general-purpose buffer type OVS uses for OpenFlow messages). worker_main() initializes an rxbuf; rxbuf_run() then receives incoming bytes and assembles them into the rxbuf, after which the request callback is invoked. The callback's concrete implementation does not appear here because it is supplied by whoever queued the request from the main process via worker_request().
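The request header looks roughly like the sketch below. The request_cb and reply_cb fields are visible in worker_main() further down, and the leading size_t length is implied by the way rxbuf_run() reads the payload length out of the header; the exact field names, the typedef names, and the reply_aux member are my reconstruction from lib/worker.h rather than a verbatim copy.

/* Sketch of the RPC request header (reconstructed; see above). */
struct worker_request {
    size_t request_len;               /* Length of the payload that follows. */
    worker_request_func *request_cb;  /* Called in the worker process. */
    worker_reply_func *reply_cb;      /* Called in the main process, or NULL. */
    void *reply_aux;                  /* Opaque data passed back to 'reply_cb'. */
};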
static void
worker_main(int fd)
{
    struct rxbuf rx;

    server_sock = fd;

    subprogram_name = "worker";
    proctitle_set("worker process for pid %lu", (unsigned long int) getppid());
    VLOG_INFO("worker process started");

    rxbuf_init(&rx);
    for (;;) {
        int error;

        error = rxbuf_run(&rx, server_sock, sizeof(struct worker_request));
        if (!error) {
            /* 'request' and 'expect_reply' are file-scope statics in
             * worker.c; worker_reply() clears 'expect_reply', so the
             * assertion below checks that a callback that promised a reply
             * actually sent one. */
            request = *(struct worker_request *) rx.header.data;

            expect_reply = request.reply_cb != NULL;
            request.request_cb(&rx.payload, rx.fds, rx.n_fds);
            assert(!expect_reply);

            rxbuf_clear(&rx);
        } else if (error == EOF && !rx.header.size) {
            /* Main process closed the IPC socket.  Exit cleanly. */
            break;
        } else if (error != EAGAIN) {
            VLOG_FATAL("RPC receive failed (%s)", strerror(error));
        }

        poll_fd_wait(server_sock, POLLIN);
        poll_block();
    }

    VLOG_INFO("worker process exiting");
    exit(0);
}
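To make the round trip concrete, here is a hypothetical caller on the main-process side. The worker_request() argument order is paraphrased from lib/worker.h and may differ in detail; hello_cb and send_hello are invented names.

/* Runs in the worker process; its signature matches the way worker_main()
 * above invokes request.request_cb. */
static void
hello_cb(struct ofpbuf *payload, const int fds[], size_t n_fds)
{
    VLOG_INFO("worker received: %s", (const char *) payload->data);
}

/* Runs in the main process: frames 'msg' as an RPC payload and queues it
 * for the worker over client_sock.  No reply is requested (reply_cb NULL). */
static void
send_hello(void)
{
    const char msg[] = "hello";
    worker_request(msg, sizeof msg, NULL, 0, hello_cb, NULL, NULL);
}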
/* Receive buffer for an RPC request or reply. */
struct rxbuf {
/* Header. */
struct ofpbuf header; /* Header data. */
int fds[SOUTIL_MAX_FDS]; /* File descriptors. */
size_t n_fds;
/* Payload. */
struct ofpbuf payload; /* Payload data. */
};
/* Buffer for holding arbitrary data. An ofpbuf is automatically reallocated
* as necessary if it grows too large for the available memory. */
struct ofpbuf {
void *base; /* First byte of allocated space. */
size_t allocated; /* Number of bytes allocated. */
enum ofpbuf_source source; /* Source of memory allocated as 'base'. */
void *data; /* First byte actually in use. */
size_t size; /* Number of bytes in use. */
void *l2; /* Link-level header. */
void *l3; /* Network-level header. */
void *l4; /* Transport-level header. */
void *l7; /* Application data. */
struct list list_node; /* Private list element for use by owner. */
void *private_p; /* Private pointer for use by owner. */
};
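Two ofpbuf helpers from lib/ofpbuf.h appear repeatedly in rxbuf_run() below; this minimal demo (a sketch, not tree code) pins down their semantics:

static void
ofpbuf_helpers_demo(void)
{
    struct ofpbuf b;

    ofpbuf_init(&b, 0);
    ofpbuf_prealloc_tailroom(&b, 128);  /* Ensure >= 128 bytes can be appended. */
    memcpy(ofpbuf_tail(&b), "abc", 3);  /* ofpbuf_tail() == (char *) b.data + b.size */
    b.size += 3;
    ofpbuf_uninit(&b);
}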
rxbuf_run() is a small state machine over the receive buffer. If rx->header.size == 0, nothing is buffered yet, so recv_data_and_fds() is called to receive data plus file descriptors from the Unix domain socket sock (the stream pipe). If 0 < rx->header.size < header_len, the buffer holds only part of an RPC request header from an earlier read, so it keeps reading until the header is complete. Once a full request header has been assembled, the actual payload is read (its length is the leading size_t of the header).
static int
rxbuf_run(struct rxbuf *rx, int sock, size_t header_len)
{
    for (;;) {
        if (!rx->header.size) {
            int retval;

            ofpbuf_clear(&rx->header);
            ofpbuf_prealloc_tailroom(&rx->header, header_len);

            retval = recv_data_and_fds(sock, rx->header.data, header_len,
                                       rx->fds, &rx->n_fds);
            if (retval <= 0) {
                return retval ? -retval : EOF;
            }
            rx->header.size += retval;
        } else if (rx->header.size < header_len) {
            size_t bytes_read;
            int error;

            error = read_fully(sock, ofpbuf_tail(&rx->header),
                               header_len - rx->header.size, &bytes_read);
            rx->header.size += bytes_read;
            if (error) {
                return error;
            }
        } else {
            size_t payload_len = *(size_t *) rx->header.data;

            if (rx->payload.size < payload_len) {
                size_t left = payload_len - rx->payload.size;
                size_t bytes_read;
                int error;

                ofpbuf_prealloc_tailroom(&rx->payload, left);
                error = read_fully(sock, ofpbuf_tail(&rx->payload), left,
                                   &bytes_read);
                rx->payload.size += bytes_read;
                if (error) {
                    return error;
                }
            } else {
                return 0;
            }
        }
    }

    return EAGAIN;
}
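recv_data_and_fds() (lib/socket-util.c) is what makes the file-descriptor passing work. Below is a simplified sketch of the recvmsg()/SCM_RIGHTS mechanics it builds on, not the exact OVS code:

static ssize_t
recv_with_fds_sketch(int sock, void *data, size_t size)
{
    union {
        struct cmsghdr cm;
        char control[CMSG_SPACE(SOUTIL_MAX_FDS * sizeof(int))];
    } cmsg;
    struct iovec iov = { .iov_base = data, .iov_len = size };
    struct msghdr msg = {
        .msg_iov = &iov,
        .msg_iovlen = 1,
        .msg_control = &cmsg,
        .msg_controllen = sizeof cmsg.control,
    };
    ssize_t retval = recvmsg(sock, &msg, 0);

    /* On success, any fds the peer passed arrive in SCM_RIGHTS control
     * messages: walk them with CMSG_FIRSTHDR()/CMSG_NXTHDR() and copy the
     * int array out of CMSG_DATA(). */
    return retval;
}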
2. Creating the unixctl control server.
The sun_path the server listens on is derived from path, which can take several forms: NULL, which defaults to <rundir>/<program>.<pid>.ctl; "none", which returns success without creating any socket; a name that does not begin with '/', which is placed under <rundir>; or an absolute path such as /usr/local/var/run/openvswitch/db.sock. A program should call unixctl_server_create() *after* daemonization, so that the socket name contains the pid of the daemon rather than that of the program that already exited. ovs-appctl --target=<program> tells appctl which daemon to send a command to; the daemon receives the command and returns a response. By default each daemon listens on a Unix domain socket /usr/local/var/run/<program>.<pid>.ctl, e.g. ovs-vswitchd.5408.ctl.
int
unixctl_server_create(const char *path, struct unixctl_server **serverp)
{
    struct unixctl_server *server;
    struct pstream *listener;
    char *punix_path;
    int error;

    *serverp = NULL;
    if (path && !strcmp(path, "none")) {
        return 0;
    }

    if (path) {
        char *abs_path = abs_file_name(ovs_rundir(), path);
        punix_path = xasprintf("punix:%s", abs_path);
        free(abs_path);
    } else {
        punix_path = xasprintf("punix:%s/%s.%ld.ctl", ovs_rundir(),
                               program_name, (long int) getpid());
    }

    error = pstream_open(punix_path, &listener, 0);
    if (error) {
        ovs_error(error, "could not initialize control socket %s", punix_path);
        goto exit;
    }

    unixctl_command_register("help", "", 0, 0, unixctl_help, NULL);
    unixctl_command_register("version", "", 0, 0, unixctl_version, NULL);

    server = xmalloc(sizeof *server);
    server->listener = listener;
    list_init(&server->conns);
    *serverp = server;

exit:
    free(punix_path);
    return error;
}
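A daemon then drives the server from its main loop, roughly as below (a sketch; unixctl_server_run() and unixctl_server_wait() are the companion functions declared in lib/unixctl.h, and control_loop_sketch is a made-up name):

static void
control_loop_sketch(void)
{
    struct unixctl_server *unixctl;
    int retval;

    retval = unixctl_server_create(NULL, &unixctl);  /* <rundir>/<program>.<pid>.ctl */
    if (retval) {
        exit(EXIT_FAILURE);
    }
    for (;;) {
        unixctl_server_run(unixctl);   /* Accept connections, dispatch commands. */
        /* ... */
        unixctl_server_wait(unixctl);
        poll_block();
    }
}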
pstream_open() (defined in lib/stream.c) starts listening for remote stream connections. The name argument has the form TYPE:ARGS, where TYPE is a passive stream class (e.g. punix, pssl, ptcp) and ARGS are arguments specific to that stream type; on success the new listener is stored in *pstreamp.
int
pstream_open(const char *name, struct pstream **pstreamp, uint8_t dscp)
{
    const struct pstream_class *class;
    struct pstream *pstream;
    char *suffix_copy;
    int error;

    COVERAGE_INC(pstream_open);  /* Bump the coverage-instrumentation counter. */

    /* Look the type up in pstream_classes: type name -> pstream_class. */
    error = pstream_lookup_class(name, &class);
    if (!class) {
        goto error;
    }

    /* Extract the ARGS part of the name and call the class's listen
     * function. */
    suffix_copy = xstrdup(strchr(name, ':') + 1);
    error = class->listen(name, suffix_copy, &pstream, dscp);
    free(suffix_copy);
    if (error) {
        goto error;
    }

    *pstreamp = pstream;
    return 0;

error:
    *pstreamp = NULL;
    return error;
}
static const struct pstream_class *pstream_classes[] = {
&ptcp_pstream_class,
&punix_pstream_class,
#ifdef HAVE_OPENSSL
&pssl_pstream_class,
#endif
};
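Putting these together, using the passive stream API directly looks roughly like the sketch below (pstream_accept() is declared in lib/stream.h; the socket path and listen_sketch are made up):

static int
listen_sketch(void)
{
    struct pstream *listener;
    struct stream *client;
    int error;

    error = pstream_open("punix:/tmp/example.sock", &listener, 0);
    if (error) {
        return error;
    }
    error = pstream_accept(listener, &client);  /* EAGAIN if none is pending. */
    if (!error) {
        /* Talk to 'client' with stream_recv()/stream_send(). */
    }
    return error;
}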
Look at punix_pstream_class (defined in stream-unix.c), for example: its listen function, punix_open(), creates a Unix domain socket, puts it into non-blocking mode (the second argument to make_unix_socket()), starts listening for client connections, and calls new_fd_pstream() to wrap the listening fd so new connections can be handled.
const struct pstream_class punix_pstream_class = {
    "punix",                    /* name */
    false,                      /* needs_probes */
    punix_open,                 /* listen */
    NULL, NULL, NULL, NULL,     /* close, accept, wait, set_dscp: provided
                                 * via fd_pstream_class once listening. */
};
static int
punix_open(const char *name OVS_UNUSED, char *suffix,
           struct pstream **pstreamp, uint8_t dscp OVS_UNUSED)
{
    int fd, error;

    /* Create a non-blocking Unix domain socket bound to 'suffix' (the
     * "fd < 0" error check is elided in this excerpt). */
    fd = make_unix_socket(SOCK_STREAM, true, suffix, NULL);
    if (listen(fd, 10) < 0) {
        error = errno;
        VLOG_ERR("%s: listen: %s", name, strerror(error));
        close(fd);
        return error;
    }

    return new_fd_pstream(name, fd, punix_accept, NULL, xstrdup(suffix),
                          pstreamp);
}
new_fd_pstream() creates a new pstream named name that accepts new socket connections on fd and stores it in *pstreamp. Once a connection is accepted, accept_cb is called (its arguments are the new fd returned by accept() and the client's address information); on success it initializes a *streamp for talking to that client (much like forking a child per connection in a classic server).
int
new_fd_pstream(const char *name, int fd,
               int (*accept_cb)(int fd, const struct sockaddr *sa,
                                size_t sa_len, struct stream **streamp),
               int (*set_dscp_cb)(int fd, uint8_t dscp),
               char *unlink_path, struct pstream **pstreamp)
{
    struct fd_pstream *ps = xmalloc(sizeof *ps);
    pstream_init(&ps->pstream, &fd_pstream_class, name);
    ps->fd = fd;
    ps->accept_cb = accept_cb;
    ps->set_dscp_cb = set_dscp_cb;
    ps->unlink_path = unlink_path;
    *pstreamp = &ps->pstream;
    return 0;
}
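The accept path that eventually calls accept_cb lives in fd_pstream_class's accept implementation. A simplified sketch of what it does (the real code in lib/stream-fd.c also handles EAGAIN specially and logs errors; pfd_accept_sketch is an invented name):

static int
pfd_accept_sketch(struct fd_pstream *ps, struct stream **new_streamp)
{
    struct sockaddr_storage ss;
    socklen_t ss_len = sizeof ss;
    int new_fd;

    new_fd = accept(ps->fd, (struct sockaddr *) &ss, &ss_len);
    if (new_fd < 0) {
        return errno;               /* Includes EAGAIN when nothing is pending. */
    }
    xset_nonblocking(new_fd);
    /* Hand the accepted fd to the class-specific callback, e.g.
     * punix_accept() below. */
    return ps->accept_cb(new_fd, (struct sockaddr *) &ss, ss_len, new_streamp);
}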
static int
punix_accept(int fd, const struct sockaddr *sa, size_t sa_len,
             struct stream **streamp)
{
    const struct sockaddr_un *sun = (const struct sockaddr_un *) sa;
    int name_len = get_unix_name_len(sa_len);
    char name[128];

    if (name_len > 0) {
        snprintf(name, sizeof name, "unix:%.*s", name_len, sun->sun_path);
    } else {
        strcpy(name, "unix");
    }
    return new_fd_stream(name, fd, 0, streamp);
}
new_fd_stream() (lib/stream-fd.c) builds the stream structure named like unix:.... A pstream_class holds fd/name/listen/accept-style members for the passive side, while a stream_class has fields such as name, open, connect, close, recv, and send. The finished *streamp then sends and receives data on the new fd through its recv and send methods.
int
new_fd_stream(const char *name, int fd, int connect_status,
              struct stream **streamp)
{
    struct stream_fd *s;        /* stream_fd = stream + fd. */

    s = xmalloc(sizeof *s);
    stream_init(&s->stream, &stream_fd_class, connect_status, name);
    s->fd = fd;
    *streamp = &s->stream;
    return 0;
}
/* Active file descriptor stream. */
struct stream_fd
{
struct stream stream;
int fd;
};
/* Active stream connection.  This structure should be treated as opaque by
 * the implementation. */
struct stream {
    const struct stream_class *class;   /* See lib/stream-provider.h. */
    int state;
    int error;
    ovs_be32 remote_ip;
    ovs_be16 remote_port;
    ovs_be32 local_ip;
    ovs_be16 local_port;
    char *name;
};
unix_stream_class, for example, is defined as below; its unix_open() is the client side: it creates a Unix domain socket and connects to the server. Whether server or client, each session ultimately ends up wrapped in a stream whose class is stream_fd_class (lib/stream-fd.c), and all communication goes through the methods that class provides.
const struct stream_class unix_stream_class = {
"unix", /* name */
false, /* needs_probes */
unix_open, /* open */
NULL, /* close */
NULL, /* connect */
NULL, /* recv */
NULL, /* send */
NULL, /* run */
NULL, /* run_wait */
NULL, /* wait */
};
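unix_open(), the open member above, is short; roughly the following (simplified from lib/stream-unix.c, with error handling paraphrased):

static int
unix_open(const char *name, char *suffix, struct stream **streamp,
          uint8_t dscp OVS_UNUSED)
{
    /* Client side: create a non-blocking Unix domain socket and connect()
     * it to the server's socket file named by 'suffix'. */
    int fd = make_unix_socket(SOCK_STREAM, true, NULL, suffix);
    if (fd < 0) {
        VLOG_ERR("%s: connection failed (%s)", suffix, strerror(-fd));
        return -fd;
    }
    return new_fd_stream(name, fd, check_connection_completion(fd), streamp);
}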
static const struct stream_class stream_fd_class = {
"fd", /* name */
false, /* needs_probes */
NULL, /* open */
fd_close, /* close */
fd_connect, /* connect */
fd_recv, /* recv */
fd_send, /* send */
NULL, /* run */
NULL, /* run_wait */
fd_wait, /* wait */
};
fd_send, for example, simply writes the data to the underlying BSD socket with write().
static ssize_t
fd_send(struct stream *stream, const void *buffer, size_t n)
{
    struct stream_fd *s = stream_fd_cast(stream);
    ssize_t retval;

    if (STRESS(stream_flaky_send)) {
        return -EIO;
    }

    retval = write(s->fd, buffer, n);
    return (retval > 0 ? retval
            : retval == 0 ? -EAGAIN
            : -errno);
}
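Note the return convention: a positive value is the byte count written, 0 is mapped to -EAGAIN, and other failures come back as negative errno values. A caller of the public wrapper stream_send() (declared in lib/stream.h) therefore handles three cases, roughly as in this sketch (send_sketch is a made-up name):

static void
send_sketch(struct stream *stream, const void *buf, size_t len)
{
    int retval = stream_send(stream, buf, len);

    if (retval > 0) {
        /* 'retval' bytes were accepted; advance 'buf' and retry the rest. */
    } else if (retval == -EAGAIN) {
        stream_send_wait(stream);   /* Wake the poll loop when writable again. */
    } else {
        /* Hard error: -retval is the errno value. */
    }
}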