References:
Source: include/phenom/refcnt.h
typedef int ph_refcnt_t;
void ph_refcnt_add(ph_refcnt_t *ref);

// Returns true if we just released the final reference
bool ph_refcnt_del(ph_refcnt_t *ref);
Reference counting manages an object's lifetime:
void ph_string_delref(ph_string_t *str)
{
  if (!ph_refcnt_del(&str->ref)) {
    return;
  }
  if (str->mt >= 0) {
    ph_mem_free(str->mt, str->buf);
    str->mt = PH_MEMTYPE_INVALID;
  }
  if (str->slice) {
    ph_string_delref(str->slice);
    str->slice = 0;
  }
  str->buf = 0;
  if (!str->onstack) {
    ph_mem_free(mt_string, str);
  }
}
Source: corelib/counter.c; include/phenom/counter.h; tests/counter.c; corelib/debug_console.c
Counters are used to understand how often things happen; in practice they are used by the memory and job subsystems.
A scope is the notion of a set of counters that logically belong to the same group.
The first step in using counters is to create a scope.
When defining a scope you must decide the maximum number of counters that can ever be registered in it; each registration occupies a slot.
Scopes can have parent-child relationships with one another.
There are only two scenarios in which you create a block (see the sketch below):
when you need to update a counter frequently within the same thread;
when you update multiple counters from one thread and want that operation to be as fast as possible.
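A minimal sketch of the API (signatures as I read them from include/phenom/counter.h; the "demo" scope and "hits" counter are made-up names):

ph_counter_scope_t *scope = ph_counter_scope_define(NULL, "demo", 8);
uint8_t slot = ph_counter_scope_register_counter(scope, "hits");

// occasional update: go through the scope directly
ph_counter_scope_add(scope, slot, 1);

// hot path: open a per-thread block and update through it
ph_counter_block_t *block = ph_counter_block_open(scope);
for (int i = 0; i < 1000; i++) {
  ph_counter_block_add(block, slot, 1);
}
ph_counter_block_delref(block);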
With the debug-console enabled, the system counters can be dumped:
~$> echo counters | nc -UC /tmp/phenom-debug-console
iosched/dispatched 5144
iosched/timer_busy 0
iosched/timer_ticks 5035
memory.ares.channel/allocs 1
memory.ares.channel/bytes 104
memory.ares.channel/frees 0
memory.ares.channel/oom 0
The top-level scopes above are memory and iosched. memory's child scope is ares, and ares's child scope is channel.
The channel scope has 4 slots, recording 4 counters named allocs, bytes, frees, and oom,
whose values are 1, 104, 0, and 0 respectively.
Source: corelib/memory.c; include/phenom/memory.h; tests/memory.c; corelib/debug_console.c
A memory allocator built on top of the counter subsystem.
New memtypes are registered through the following two functions:
ph_memtype_t ph_memtype_register(const ph_memtype_def_t *def);
ph_memtype_t ph_memtype_register_block(
    uint8_t num_types,
    const ph_memtype_def_t *defs,
    ph_memtype_t *types);
The operations supported on a memtype are malloc, realloc, and free:
void *ph_mem_alloc(ph_memtype_t memtype)
void *ph_mem_alloc_size(ph_memtype_t memtype, uint64_t size)
void *ph_mem_realloc(ph_memtype_t memtype, void *ptr, uint64_t size)
void ph_mem_free(ph_memtype_t memtype, void *ptr)
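A minimal usage sketch, assuming a caller-defined "example"/"widget" memtype; the ph_memtype_def_t initializer layout follows the buffer defs quoted later in these notes:

struct widget { int id; };

static ph_memtype_def_t widget_def = {
  "example", "widget", sizeof(struct widget), PH_MEM_FLAGS_ZERO
};
static ph_memtype_t mt_widget;

void widget_demo(void)
{
  mt_widget = ph_memtype_register(&widget_def);
  struct widget *w = ph_mem_alloc(mt_widget); // fixed-size allocation, zeroed
  w->id = 42;
  ph_mem_free(mt_widget, w);
}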
Memory allocation statistics can be obtained with the following function:
void ph_mem_stat(ph_memtype_t memtype, ph_mem_stats_t *stats);
struct ph_mem_stats {
  /* the definition */
  const ph_memtype_def_t *def;
  /* current amount of allocated memory in bytes */
  uint64_t bytes;
  /* total number of out-of-memory events (allocation failures) */
  uint64_t oom;
  /* total number of successful allocation events */
  uint64_t allocs;
  /* total number of calls to free */
  uint64_t frees;
  /* total number of calls to realloc (that are not themselves
   * equivalent to an alloc or free) */
  uint64_t reallocs;
};
With the debug-console enabled, memory usage can be dumped (very cool):
$> echo memory | nc -UC /tmp/phenom-debug-console
WHAT BYTES OOM ALLOCS FREES REALLOC
threadpool/pool 832 0 1 0 0
threadpool/ringbuf 8480 0 2 0 0
hashtable/table 3136 0 3 0 0
hook/hook 8 0 1 0 0
hook/head 0 0 0 0 0
hook/string 19 0 1 0 0
hook/unreg 0 0 0 0 0
stream/stream 272 0 2 0 0
buffer/object 120 0 3 0 0
buffer/8k 16384 0 2 0 0
buffer/16k 0 0 0 0 0
buffer/32k 0 0 0 0 0
buffer/64k 0 0 0 0 0
buffer/vsize 0 0 0 0 0
buffer/queue 48 0 2 0 0
buffer/queue_ent 64 0 2 0 0
Source: corelib/string.c; include/phenom/string.h; tests/string.c
Design goals: http://facebook.github.io/libphenom/#string
Implementation:
typedef struct ph_string ph_string_t;
struct ph_string {
  ph_refcnt_t ref;     // reference count
  ph_memtype_t mt;
  uint32_t len, alloc; // bytes used, bytes allocated
  char *buf;           // points to the actual storage
  ph_string_t *slice;
  bool onstack;        // whether the struct lives on the stack
};
For the mt field, a negative value denotes a stack-based growable string (encoding the desired memtype as its negation), while a positive value denotes a heap-allocated growable string.
ph_result_t ph_string_append_buf(ph_string_t *str,
    const char *buf, uint32_t len)
{
  if (len + str->len > str->alloc) {
    // Not enough room
    if (str->mt == PH_STRING_STATIC) {
      // Just clamp to the available space
      len = str->alloc - str->len;
    } else {
      // Grow it
      uint32_t nsize = ph_power_2(str->len + len);
      char *nbuf;

      // Negative memtypes encode the desired memtype as the negative
      // value. Allocate a buffer from scratch using the desired memtype
      if (str->mt < 0) {
        nbuf = ph_mem_alloc_size(-str->mt, nsize);
      } else {
        nbuf = ph_mem_realloc(str->mt, str->buf, nsize);
      }
      if (nbuf == NULL) {
        return PH_NOMEM;
      }
      if (str->mt < 0) {
        // Promote from static growable to heap allocated growable
        memcpy(nbuf, str->buf, str->len);
        str->mt = -str->mt;
      }
      str->buf = nbuf;
      str->alloc = nsize;
    }
  }
  memcpy(str->buf + str->len, buf, len);
  str->len += len;
  return PH_OK;
}
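A usage sketch of the promotion path (the macro and function names here are how I recall the string API; treat the exact spellings as assumptions):

PH_STRING_DECLARE_GROW_ON_STACK(str, 16); // negative mt: stack-based growable
ph_string_append_cstr(&str, "longer than sixteen bytes in total");
// after the promotion above, str.mt has flipped to the positive (heap) memtype
ph_string_delref(&str); // onstack is true, so only buf is released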
Creating a slice:
ph_string_t *ph_string_make_slice(ph_string_t *str,
    uint32_t start, uint32_t len)
{
  ph_string_t *slice;

  if (start == 0 && len == str->len) {
    ph_string_addref(str);
    return str;
  }

  slice = ph_mem_alloc(mt_string);
  if (!slice) {
    return NULL;
  }
  ph_string_init_slice(slice, str, start, len);
  return slice;
}
Library initialization. For example, memory.c contains the following directive:
PH_LIBRARY_INIT_PRI(memory_init, memory_destroy, 3)
include/phenom/defs.h defines:
void ph_library_init_register(struct ph_library_init_entry *ent);

#define PH_LIBRARY_INIT_PRI(initfn, finifn, pri) \
  static __attribute__((constructor)) \
  void ph_defs_gen_symbol(ph__lib__init__)(void) { \
    static struct ph_library_init_entry ent = { \
      __FILE__, __LINE__, pri, initfn, finifn, 0 \
    }; \
    ph_library_init_register(&ent); \
  }
__attribute__((constructor)) makes the function body run automatically before main() starts;
see http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html for details;
this is how memory_init and memory_destroy get registered.
Every program that uses libphenom is required to call ph_library_init first, which runs every registered init function:
for (i = 0; i < num_init_ents; i++) {
  struct ph_library_init_entry *ent = init_funcs[i];
  if (ent->init) {
    ent->init();
  }
}
Source: directory corelib/streams/; include/phenom/stream.h; tests/stream.c
libPhenom provides a portable layer over streaming IO.
CSAPP explains why standard IO cannot be used on sockets.
Streams support sockets, SSL, fds, and strings.
Implementation:
/** Represents a stream
 *
 * Streams maintain a buffer for read/write operations.
 */
struct ph_stream {
  const struct ph_stream_funcs *funcs;
  void *cookie;
  unsigned flags;
  pthread_mutex_t lock;
  // if data is in the read buffer, these are non-NULL
  unsigned char *rpos, *rend;
  // if data is in the write buffer, these are non-NULL
  unsigned char *wpos, *wend;
  unsigned char *wbase;
  // associated buffer. It can be either used in read mode
  // or write mode, but not both
  unsigned char *buf;
  uint32_t bufsize;
  int last_err;
  ph_iomask_t need_mask;
};
/** Defines a stream implementation.
 *
 * If any of these return false, it indicates an error.
 * The implementation must set stm->last_err to the corresponding
 * errno value in that case (and only in the failure case).
 */
struct ph_stream_funcs {
  bool (*close)(ph_stream_t *stm);
  bool (*readv)(ph_stream_t *stm, const struct iovec *iov,
      int iovcnt, uint64_t *nread);
  bool (*writev)(ph_stream_t *stm, const struct iovec *iov,
      int iovcnt, uint64_t *nwrote);
  bool (*seek)(ph_stream_t *stm, int64_t delta,
      int whence, uint64_t *newpos);
};
Reads and writes share one buffer. Different stream types are supported by supplying a different struct ph_stream_funcs, as sketched below.
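For illustration, a hedged sketch of a "sink" stream that discards everything written to it; it assumes ph_stm_make(funcs, cookie, flags, bufsize), the call shape used by the sock code later in these notes:

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <sys/uio.h>

static bool sink_close(ph_stream_t *stm) {
  (void)stm;
  return true;
}

static bool sink_readv(ph_stream_t *stm, const struct iovec *iov,
    int iovcnt, uint64_t *nread) {
  (void)iov; (void)iovcnt; (void)nread;
  stm->last_err = ENOSYS; // reads are not supported
  return false;
}

static bool sink_writev(ph_stream_t *stm, const struct iovec *iov,
    int iovcnt, uint64_t *nwrote) {
  uint64_t n = 0;
  (void)stm;
  for (int i = 0; i < iovcnt; i++) {
    n += iov[i].iov_len; // pretend every byte was consumed
  }
  *nwrote = n;
  return true;
}

static bool sink_seek(ph_stream_t *stm, int64_t delta,
    int whence, uint64_t *newpos) {
  (void)delta; (void)whence; (void)newpos;
  stm->last_err = ESPIPE; // a sink is not seekable
  return false;
}

static const struct ph_stream_funcs sink_funcs = {
  .close = sink_close,
  .readv = sink_readv,
  .writev = sink_writev,
  .seek = sink_seek,
};

// ph_stream_t *stm = ph_stm_make(&sink_funcs, NULL, 0, 0);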
Source: corelib/buf.c; include/phenom/buffer.h; tests/buf.c
Design goals: http://facebook.github.io/libphenom/index.html#buffer
ph_buf_t is the underlying implementation of ph_bufq_t and is not used standalone:
struct ph_buf {
  ph_refcnt_t ref;
  ph_buf_t *slice;
  uint8_t *buf;
  uint64_t size;
  ph_memtype_t memtype;
};
ph_buf_t *ph_buf_new(uint64_t size);
ph_buf_t *ph_buf_slice(ph_buf_t *buf, uint64_t start, uint64_t len);
ph_buf_new creates a new buffer whose size is chosen by calling select_size, mainly from the classes 8192, 16k, 32k, and so on.
ph_buf_slice creates a slice; a slice does not actually allocate memory.
Special case: when start == 0 and len equals the buffer's length, it simply does ph_buf_addref(buf) and returns the original.
The different kinds of memory allocated by the buf subsystem are each accounted for separately:
static ph_memtype_def_t defs[] = {
  { "buffer", "object", sizeof(ph_buf_t), PH_MEM_FLAGS_ZERO },
  { "buffer", "8k", 8*1024, 0 },
  { "buffer", "16k", 16*1024, 0 },
  { "buffer", "32k", 32*1024, 0 },
  { "buffer", "64k", 64*1024, 0 },
  { "buffer", "vsize", 0, 0 },
  { "buffer", "queue", sizeof(ph_bufq_t), PH_MEM_FLAGS_ZERO },
  { "buffer", "queue_ent", sizeof(struct ph_bufq_ent), PH_MEM_FLAGS_ZERO },
};
ph_bufq_t is used as the user-space buffer for sockets.
struct ph_bufq_ent {
  PH_STAILQ_ENTRY(ph_bufq_ent) ent;
  ph_buf_t *buf;
  // Offset into the buf of the data that is yet to be consumed
  uint64_t rpos;
  // Offset at which to append further data
  uint64_t wpos;
};

struct ph_bufq {
  PH_STAILQ_HEAD(bufqhead, ph_bufq_ent) fifo;
  // Maximum amount of storage to allow
  uint64_t max_size; // appears to be unused at present? 2013-11-14
};
ph_bufq_t *ph_bufq_new(uint64_t max_size);
ph_result_t ph_bufq_append(ph_bufq_t *q, const void *buf, uint64_t len,
    uint64_t *added_bytes);
ph_buf_t *ph_bufq_consume_bytes(ph_bufq_t *q, uint64_t len);
ph_buf_t *ph_bufq_consume_record(ph_bufq_t *q, const char *delim,
    uint32_t delim_len);
ph_bufq_new creates a FIFO of fixed-size buffers; by default one 8192-byte buffer is placed in the FIFO.
ph_bufq_append inserts data into a ph_bufq_t; if the last buffer lacks capacity, a new buffer is created and appended to the FIFO.
ph_bufq_consume_bytes reads data out of a ph_bufq_t (gc_bufq releases the drained entries); the returned ph_buf_t is freshly created.
ph_bufq_consume_record reads data up to a given delimiter, e.g. up to "\r\n"; it calls find_record, whose implementation demands real patience.
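A hedged usage sketch; it assumes, per my reading of buffer.h, that the record returned by ph_bufq_consume_record includes the delimiter and that NULL means no complete record is buffered yet:

ph_bufq_t *q = ph_bufq_new(128 * 1024);
uint64_t added;

ph_bufq_append(q, "hello\r\nwor", 10, &added);
ph_buf_t *line = ph_bufq_consume_record(q, "\r\n", 2);
// line holds "hello\r\n"; the trailing "wor" stays queued until more
// data (and another "\r\n") arrives
if (line) {
  ph_buf_delref(line);
}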
Source: include/phenom/json.h; directory corelib/variant/
Provides JSON encoding and decoding functionality.
Source: include/phenom/configuration.h; corelib/config.c
At startup a program reads a global configuration file (in JSON format) that can alter some of its behavior.
The file can be specified via ph_config_load_config_file or via getenv("PHENOM_CONFIG_FILE").
For example, in job.c the following parameter can be set to control the sleep time:
int max_sleep = ph_config_query_int("$.nbio.max_sleep", 5000);
Applications are advised to keep their own configuration under the path "$.app.".
Source: include/phenom/timerwheel.h; corelib/timerwheel.c
The timer wheel is a timer implementation technique; the concept comes from "Hashed and Hierarchical Timing Wheels".
It is used to manage large numbers of timers; the Linux kernel uses this scheme as well.
A timer wheel works by analogy with a clock face: a hand advances in one direction at a fixed frequency, and each jump is called a tick.
From this you can see that a timer wheel has three important parameters: ticksPerWheel (ticks per revolution), tickDuration (the duration of one tick),
and timeUnit (the time unit). For example, with ticksPerWheel=60, tickDuration=1, and timeUnit=seconds, the wheel behaves exactly like the second hand of a real clock.
Implementation:
PH_LIST_HEAD( // doubly-linked list head; see phenom/queue.h
    ph_timerwheel_list,
    ph_timerwheel_timer);

struct ph_timerwheel_timer {
  PH_LIST_ENTRY(ph_timerwheel_timer) t;
  struct ph_timerwheel_list *list;
  struct timeval due;
  int enable;
#define PH_TIMER_DISABLED 0
#define PH_TIMER_ENABLED 1
#define PH_TIMER_LOCKED 2
};
#define PHENOM_WHEEL_BITS 8
#define PHENOM_WHEEL_SIZE (1 << PHENOM_WHEEL_BITS) // 256

struct ph_timerwheel {
  struct timeval next_run;  // actual time of the next tick
  uint32_t tick_resolution; // interval of each tick
  ck_rwlock_t lock;
  struct {
    struct ph_timerwheel_list lists[PHENOM_WHEEL_SIZE];
  } buckets[4];
};
ph_timerwheel provides 4 buckets, which carry into one another the way seconds, minutes, and hours do;
below, TV1 denotes buckets[0], and so on up to TV4 for buckets[3].
TV1, the first table, covers due times of 1 ~ 255 ticks.
Because several timers may be due on the same tick,
ph_timerwheel_list chains all of them into a linked list so they can be processed in order when they expire;
TV2, the second table, covers 256 ~ 65535 ticks;
TV3 and TV4 follow the same pattern. A sketch of the bucket indexing follows.
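This is the standard hierarchical-wheel indexing idea rather than a verbatim copy of corelib/timerwheel.c; treat the exact expressions as assumptions:

// Given a due time in ticks, pick the bucket level by how far in the
// future it falls, then take 8 bits of the absolute tick count as the
// slot within that level.
uint64_t delta = due_ticks - now_ticks;
int level;

if (delta < (1ULL << PHENOM_WHEEL_BITS)) {
  level = 0; // TV1: 1 ~ 255 ticks out
} else if (delta < (1ULL << (2 * PHENOM_WHEEL_BITS))) {
  level = 1; // TV2: 256 ~ 65535 ticks out
} else if (delta < (1ULL << (3 * PHENOM_WHEEL_BITS))) {
  level = 2; // TV3
} else {
  level = 3; // TV4
}
uint32_t slot = (due_ticks >> (level * PHENOM_WHEEL_BITS)) &
    (PHENOM_WHEEL_SIZE - 1);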
In the nbio subsystem, tick_resolution = 100ms; every 100ms each event loop triggers the ph_timerwheel_tick function,
which processes all timers due at the next tick:
ph_timerwheel_tick(ph_timerwheel_t *wheel,
    struct timeval now,
    ph_timerwheel_should_dispatch_func_t should_dispatch,
    ph_timerwheel_dispatch_func_t dispatch,
    void *arg)
idx is the index used to traverse TV1. Each pass of the loop locates the tick currently due and processes every expired timer on it.
wheel->next_run advances by one tick_resolution after each pass, and idx moves forward with it. When idx wraps back to 0, TV1 has been traversed completely,
meaning every timer in TV1 has been processed, so a cascade moves the timers from the later lists (TV2, TV3, ...) forward, much like converting minutes into seconds.
This layered timer-list mechanism greatly reduces the cost of each expiry check: each tick only needs to look at TV1, and a cascade happens only when necessary.
One drawback of the timer wheel is that a cascade can be expensive; in the extreme case several TVs need cascading at the same time, producing a large latency spike.
This is why timeout-style timers are the main application for timer wheels, or put another way, why timer wheels are optimized for timeout-style timers:
such timers mostly guard against error conditions, which are rare, so the timers are usually deleted before they fire and never pay the cascade cost.
The nbio subsystem:
initialization runs ph_nbio_init -> ph_timerwheel_init(&emitters[i].wheel, me->now, WHEEL_INTERVAL_MS);
in ph_nbio_emitter_init, each emitter creates a timerfd; after 100ms the timer expires, the timerfd becomes readable, and the callback tick_epoll fires:
emitter->timer_fd = timerfd_create(
    CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
if (emitter->timer_fd == -1) {
  ph_panic("timerfd_create(CLOCK_MONOTONIC) failed: `Pe%d", errno);
}

memset(&ts, 0, sizeof(ts));
ts.it_interval.tv_nsec = WHEEL_INTERVAL_MS * 1000000;
ts.it_value.tv_nsec = ts.it_interval.tv_nsec;
timerfd_settime(emitter->timer_fd, 0, &ts, NULL);

ph_job_init(&emitter->timer_job);
emitter->timer_job.callback = tick_epoll;
emitter->timer_job.fd = emitter->timer_fd;
emitter->timer_job.data = emitter;
emitter->timer_job.emitter_affinity = emitter->emitter_id;
ph_job_set_nbio(&emitter->timer_job, PH_IOMASK_READ, 0);
Call sequence: ph_nbio_emitter_init -> ph_job_set_nbio -> tick_epoll -> ph_nbio_emitter_timer_tick -> ph_timerwheel_tick
Source: include/phenom/hashtable.h; corelib/hash/; tests/hashtable.c
struct ph_ht {
  uint32_t nelems;
  uint64_t table_size, elem_size, mask;
  const struct ph_ht_key_def *kdef;
  const struct ph_ht_val_def *vdef;
  /* points to the table, an array of table_size elements */
  char *table;
};
ph_result_t ph_ht_init(ph_ht_t *ht, uint32_t size_hint,
    const struct ph_ht_key_def *kdef,
    const struct ph_ht_val_def *vdef)
{
  ht->kdef = kdef;
  ht->vdef = vdef;
  ht->nelems = 0;
  ht->table_size = ph_power_2(size_hint * 2);
  ht->elem_size = sizeof(struct ph_ht_elem) + kdef->ksize + vdef->vsize;
  ht->mask = ht->table_size - 1;
  ht->table = ph_mem_alloc_size(mt_table, ht->elem_size * ht->table_size);
  if (!ht->table) {
    return PH_NOMEM;
  }
  return PH_OK;
}
The implementation uses linear probing; the hash table size is fixed when it is passed to ph_ht_init.
If the table is full, insertion fails; ph_ht_grow must be called explicitly
to enlarge the table by hand, as there is no automatic rehashing.
The ph_hash_bytes_murmur function implements the Murmur hash algorithm.
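To illustrate the linear-probing idea (a generic sketch, not libphenom's actual code; the element layout is made up):

#include <stddef.h>
#include <stdint.h>

struct elem { int used; uint32_t key, val; };

// Scan forward from hash & mask, wrapping around, until we find the key
// or hit an empty slot (which proves the key is absent).
static struct elem *probe(struct elem *table, uint64_t mask,
    uint32_t hash, uint32_t key)
{
  for (uint64_t i = 0; i <= mask; i++) {
    struct elem *e = &table[(hash + i) & mask];
    if (!e->used) {
      return NULL; // empty slot: key not present
    }
    if (e->key == key) {
      return e;    // found
    }
  }
  return NULL;     // table completely full of other keys
}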
Source: include/phenom/thread.h; corelib/thread.c
struct ph_thread {
  bool refresh_time;
  // internal monotonic thread id
  uint32_t tid;
  PH_STAILQ_HEAD(pdisp, ph_job) pending_nbio, pending_pool;
  struct ph_nbio_emitter *is_emitter;
  int is_worker;
  struct timeval now;
  ck_epoch_record_t epoch_record;
  ck_hs_t counter_hs;
  // linkage so that a stat reader can find all counters
  ck_stack_entry_t thread_linkage;
  // OS level representation
  pthread_t thr;
  // If part of a pool, linkage in that pool
  CK_LIST_ENTRY(ph_thread) pool_ent;
  pid_t lwpid;
#ifdef HAVE_STRERROR_R
  char strerror_buf[128];
#endif
  // Name for debugging purposes
  char name[16];
};
What a phenom thread records:
Each phenom thread is assigned a globally unique id and corresponds to one pthread. As the comments note, phenom threads with tid < MAX_RINGS are called preferred threads and own a dedicated job queue; the remaining threads contend for a shared queue, synchronized with a spinlock.
The global pools list keeps every thread pool on a linked list. A pool contains the structures used to block and wake consumers and producers (a futex or condition variable), the ring buffers holding jobs, pointers to the worker threads, and so on.
ph_thread_spawn(func, arg) creates a ph_thread_t thread. It actually calls pthread_create() to run ph_thread_boot(), passing along the real function func() and its argument arg. ph_thread_boot() allocates and initializes a new ph_thread_t structure and then calls the supplied func().
It also wraps join, self, setaffinity, and other pthread operations.
Within a process, threads share almost everything apart from their own stack and registers; what if a thread wants to keep a global variable that belongs to it alone?
Thread-local storage solves this problem.
The ph_thread_self function obtains the thread's own handle this way.
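A sketch of that mechanism (the variable name is illustrative, not the exact libphenom internal):

// Each thread sees its own copy of a __thread variable, so storing a
// pointer to the thread's ph_thread_t there makes "self" an O(1) read.
static __thread ph_thread_t *thread_self_ptr;

ph_thread_t *example_thread_self(void)
{
  return thread_self_ptr; // set once during thread bootstrap
}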
Jobs come in three classes.
Source:
ph_nbio_init() initializes NBIO.
Each emitter is bound to one event loop:
struct ph_nbio_emitter {
  ph_timerwheel_t wheel; // timer wheel
  ph_job_t timer_job;
  uint32_t emitter_id;
  struct timeval last_dispatch;
  int io_fd, timer_fd;
  ph_nbio_affine_job_stailq_t affine_jobs; // typedef PH_STAILQ_HEAD(affine_ent, ph_nbio_affine_job)
  ph_job_t affine_job;
  ph_pingfd_t affine_ping;    // used to wake up epoll
  ph_thread_t *thread;        // bound to a thread
  ph_counter_block_t *cblock; // counter block
};
struct ph_job {
  // data associated with job
  void *data;
  // the callback to run when the job is dispatched
  ph_job_func_t callback;
  // deferred apply list
  PH_STAILQ_ENTRY(ph_job) q_ent;
  // whether we're in a deferred apply
  bool in_apply;
  // for PH_RUNCLASS_NBIO, trigger mask
  ph_iomask_t mask;
  // use ph_job_get_kmask() to interpret
  int kmask;
  // Hashed over the scheduler threads; two jobs with
  // the same emitter hash will run serially wrt. each other
  uint32_t emitter_affinity;
  // For nbio, the socket we're bound to for IO events
  ph_socket_t fd;
  // Holds timeout state
  struct ph_timerwheel_timer timer;
  // When targeting a thread pool, which pool
  ph_thread_pool_t *pool;
  // for SMR
  ck_epoch_entry_t epoch_entry;
  struct ph_job_def *def;
};
ph_sched_run schedules NBIO.
Adding a job to NBIO:
a job is added via ph_job_set_nbio (ph_job_set_nbio_timeout_in actually calls ph_job_set_nbio); a minimal sketch follows below.
Jobs placed on the pending_nbio queue enter the event loop
via ph_sched_run -> process_deferred -> ph_nbio_emitter_apply_io_mask.
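A minimal sketch mirroring the emitter timer setup shown earlier (on_readable and my_fd are made-up names):

static void on_readable(ph_job_t *job, ph_iomask_t why, void *data)
{
  // dispatched by NBIO when my_fd becomes readable
}

ph_job_t job;
ph_job_init(&job);
job.callback = on_readable;
job.fd = my_fd; // assumption: an already-nonblocking descriptor
ph_job_set_nbio(&job, PH_IOMASK_READ, 0);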
Source: include/phenom/thread.h; corelib/job.h; corelib/job.c; tests/tpool.c
struct ph_thread_pool {
  struct ph_thread_pool_wait consumer CK_CC_CACHELINE;
  uint32_t max_queue_len;
  ck_ring_t *rings[MAX_RINGS+1];
  intptr_t used_rings;
  ck_spinlock_t lock CK_CC_CACHELINE;
  char pad1[CK_MD_CACHELINE - sizeof(ck_spinlock_t)];
  struct ph_thread_pool_wait producer CK_CC_CACHELINE;
  int stop;
  char *name;
  ph_counter_scope_t *counters;
  CK_LIST_ENTRY(ph_thread_pool) plink;
  ph_thread_t **threads;
  uint32_t max_workers;
  uint32_t num_workers;
  ph_variant_t *config;
};
The job dispatch process (a usage sketch follows below):
* ph_thread_pool_define defines a pool;
* ph_job_set_pool sets job->pool = pool to associate the job with the pool, then PH_STAILQ_INSERT_TAIL(&me->pending_pool, job, q_ent) places it on the current thread's queue;
* execution flows ph_sched_run -> process_deferred -> _ph_job_set_pool_immediate -> do_set_pool;
* threads with tid < MAX_RINGS have a ring of their own; all others share one ring;
* wake_pool(&pool->consumer) notifies the worker threads.
Job processing:
* ph_sched_run -> _ph_job_pool_start_threads -> ph_thread_pool_start_workers -> worker_thread
The ph_thread_pool_signal_stop function is used to stop the pool.
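A hedged sketch of the user-facing side (the ph_thread_pool_define signature is my reading of the headers, and "demo"/do_work are made-up names):

static void do_work(ph_job_t *job, ph_iomask_t why, void *data)
{
  // runs on one of the pool's worker threads
}

// name, maximum queue length, number of worker threads
ph_thread_pool_t *pool = ph_thread_pool_define("demo", 1024, 4);

ph_job_t job;
ph_job_init(&job);
job.callback = do_work;
ph_job_set_pool(&job, pool);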
Source:
libphenom wraps socket IO, including the descriptor ph_socket_t and the generic address structure phenom_sockaddr.
ph_sock_t wraps the read/write buffers, the job structure used to participate in NBIO, the timeout duration, the callback fired when events occur, and more.
ph_sock_t objects are managed by the NBIO pool.
The process of resolving a hostname and initiating a connection:
struct resolve_and_connect {
  ph_sockaddr_t addr;
  ph_socket_t s;
  int resolve_status;
  int connect_status;
  uint16_t port;
  struct timeval start, timeout, elapsed;
  void *arg;
  ph_sock_connect_func func;
};
def ph_sock_resolve_and_connect(name, port, timeout, resolver, func, arg):
    rac = ph_mem_alloc(mt.resolve_and_connect)
    rac.func = func
    rac.arg = arg
    rac.start = ph_time_now()
    rac.port = port
    if timeout:
        rac.timeout = timeout
    else:
        rac.timeout = 60  # default 60s timeout
    if ph_sockaddr_set_v4(rac.addr, name, port) == PH_OK:  # name is already an IP address
        attempt_connect(rac)
        return
    # resolve the hostname in different ways depending on resolver
    rac.addr = dns_getaddrinfo(resolver)
    attempt_connect(rac)

def attempt_connect(rac):
    # create the socket object
    rac.s = ph_socket_for_addr(rac.addr, SOCK_STREAM, PH_SOCK_CLOEXEC|PH_SOCK_NONBLOCK)
    ph_socket_connect(rac.s, rac.addr, rac.timeout, connected_sock, rac)
struct connect_job {
  ph_job_t job;
  ph_socket_t s;
  ph_sockaddr_t addr;
  int status;
  struct timeval start;
  void *arg;
  ph_socket_connect_func func;
};
def ph_socket_connect(s, addr, timeout, func, arg):
    # connect_job_template = { callback = connect_complete, memtype = mt.connect_job }
    job = (struct connect_job*)ph_job_alloc(connect_job_template)
    job.s, job.addr, job.func, job.arg = s, addr, func, arg
    job.start = ph_time_now()
    res = connect(s, job.addr ...)  # man 2 connect
    if ...:
        # the fd behind s is non-blocking: use the event callback
        # mechanism, with connect_complete as the callback
        job.job.fd = s
        job.job.callback = connect_complete
        job.job.data = job
        ph_job_set_nbio_timeout_in(&job->job, PH_IOMASK_WRITE,
            timeout ? *timeout : default_timeout)
        return
    # synchronous completion: call the callback (connected_sock) directly
    done = now - job.start
    func(s, addr, res == 0 ? 0 : errno, done, arg)
def connect_complete(ph_job_t *j, ph_iomask_t why, void *data):
    struct connect_job *job = data
    if why == PH_IOMASK_TIME:
        status = ETIMEDOUT
    # invoke the previously registered callback, connected_sock
    job.func(job.s, job.addr, status, done, job.arg)

def connected_sock(s, addr, status, elapsed, arg):
    struct resolve_and_connect *rac = arg
    sock = ph_sock_new_from_socket(s, NULL, addr)
    calc_elapsed(rac)
    # invoke the user-supplied callback, of type ph_sock_connect_func
    rac.func(sock, PH_SOCK_CONNECT_SUCCESS, 0, addr, rac.elapsed, rac.arg)
ph_sock_t, the abstraction of one socket connection:
struct ph_sock {
  // Embedded job so we can participate in NBIO
  ph_job_t job;
  // Buffers for output, input
  ph_bufq_t *wbuf, *rbuf;
  // The per IO operation timeout duration
  struct timeval timeout_duration;
  // A stream for writing to the underlying connection
  ph_stream_t *conn;
  // A stream representation of myself. Writing bytes into the
  // stream causes the data to be buffered in wbuf
  ph_stream_t *stream;
  // Dispatcher
  ph_sock_func callback;
  bool enabled;
  // sockname, peername as seen from this host.
  // These correspond to the raw connection we see; if we are
  // proxied, these are the names of our connection to the proxy.
  // If we are not proxied, these are the same as the equivalents below
  ph_sockaddr_t via_sockname, via_peername;
  // sockname, peername as seen from the connected peer
  // These are the actual outgoing address endpoints, independent of
  // any proxying that may be employed
  ph_sockaddr_t sockname, peername;
  // If we've switched up to SSL, holds our SSL context
  SSL *ssl;
  ph_stream_t *ssl_stream;
  ph_sock_openssl_handshake_func handshake_cb;
  ph_bufq_t *sslwbuf;
};
// Creating a ph_sock_t
// called by connected_sock and accept_dispatch
def ph_sock_new_from_socket(ph_socket_t s, ph_sockaddr_t *sockname, ph_sockaddr_t *peername):
    # sock_job_template = {sock_dispatch, mt.sock}; the allocated struct is a ph_sock_t
    sock = (ph_sock_t*)ph_job_alloc(&sock_job_template)
    # read/write buffers default to 128k
    max_buf = ph_config_query_int("$.socket.max_buffer_size", MAX_SOCK_BUFFER_SIZE)
    sock->wbuf = ph_bufq_new(max_buf)
    sock->rbuf = ph_bufq_new(max_buf)
    sock->conn = ph_stm_fd_open(s, 0, 0)
    sock->stream = ph_stm_make(&sock_stm_funcs, sock, 0, 0)
    # sockname records the local address, peername the remote address
    sock->sockname = *sockname
    sock->peername = *peername
    # default 60s timeout
    sock->timeout_duration.tv_sec = 60
    return sock
// How a sock joins NBIO, using the ph_sock_connect_func callback as an example
def connect_cb(ph_sock_t *sock, ...):
    # set the dispatch callback and enable the sock
    sock->callback = remote_cb
    ph_sock_enable(sock, true)
// When an event occurs on the fd behind a ph_sock, the NBIO entry point is:
def sock_dispatch(j, why, data):
    # ignore SSL for now and skip that code
    ph_sock_t *sock = (ph_sock_t*)j
    sock->conn->need_mask = 0
    # flush the data buffered in wbuf to the fd
    try_send(sock)
    # read data from the kernel into rbuf
    try_read(sock)
    # set the appropriate mask and invoke the user-registered callback
    # ...
    sock->callback(sock, why, data)
// Releasing a ph_sock_t once the connection needs to be closed
ph_sock_shutdown(sock, PH_SOCK_SHUT_RDWR);
// if sock->job.data was malloc'd earlier, it must be freed here
ph_mem_free(mt_state, state);
ph_sock_free(sock);
Reading from and writing to socks: