zabbix是一个实时监控系统,可以监控服务器的运行状态(cpu,内存,端口),并且可以发送报警邮件和短信,或者执行远程命令.由于工作上需要对zabbix进行二次开发,所以借此机会阅读了一下zabbix的源代码.zabbix前台是php实现的,后台是用c写的,按功能分为几个部分:agent,server,proxy,sender. zabbix_agent负责收集机器上的数据(比如cpu利用率,内存剩余量等)发送给zabbix_server,zabbix_server负责收集zabbix_agent或者zabbix_proxy发送过来的数据,更新数据库,而前台的php代码就可以从数据库内拿出这些数据进行显示。
先看zabbix_agent,它默认会启动三类线程(进程):collector_thread、listener_thread(数量由CONFIG_ZABBIX_FORKS决定)、active_checks_thread(仅在未禁用主动检查时启动):
/*
 * Launch the agent's workers. Each worker receives its own zbx_thread_args_t
 * (allocated with zbx_malloc) holding the index of its slot in threads[] and
 * a pointer to worker-specific arguments.
 */

/* collector: no worker-specific arguments */
thread_args = (zbx_thread_args_t *)zbx_malloc(NULL, sizeof(*thread_args));
thread_args->args = NULL;
thread_args->thread_num = thread_num;
threads[thread_num++] = zbx_thread_start(collector_thread, thread_args);

/* listeners: CONFIG_ZABBIX_FORKS of them, all handed the same listen_sock */
for (i = 0; i < CONFIG_ZABBIX_FORKS; i++)
{
	thread_args = (zbx_thread_args_t *)zbx_malloc(NULL, sizeof(*thread_args));
	thread_args->args = &listen_sock;
	thread_args->thread_num = thread_num;
	threads[thread_num++] = zbx_thread_start(listener_thread, thread_args);
}

/* active checks: started only when they have not been disabled */
if (0 == CONFIG_DISABLE_ACTIVE)
{
	/* destination the active-check worker will talk to */
	activechk_args.port = (unsigned short)CONFIG_SERVER_PORT;
	activechk_args.host = CONFIG_HOSTS_ALLOWED;

	thread_args = (zbx_thread_args_t *)zbx_malloc(NULL, sizeof(*thread_args));
	thread_args->args = &activechk_args;
	thread_args->thread_num = thread_num;
	threads[thread_num++] = zbx_thread_start(active_checks_thread, thread_args);
}
其中collector_thread会每隔1s收集本机的cpu状态存入结构体ZBX_CPUS_STAT_DATA中,将磁盘的状态存入结构体ZBX_DISKDEVICES_DATA
/*
 * Collector state for a single CPU.
 * NOTE(review): the per-field notes are inferred from names and types only —
 * confirm against the collector code.
 */
typedef struct
{
	/* one history array of MAX_COLLECTOR_HISTORY counters per CPU state */
	zbx_uint64_t	h_counter[ZBX_CPU_STATE_COUNT][MAX_COLLECTOR_HISTORY];
	/* one status byte per history slot */
	unsigned char	h_status[MAX_COLLECTOR_HISTORY];
	int		h_first;	/* presumably index of the oldest history slot */
	int		h_count;	/* presumably number of history slots in use */
	int		cpu_num;	/* presumably the number identifying this CPU */
} ZBX_SINGLE_CPU_STAT_DATA;
/*
 * Container for the per-CPU collector records.
 * NOTE(review): `count` presumably gives the number of entries `cpu` points
 * to — confirm against the code that allocates it.
 */
typedef struct
{
	ZBX_SINGLE_CPU_STAT_DATA	*cpu;	/* pointer to the per-CPU record(s) */
	int				count;
} ZBX_CPUS_STAT_DATA;
/*
 * Collector state for a single disk device.
 * NOTE(review): field meanings are inferred from names and types only
 * (r_* / w_* presumably read / write; sect / oper / byte presumably sectors /
 * operations / bytes; *ps presumably per-second rates) — confirm against the
 * collector code.
 */
typedef struct c_single_diskdevice_data
{
	char		name[32];	/* device name buffer */
	int		index;		/* presumably the current position in the history arrays */

	/* history arrays, MAX_COLLECTOR_HISTORY slots each */
	time_t		clock[MAX_COLLECTOR_HISTORY];
	zbx_uint64_t	r_sect[MAX_COLLECTOR_HISTORY];
	zbx_uint64_t	r_oper[MAX_COLLECTOR_HISTORY];
	zbx_uint64_t	r_byte[MAX_COLLECTOR_HISTORY];
	zbx_uint64_t	w_sect[MAX_COLLECTOR_HISTORY];
	zbx_uint64_t	w_oper[MAX_COLLECTOR_HISTORY];
	zbx_uint64_t	w_byte[MAX_COLLECTOR_HISTORY];

	/* derived values, ZBX_AVG_COUNT entries each */
	double		r_sps[ZBX_AVG_COUNT];
	double		r_ops[ZBX_AVG_COUNT];
	double		r_bps[ZBX_AVG_COUNT];
	double		w_sps[ZBX_AVG_COUNT];
	double		w_ops[ZBX_AVG_COUNT];
	double		w_bps[ZBX_AVG_COUNT];
} ZBX_SINGLE_DISKDEVICE_DATA;
/*
 * Fixed-capacity table of disk device records (MAX_DISKDEVICES entries).
 * NOTE(review): `count` presumably holds how many entries of `device` are in
 * use — confirm against the collector code.
 */
typedef struct c_diskdevices_data
{
	int				count;
	ZBX_SINGLE_DISKDEVICE_DATA	device[MAX_DISKDEVICES];
} ZBX_DISKDEVICES_DATA;
zabbix_agent和zabbix_server通信有两种方式:主动和被动,即它可以主动地发送信息给zabbix_server,也可以等zabbix_server请求时再发送相关的信息进行响应。默认情况下这两种方式都开启了。另外,zabbix_server和zabbix_agent通信的协议是json格式的,详细格式请查阅官网文档。
listener_thread进程是负责被动方式,它负责监听10050端口,然后等待server端的请求并进行响应,请看下面的代码(listener.c)
/*
 * Serve one request on an accepted connection.
 *
 * Reads a command from socket `s` (bounded by CONFIG_TIMEOUT), strips trailing
 * CR/LF, hands it to process(), and sends back the text result or, when there
 * is none, the message result. Nothing is sent if neither is available.
 * A FAIL from either the receive or the send is logged at debug level.
 */
static void	process_listener(zbx_sock_t *s)
{
	AGENT_RESULT	result;
	char		*request;
	char		**reply;
	int		rc;

	rc = zbx_tcp_recv_to(s, &request, CONFIG_TIMEOUT);

	if (SUCCEED == rc)
	{
		zbx_rtrim(request, "\r\n");

		zabbix_log(LOG_LEVEL_DEBUG, "Requested [%s]", request);

		init_result(&result);

		process(request, 0, &result);

		/* prefer the text result, fall back to the message result */
		reply = GET_TEXT_RESULT(&result);

		if (NULL == reply)
			reply = GET_MSG_RESULT(&result);

		if (NULL != reply)
		{
			zabbix_log(LOG_LEVEL_DEBUG, "Sending back [%s]", *reply);

			/* rc now reflects the send, so a failed send is reported below */
			rc = zbx_tcp_send_to(s, *reply, CONFIG_TIMEOUT);
		}

		free_result(&result);
	}

	if (FAIL == rc)
		zabbix_log(LOG_LEVEL_DEBUG, "Process listener error: %s", zbx_tcp_strerror());
}
值得一提的是从服务器接收的变量command,它其实就是我们在前台页面添加item时填的key,接下来有这么一个结构
/*
 * Table of common agent item keys. Each row ties a key string to the function
 * executed for that key, plus a flag and two parameter columns (see the
 * column header below).
 */
ZBX_METRIC	parameters_common[] =
/*	KEY			FLAG		FUNCTION		ADD_PARAM	TEST_PARAM */
{
	{"agent.ping",		0,		AGENT_PING,		0,	0},
	{"agent.version",	0,		AGENT_VERSION,		0,	0},

	{"system.localtime",	0,		SYSTEM_LOCALTIME,	0,	0},
	{"system.run",		CF_USEUPARAM,	SYSTEM_RUN,		0,	"echo test"},

	{"web.page.get",	CF_USEUPARAM,	WEB_PAGE_GET,		0,	"localhost,,80"},
	{"web.page.perf",	CF_USEUPARAM,	WEB_PAGE_PERF,		0,	"localhost,,80"},
	{"web.page.regexp",	CF_USEUPARAM,	WEB_PAGE_REGEXP,	0,	"localhost,,80,OK"},

	/*
	 * NULL-key sentinel row so that code walking the table until it meets a
	 * NULL key stops at the end of the array.
	 * NOTE(review): the excerpt had neither this row nor the closing ';' —
	 * confirm the sentinel against the code that iterates this table.
	 */
	{0}
};
这个结构体将key和agent接收到key后执行的function联系到了一起
active_checks_thread进程是负责主动发送收集到的信息给zabbix_server的。除此之外,它还负责发送心跳包以及更新自己维护的一个active_checks条目表,部分代码(active.c:ZBX_THREAD_ENTRY(active_checks_thread, args))如下:
/*
 * Main loop of the active-checks worker; repeats while ZBX_IS_RUNNING() holds.
 * Each pass may (1) call send_buffer(), (2) refresh the list of active checks
 * and (3) either process the active checks or sleep for one second.
 */
while (ZBX_IS_RUNNING())
{
	/* at most once per second; the article describes this as sending the heartbeat packet */
	if (time(NULL) >= nextsend)
	{
		send_buffer(activechk_args.host, activechk_args.port);
		nextsend = (int)time(NULL) + 1;
	}

	/* refresh the locally maintained table of active checks */
	if (time(NULL) >= nextrefresh)
	{
		zbx_setproctitle("poller [getting list of active checks]");

		if (FAIL != refresh_active_checks(activechk_args.host, activechk_args.port))
		{
			nextrefresh = (int)time(NULL) + CONFIG_REFRESH_ACTIVE_CHECKS;
		}
		else
		{
			/* refresh failed: schedule the next attempt 60 seconds out */
			nextrefresh = (int)time(NULL) + 60;
		}
	}

	/* no check is due yet, or buffer.pcount has reached half of CONFIG_BUFFER_SIZE: idle */
	if (time(NULL) < nextcheck || CONFIG_BUFFER_SIZE / 2 <= buffer.pcount)
	{
		zabbix_log(LOG_LEVEL_DEBUG, "Sleeping for %d second(s)", 1);
		zbx_setproctitle("poller [sleeping for %d second(s)]", 1);
		zbx_sleep(1);
		continue;
	}

	/* actively send the collected values to zabbix_server */
	zbx_setproctitle("poller [processing active checks]");
	process_active_checks(activechk_args.host, activechk_args.port);

	if (CONFIG_BUFFER_SIZE / 2 <= buffer.pcount)	/* failed to complete processing active checks */
		continue;

	nextcheck = get_min_nextcheck();

	/* no next check time available: look again 60 seconds out */
	if (FAIL == nextcheck)
		nextcheck = (int)time(NULL) + 60;
}
转载请注明出处:http://blog.csdn.net/liujian0616/article/details/7932323