以下的分析基于libvirt 3.0版本。
libvirt是一套免费,开源的支持linux下的主流虚拟化管理工具,目前有大量的应用程序构建在libvirt之上,很多虚拟化产品的开发都是灵活调用libvirt的API接口去实现的。对于应用程序,libvirt提供一套非阻塞调用的框架。
涉及相关的API:
virInitialize:初始化libvirt库,主要针对多线程编程
virEventRegisterDefaultImpl:基于poll系统调用注册默认事件实现,这是一个通用实现。
virEventRunDefaultImpl:循环运行事件,需要在线程中单独运行该函数
virConnectOpen:连接libvirt服务端,即libvirtd
virConnectSetKeepAlive:设置保活周期,此函数控制客户端发送keepalive消息。
相关的实现代码如下:
/* Initialize the libvirt library (needed mainly for multi-threaded programs). */
if (virInitialize() < 0) {
    fprintf(stderr, "failed to initialize libvirt (virInitialize)\n");
    return 1;
}

/*
 * Register libvirt's default event implementation (required): until an
 * implementation is registered, virEventAddTimeout() returns -1 and the
 * keepalive timer cannot be created.
 */
if (virEventRegisterDefaultImpl() < 0) {
    fprintf(stderr, "failed to register default event implementation\n");
    return 2;
}
/*
 * Event-dispatch thread (required): runs iterations of the default event
 * loop until the external `isexit` flag becomes non-zero.
 *
 * data: unused.
 * Returns a pointer to a static status string as the thread's exit value.
 *
 * NOTE(review): `isexit` is defined outside this snippet; if it is written
 * by another thread it should be an atomic type -- confirm at its definition.
 */
void *libvirt_thread_cb(void *data)
{
    (void)data;

    while (!isexit) {
        /* Report a failed iteration instead of silently ignoring it. */
        if (virEventRunDefaultImpl() < 0) {
            fprintf(stderr, "failed to run libvirt event loop iteration\n");
        }
    }

    /*
     * Returning from the thread start routine is equivalent to calling
     * pthread_exit() with the same value, so no unreachable statement is
     * left behind an explicit pthread_exit() call.
     */
    return (void *)"libvirt pthreadwill exit!!!";
}
//连接libvirt服务端,并且设计保活机制
contor =virConnectOpen(NULL);//参数为空,表示连接本地的libvirtd服务端
if (contor) {
//第二个参数,心跳发送周期; 第三个参数,心跳参数,当超过该次数时,连接断开
if (virConnectSetKeepAlive(contor, 10, 6)< 0) {
printf("failed to set connkeep alive config\n");
virConnectClose(contor);
contor = NULL;
}
}
通过以上的实现,后续就可以非阻塞调用libvirt其它的API接口,当libvirtd阻塞时,能够超时返回。
对于libvirtd端相关的心跳周期保存在libvirtd.conf文件中,可以修改参数,然后再重启libvirtd即可生效:
keepalive_interval = 10  # seconds; default is 5
keepalive_count = 6      # number of unanswered keepalive messages (a count, not seconds); default is 5
相关的源码分析:
virEventRegisterDefaultImpl函数分析,源码如下:
/*
 * Register the built-in virEventPoll* handle/timeout callbacks as the
 * event implementation.
 *
 * Returns 0 on success; -1 (after dispatching the error) when
 * virEventPollInit() fails.
 */
int virEventRegisterDefaultImpl(void)
{
    int rc;

    VIR_DEBUG("registering default event implementation");

    virResetLastError();

    /* The poll-based loop must be initialized before it is registered. */
    rc = virEventPollInit();
    if (rc < 0) {
        virDispatchError(NULL);
        return -1;
    }

    virEventRegisterImpl(virEventPollAddHandle,
                         virEventPollUpdateHandle,
                         virEventPollRemoveHandle,
                         virEventPollAddTimeout,
                         virEventPollUpdateTimeout,
                         virEventPollRemoveTimeout);

    return 0;
}
通过源码可以看出,该函数调用virEventRegisterImpl为一组全局函数指针赋值。至于为什么要这样做,下文再分析。
virConnectSetKeepAlive函数分析:
通过源码分析,最终会调用virKeepAliveStart函数去启用定时器,发送keepalive消息。
/*
 * Start sending keepalive messages for @ka by registering a timer.
 *
 * ka:       keepalive object; locked for the duration of the call.
 * interval: keepalive interval; when > 0 it is stored in ka->interval
 *           (rejected if an interval was already set or if it would
 *           overflow when converted to milliseconds).
 * count:    stored in ka->count and ka->countToDeath together with interval.
 *
 * Returns 0 when the timer was created, was already present, or keepalive
 * is disabled (ka->interval <= 0); -1 on error.
 */
int
virKeepAliveStart(virKeepAlivePtr ka,
                  int interval,
                  unsigned int count)
{
    int ret = -1;
    time_t delay;
    int timeout;
    time_t now;

    virObjectLock(ka);

    /* A timer already exists: nothing to do. */
    if (ka->timer >= 0) {
        VIR_DEBUG("Keepalive messages already enabled");
        ret = 0;
        goto cleanup;
    }

    if (interval > 0) {
        /* The interval may be configured only once. */
        if (ka->interval > 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("keepalive interval already set"));
            goto cleanup;
        }
        /* Guard against overflow (the timeout is multiplied by 1000 below) */
        if (interval > INT_MAX / 1000) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("keepalive interval %d too large"), interval);
            goto cleanup;
        }
        ka->interval = interval;
        ka->count = count;
        ka->countToDeath = count;
    }

    /* An interval of zero or less means keepalive is disabled. */
    if (ka->interval <= 0) {
        VIR_DEBUG("Keepalive messages disabled by configuration");
        ret = 0;
        goto cleanup;
    }

    PROBE(RPC_KEEPALIVE_START,
          "ka=%p client=%p interval=%d count=%u",
          ka, ka->client, interval, count);

    /*
     * Fire immediately if more than one interval has passed since the last
     * packet was received; otherwise wait for the remainder of the interval.
     */
    now = time(NULL);
    delay = now - ka->lastPacketReceived;
    if (delay > ka->interval)
        timeout = 0;
    else
        timeout = ka->interval - delay;
    ka->intervalStart = now - (ka->interval - timeout);

    /* Create the keepalive timer (timeout converted to milliseconds). */
    ka->timer = virEventAddTimeout(timeout * 1000, virKeepAliveTimer,
                                   ka, virObjectFreeCallback);
    if (ka->timer < 0)
        goto cleanup;

    /* the timer now has another reference to this object */
    virObjectRef(ka);
    ret = 0;

 cleanup:
    virObjectUnlock(ka);
    return ret;
}
继续分析virEventAddTimeout函数,源码如下:
/*
 * Forward a timer registration to the currently registered event
 * implementation.
 *
 * Returns whatever addTimeoutImpl returns, or -1 when no implementation
 * has been registered (addTimeoutImpl is unset).
 */
int
virEventAddTimeout(int timeout,
                   virEventTimeoutCallback cb,
                   void *opaque,
                   virFreeCallback ff)
{
    return addTimeoutImpl ? addTimeoutImpl(timeout, cb, opaque, ff) : -1;
}
该函数的实现很简单:通过全局函数指针addTimeoutImpl去设置定时器。addTimeoutImpl的赋值是在应用程序调用virEventRegisterDefaultImpl函数时完成的;如果没有注册,addTimeoutImpl为空,virEventAddTimeout直接返回-1,心跳定时器无法创建。这就是必须调用virEventRegisterDefaultImpl函数的原因。
如果没有做以上的设置,为什么会阻塞,阻塞在哪个地方?通过gdb调试,发现阻塞在virNetClientIOEventLoop函数中的poll系统调用上。堆栈如下:
(gdb) bt
#0 virNetClientIOEventLoop (client=0x2026d30, thiscall=0x1dc57a0) at ../../../src/rpc/virnetclient.c:1595
#1 0x00007fd5dc8368a3 in virNetClientIO (client=0x2026d30, thiscall=0x1dc57a0) at ../../../src/rpc/virnetclient.c:1950
#2 0x00007fd5dc8370b7 in virNetClientSendInternal (client=0x2026d30, msg=0x2026c60, expectReply=true, nonBlock=false) at ../../../src/rpc/virnetclient.c:2122
#3 0x00007fd5dc837141 in virNetClientSendWithReply (client=0x2026d30, msg=0x2026c60) at ../../../src/rpc/virnetclient.c:2150
#4 0x00007fd5dc838048 in virNetClientProgramCall (prog=0x2027150, client=0x2026d30, serial=8, proc=212, noutfds=0, outfds=0x0, ninfds=0x0, infds=0x0, args_filter=0x7fd5dc82ce9f
args=0x7fffd92ffba0, ret_filter=0x7fd5dc82cf19
#5 0x00007fd5dc819442 in callFull (conn=0x1a24380, priv=0x1a77660, flags=0, fdin=0x0, fdinlen=0, fdout=0x0, fdoutlen=0x0, proc_nr=212, args_filter=0x7fd5dc82ce9f
args=0x7fffd92ffba0 "0T\002\002", ret_filter=0x7fd5dc82cf19
#6 0x00007fd5dc819515 in call (conn=0x1a24380, priv=0x1a77660, flags=0, proc_nr=212, args_filter=0x7fd5dc82ce9f
ret_filter=0x7fd5dc82cf19
#7 0x00007fd5dc7fd77b in remoteDomainGetState (domain=0x1dc8f60, state=0x7fffd92ffcdc, reason=0x0, flags=0) at ../../../src/remote/remote_driver.c:2458
#8 0x00007fd5dc7b5b2f in virDomainGetState (domain=0x1dc8f60, state=0x7fffd92ffcdc, reason=0x0, flags=0) at ../../../src/libvirt-domain.c:2495
virNetClientIOEventLoop函数分析:
/*
 * Excerpt of the client-side I/O loop: repeatedly poll()s the connection
 * socket and the client's wakeup pipe.
 *
 * NOTE(review): this is an abridged excerpt -- `timeout` is used but not
 * declared in the lines shown, and the function's closing brace is missing;
 * the complete function is presumably longer.
 */
static int virNetClientIOEventLoop(virNetClientPtr client,
virNetClientCallPtr thiscall)
{
struct pollfd fds[2];
int ret;
/* fds[0] is the connection socket, fds[1] the wakeup read descriptor. */
fds[0].fd = virNetSocketGetFD(client->sock);
fds[1].fd = client->wakeupReadFD;
for (;;) {
/* If we are non-blocking, then we don't want to sleep in poll() */
if (thiscall->nonBlock)
timeout = 0;
/* Limit timeout so that we can send keepalive request in time */
if (timeout == -1)
timeout = virKeepAliveTimeout(client->keepalive); /* in the scenario analysed here this returns -1, so poll() below waits with no timeout */
fds[0].events = fds[0].revents = 0;
fds[1].events = fds[1].revents = 0;
/* The wakeup descriptor is always watched for readability. */
fds[1].events = POLLIN;
/* Calculate poll events for calls */
virNetClientCallMatchPredicate(client->waitDispatch,
virNetClientIOEventLoopPollEvents,
&fds[0]);
/* Also watch the socket for input while any stream is open. */
if (client->nstreams)
fds[0].events |= POLLIN;
repoll:
ret = poll(fds, ARRAY_CARDINALITY(fds), timeout);
/* Retry the poll when it was merely interrupted (EAGAIN/EINTR). */
if (ret < 0 && (errno == EAGAIN || errno == EINTR))
goto repoll;
}
通过源码可以分析,由于应用程序没有调用virConnectSetKeepAlive函数设置心跳保活机制,导致client->keepalive为NULL,分析virKeepAliveTimeout函数可知,当client->keepalive为NULL时,直接返回为-1;导致poll系统调用一直阻塞,直到有事件响应。
总结:通过以上设置,调用virConnectOpen函数连接libvirtd之后,就可以非阻塞地调用libvirt的其它API;当libvirtd阻塞时,调用者不会一直阻塞,而是能够超时返回。
当libvirt主线程阻塞时,上述的设置并不能解决virConnectOpen阻塞的问题,需要修改libvirt相关的代码。至于为什么,自己去思考,怎么解决这个问题?