sem_timedwait 和修改系统时间
对于int sem_timedwait(sem_t *sem, const struct timespec *abs_timeout);
传入的第二个阻塞时间参数是绝对的时间戳。原意是timeout时间是墙上时间,到达那个时间点如果还没有等到信号量,那就timeout.
如果有人修改了系统时间,那么这个API的行为可能就不是你想要的了。
但是在一些Linux系统里面,它最终却是按照系统启动时间(单调时钟)的行为来工作的,这歪打正着地正合很多人的用意。当然我刚开始也是这样理所当然地使用着,我的代码一直都不受修改系统时间的影响。
直到前些日子我们做新项目时,glibc从原来的2.19升级到了2.30。结果修改系统时间后,我的一些使用sem_timedwait的代码逻辑就不对了: 如果把时间改到过去,就要等很久才会timeout;如果把时间改到未来,就会立刻timeout。
所以很大的可能是和glibc有关系了。于是仔细分析glibc不同版本的代码,我发现了如下的信息:
glibc: 2.19 |
glibc: 2.30 |
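#include <errno.h>
#include <sysdep.h>
#include <lowlevellock.h>
#include <internaltypes.h>
#include <semaphore.h>

#include <pthreadP.h>
#include <shlib-compat.h>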
extern void __sem_wait_cleanup (void *arg) attribute_hidden;
/* This is in a separate function in order to make sure gcc puts the call
   site into an exception region, and thus the cleanups get properly run.

   Waits on the futex word isem->value (expected value 0) with RT as the
   timeout and returns the result of lll_futex_timed_wait unchanged.  */
static int
__attribute__ ((noinline))
do_futex_timed_wait (struct new_sem *isem, struct timespec *rt)
{
  /* The value returned here is handed back to
     __pthread_disable_asynccancel once the wait is over.  */
  int err, oldtype = __pthread_enable_asynccancel ();

  err = lll_futex_timed_wait (&isem->value, 0, rt, isem->private ^ FUTEX_PRIVATE_FLAG);// Paky: rt is a relative timeout, so the kernel simply counts it on CLOCK_MONOTONIC!

  __pthread_disable_asynccancel (oldtype);
  return err;
}
/* glibc 2.19 implementation: wait on SEM until it can be decremented or
   until the absolute time ABSTIME is reached.

   Returns 0 once the semaphore value has been decremented.  Returns -1 with
   errno set to EINVAL when abstime->tv_nsec is out of range, to ETIMEDOUT
   when the deadline already lies in the past, or to the error reported by
   the futex wait.

   Note that on every loop iteration the absolute deadline is converted into
   a relative timeout using the current time from __gettimeofday, and only
   that relative value is passed on to do_futex_timed_wait.  */
int
sem_timedwait (sem_t *sem, const struct timespec *abstime)
{
  struct new_sem *isem = (struct new_sem *) sem;
  int err;

  /* Fast path: the semaphore could be decremented without waiting.  */
  if (atomic_decrement_if_positive (&isem->value) > 0)
    return 0;

  /* Reject a nanosecond field outside [0, 1000000000).  */
  if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)
    {
      __set_errno (EINVAL);
      return -1;
    }

  /* Count this thread as a waiter for the duration of the loop.  */
  atomic_increment (&isem->nwaiters);

  pthread_cleanup_push (__sem_wait_cleanup, isem);

  while (1)
    {
      struct timeval tv;
      struct timespec rt;
      int sec, nsec;

      /* Get the current time. */
      __gettimeofday (&tv, NULL);

      /* Compute relative timeout. */
      sec = abstime->tv_sec - tv.tv_sec;
      nsec = abstime->tv_nsec - tv.tv_usec * 1000;
      if (nsec < 0)
        {
          /* Borrow one second so that nsec is non-negative again.  */
          nsec += 1000000000;
          --sec;
        }

      /* Already timed out? */
      if (sec < 0)
        {
          __set_errno (ETIMEDOUT);
          err = -1;
          break;
        }

      /* Do wait. */
      rt.tv_sec = sec;
      rt.tv_nsec = nsec;
      err = do_futex_timed_wait(isem, &rt);
      /* -EWOULDBLOCK is not treated as a failure: fall through and retry
         the decrement.  Any other non-zero result is reported via errno.  */
      if (err != 0 && err != -EWOULDBLOCK)
        {
          __set_errno (-err);
          err = -1;
          break;
        }

      /* Try again to take the semaphore; otherwise loop and recompute the
         remaining timeout.  */
      if (atomic_decrement_if_positive (&isem->value) > 0)
        {
          err = 0;
          break;
        }
    }

  pthread_cleanup_pop (0);

  atomic_decrement (&isem->nwaiters);

  return err;
} |
#include "sem_waitcommon.c"
/* This is in a separate file because sem_timedwait is only provided if
   __USE_XOPEN2K is defined.

   glibc 2.30 implementation: validates abstime->tv_nsec, tries the fast
   path, and otherwise calls __new_sem_wait_slow with CLOCK_REALTIME and the
   unmodified absolute deadline ABSTIME.  */
int
sem_timedwait (sem_t *sem, const struct timespec *abstime)
{
  /* Reject a nanosecond field outside [0, 1000000000).  */
  if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)
    {
      __set_errno (EINVAL);
      return -1;
    }

  /* Check sem_wait.c for a more detailed explanation why it is required. */
  __pthread_testcancel ();

  /* A zero result from the fast path means no waiting is needed; otherwise
     the slow path is given the clock id together with the absolute
     deadline.  */
  if (__new_sem_wait_fast ((struct new_sem *) sem, 0) == 0)
    return 0;
  else
    return __new_sem_wait_slow ((struct new_sem *) sem, CLOCK_REALTIME, abstime);
} |
2.19 对应的程序的strace log: | 2.30 对应的程序的strace log: |
[pid 2414] 15:31:00.844765 futex(0x37188, FUTEX_WAIT_PRIVATE, 0, {1, 999572500}) = -1 ETIMEDOUT (Connection timed out) <1.999733>
|
[pid 3405] 03:40:45.437693 futex(0xb2c490, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 1, {tv_sec=1879405212750659647, tv_nsec=859005174888}, FUTEX_BITSET_MATCH_ANY
|
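从上面的表格可以看出,其实kernel支持多种timeout时钟:CLOCK_MONOTONIC和CLOCK_REALTIME。glibc 2.19先把绝对时间换算成相对超时再交给futex(相对超时按CLOCK_MONOTONIC计时),而glibc 2.30则直接把绝对时间连同FUTEX_CLOCK_REALTIME一起交给kernel。也就是说,到了glibc 2.30,这个sem_timedwait的行为才和它的说明手册保持一致了。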
那如果我还想用glibc2.19那种timeout的行为怎么办?两种方法:
把你的glibc降级到glibc 2.19或者以下的版本。这时在Linux平台上,超时使用的应该就是系统启动时间的时钟(CLOCK_MONOTONIC)。
寻找使用CLOCK_MONOTONIC时钟的timeout API。在新的glibc 2.30里面已经有新的API了: int sem_clockwait (sem_t *sem, clockid_t clockid, const struct timespec *abstime)。Behaves like sem_timedwait except the time abstime is measured against the clock specified by clockid rather than CLOCK_REALTIME. Currently, clockid must be either CLOCK_MONOTONIC or CLOCK_REALTIME.
截至目前这个问题应该是找到根本原因和解决方案了。所以我只需要根据当前的glibc的版本来决定我的代码要如何封装,比如通过编译选项来控制我们是否可以用新的API:sem_clockwait
#if __GLIBC__ >= 2 && __GLIBC_MINOR >= 30
//TRUE if the version of the GLib header files is the same as or newer than the passed-in version.
if (clock_type == 1)
{
while ( ( status = sem_clockwait( semId, CLOCK_MONOTONIC, &expire ) ) != 0 && errno == EINTR );
}
else
{
while ( ( status = sem_clockwait( semId, CLOCK_REALTIME, &expire ) ) != 0 && errno == EINTR );
}
#else
while ( ( status = sem_timedwait( semId, &expire ) ) != 0 && errno == EINTR );
#endif
下面是我当时写的一个简单的测试用例:
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <errno.h>
#include <semaphore.h>
#include <time.h>
#include <sys/time.h>
#include <unistd.h>
//# define CLOCK_REALTIME 0
/* Monotonic system-wide clock. */
//# define CLOCK_MONOTONIC 1
/*
 * Compute an absolute deadline that lies delayTicks seconds after "now".
 *
 * ts_caller   Receives the deadline. Nothing is computed when it is NULL.
 * clock_type  1 reads CLOCK_MONOTONIC; any other value reads CLOCK_REALTIME.
 * delayTicks  Delay added to tv_sec, i.e. despite its name it is a number of
 *             whole seconds. tv_nsec is copied from the current time.
 *
 * Returns 0 on success, or -1 when ts_caller is NULL or the clock could not
 * be read (in which case *ts_caller is left untouched).
 */
int GetTimeout(struct timespec *ts_caller, int clock_type, int delayTicks)
{
    struct timespec ts_cur;
    clockid_t clock_id;

    if (ts_caller == NULL)
    {
        return -1;
    }

    clock_id = (clock_type == 1) ? CLOCK_MONOTONIC : CLOCK_REALTIME;

    /* Never build a deadline from an uninitialised timestamp. */
    if (clock_gettime(clock_id, &ts_cur) != 0)
    {
        return -1;
    }

    ts_caller->tv_sec = ts_cur.tv_sec + delayTicks;
    ts_caller->tv_nsec = ts_cur.tv_nsec;
    return 0;
}
/*
 * Take semId, waiting for at most `timeout` seconds.
 *
 * semId       Semaphore to wait on.
 * clock_type  1 requests a CLOCK_MONOTONIC deadline, any other value a
 *             CLOCK_REALTIME deadline. The request is only honoured when
 *             sem_clockwait() is available (glibc >= 2.30).
 * timeout     Maximum wait in seconds, handed to GetTimeout().
 *
 * Returns 0 when the semaphore was taken, otherwise -1 with errno set by the
 * failing call (ETIMEDOUT when the deadline passed).
 */
int testSemWait(sem_t *semId, int clock_type, int timeout)
{
    struct timespec expire = {0, 0};
    int status = sem_trywait(semId);

    if (status == 0)
    {
        return status; /* taken without blocking */
    }

    /* The glibc version macros are __GLIBC__ and __GLIBC_MINOR__; a
       misspelled macro evaluates to 0 in #if and silently disables the
       sem_clockwait() branch. */
#if defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 30))
    {
        const clockid_t clock_id = (clock_type == 1) ? CLOCK_MONOTONIC : CLOCK_REALTIME;

        /* The deadline is read from the same clock the wait is measured on. */
        if (GetTimeout(&expire, clock_type, timeout) != 0)
        {
            return -1;
        }

        /* Restart the wait whenever it is interrupted by a signal. */
        do
        {
            status = sem_clockwait(semId, clock_id, &expire);
        } while (status != 0 && errno == EINTR);
    }
#else
    /* sem_timedwait() always interprets its deadline as a CLOCK_REALTIME
       timestamp, so the deadline must be read from that clock no matter
       which clock_type was requested; a CLOCK_MONOTONIC value would look
       like a time far in the past and time out at once. */
    (void)clock_type;
    if (GetTimeout(&expire, 0, timeout) != 0)
    {
        return -1;
    }

    do
    {
        status = sem_timedwait(semId, &expire);
    } while (status != 0 && errno == EINTR);
#endif

    return status;
}
/*
 * Write the current local time into buf as
 * "\r\nYYYY-MM-DD hh:mm:ss.uuuuuu" (microsecond resolution), truncating to
 * at most len - 1 characters.
 */
static void formatCurrentTime(char *buf, size_t len)
{
    struct timeval now;
    struct tm local;
    time_t secs;

    gettimeofday(&now, NULL);
    secs = now.tv_sec;
    localtime_r(&secs, &local);

    /* tv_usec is a signed type, hence the cast and %ld. */
    snprintf(buf, len, "\r\n%d-%02d-%02d %02d:%02d:%02d.%06ld",
             local.tm_year + 1900, local.tm_mon + 1, local.tm_mday,
             local.tm_hour, local.tm_min, local.tm_sec, (long)now.tv_usec);
}

/*
 * Test driver: waits on a semaphore that is never posted and prints the wall
 * clock time before and after the wait, so the effect of changing the system
 * time during the wait can be observed.
 *
 * argv[1]  clock type: 1 for CLOCK_MONOTONIC, anything else CLOCK_REALTIME
 *          (default 0).
 * argv[2]  timeout in seconds (default 30).
 *
 * Returns 0 after the wait has finished, 1 if the semaphore could not be
 * initialised.
 */
int main(int argc, char **argv)
{
    int clock_type = 0;
    int timeout = 30;
    char cDateTime[128] = {0};
    sem_t m_sem;

    /* Initial value 0: every wait blocks until its deadline. */
    if (sem_init(&m_sem, 0, 0) != 0)
    {
        perror("sem_init");
        return 1;
    }

    if (argc >= 2)
    {
        clock_type = atoi(argv[1]);
    }
    if (argc >= 3)
    {
        timeout = atoi(argv[2]);
    }
    printf("clock type is %d (1 for CLOCK_MONOTONIC, other for CLOCK_REALTIME), timeout:%d\n", clock_type, timeout);

    formatCurrentTime(cDateTime, sizeof cDateTime);
    printf("start time:%s\n", cDateTime);

    /* The result is not inspected; only the elapsed time is of interest. */
    (void)testSemWait(&m_sem, clock_type, timeout);

    formatCurrentTime(cDateTime, sizeof cDateTime);
    printf("end time:%s\n", cDateTime);

    sem_destroy(&m_sem);
    return 0;
}
总结: 其实在软件升级的时候,我们经常会遇到一些看起来很奇怪的现象。但是这中间都是有根源的,就看你要不要打破砂锅问到底了。