__ocfs2_cluster_lock()

先看下ftrace。可惜ocfs2_wait_for_mask不能被ftrace跟踪,也不知道为什么(猜测是因为它是static函数,被编译器内联了,有待确认);不过,它直接调用了wait_for_completion,所以就用这个函数代替了。这个函数耗费的时间最多!

 0)               |  __ocfs2_cluster_lock() {
 0)               |    wait_for_completion() {
 ------------------------------------------
 0) iomaker-10882  => ocfs2dc-10793 
 ------------------------------------------

 0)   0.000 us    |  ocfs2_dlm_lock();
 ------------------------------------------
 0) ocfs2dc-10793  => iomaker-10882 
 ------------------------------------------

 0) ! 11609.94 us |    }
 0)   0.000 us    |    ocfs2_dlm_lock();
 0) ! 443.137 us  |    wait_for_completion();
 0) ! 12053.08 us |  } /* __ocfs2_cluster_lock */

这个函数又长又臭,分析起来没那么容易,分段过一遍代码吧:

1362 static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
1363                                 struct ocfs2_lock_res *lockres,
1364                                 int level,
1365                                 u32 lkm_flags,
1366                                 int arg_flags,
1367                                 int l_subclass,
1368                                 unsigned long caller_ip)
1369 {
// lockres是->ip_inode_lockres, level=EX, lkm_flags=0, arg_flags=0, l_subclass=OI_LS_NORMAL
// caller_ip=_RET_IP_(调用者的返回地址),不知道为什么需要这个参数?猜测是在打开CONFIG_DEBUG_LOCK_ALLOC时传给lockdep、用来记录加锁位置的,有待确认。
1370         struct ocfs2_mask_waiter mw;
1371         int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
1372         int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */
1373         unsigned long flags;
1374         unsigned int gen;
1375         int noqueue_attempted = 0;
1376 
//ocfs2_mask_waiter结构设计得挺巧妙: mask指标志位掩码,lockres->l_flags有许多标志位,如OCFS2_LOCK_BUSY,
//OCFS2_LOCK_BLOCKED,OCFS2_LOCK_PENDING等; ->mw_mask用来指示关心哪一个bit,->mw_goal用来表示希望这个
//bit是0还是1; waiter的意思就是通过wait_for_completion一直等到这个位变成我们想要的值。
1377         ocfs2_init_mask_waiter(&mw);
1378 
//ocfs2_inode_inode_lops->flags=LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB
//所以if成立
1379         if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
1380                 lkm_flags |= DLM_LKF_VALBLK;

割...

1382 again:
//哪些情况会goto到这儿?
//#1493行,从ocfs2_dlm_lock中成功返回,但是BUSY标记还没有清除掉,这意味着ast还没被调用或返回,因为所有类型的ast都去清除这个标记;
//#1520行,因为arg_flags=0,#1514行的if语句不成立,所以这行根本执行不到;
//#1525行,1524行if语句一定成立,即ret=0
1383         wait = 0;
1384 
1385         spin_lock_irqsave(&lockres->l_lock, flags);
1386 
1387         if (catch_signals && signal_pending(current)) {
1388                 ret = -ERESTARTSYS;
1389                 goto unlock;
1390         }
1391 
1392         mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
1393                         "Cluster lock called on freeing lockres %s! flags "
1394                         "0x%lx\n", lockres->l_name, lockres->l_flags);
1395 
1396         /* We only compare against the currently granted level
1397          * here. If the lock is blocked waiting on a downconvert,
1398          * we'll get caught below. */
1399         if (lockres->l_flags & OCFS2_LOCK_BUSY &&
1400             level > lockres->l_level) {
//BUSY表示还有dlm lock请求没有返回,必须等着...
1401                 /* is someone sitting in dlm_lock? If so, wait on
1402                  * them. */
1403                 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1404                 wait = 1;
1405                 goto unlock;
//unlock处,#1502行,不能理解!!! 总之,很快就进入等待函数了...
1406         }
1407 
1408         if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) {
1409                 /*
1410                  * We've upconverted. If the lock now has a level we can
1411                  * work with, we take it. If, however, the lock is not at the
1412                  * required level, we go thru the full cycle. One way this could
1413                  * happen is if a process requesting an upconvert to PR is
1414                  * closely followed by another requesting upconvert to an EX.
1415                  * If the process requesting EX lands here, we want it to
1416                  * continue attempting to upconvert and let the process
1417                  * requesting PR take the lock.
1418                  * If multiple processes request upconvert to PR, the first one
1419                  * here will take the lock. The others will have to go thru the
1420                  * OCFS2_LOCK_BLOCKED check to ensure that there is no pending
1421                  * downconvert request.
1422                  */
//这段注释非常清楚
1423                 if (level <= lockres->l_level)
1424                         goto update_holders;
1425         }
1426 
1427         if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
1428             !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
//BLOCKED: blocked waiting for downconvert;
//ocfs2_may_continue_on_blocked_lock()在想要的锁级别与->l_blocking兼容时返回1; 我猜是为了避免重复等待
1429                 /* is the lock is currently blocked on behalf of
1430                  * another node */
1431                 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0);
1432                 wait = 1;
1433                 goto unlock;

割...

1436         if (level > lockres->l_level) {
//申请的锁级别要高于当前granted lock level
1437                 if (noqueue_attempted > 0) {
//noqueue_attempted一直等于0,所以可以无视这个if语句;
1438                         ret = -EAGAIN;
1439                         goto unlock;
1440                 }
//lkm_flags不会将DLM_LKF_NOQUEUE置位,所以也可以无视这个if语句
1441                 if (lkm_flags & DLM_LKF_NOQUEUE)
1442                         noqueue_attempted = 1;
1443 
//->l_action用来指示ast回调时执行哪个动作,有OCFS2_AST_ATTACH, OCFS2_AST_CONVERT, OCFS2_AST_DOWNCONVERT;
1444                 if (lockres->l_action != OCFS2_AST_INVALID)
1445                         mlog(ML_ERROR, "lockres %s has action %u pending\n",
1446                              lockres->l_name, lockres->l_action);
1447 
1448                 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
//如果OCFS2_LOCK_ATTACHED为0,表示该锁资源的LVB还没有初始化,也意味着这是初次对该资源加锁;
1449                         lockres->l_action = OCFS2_AST_ATTACH;
1450                         lkm_flags &= ~DLM_LKF_CONVERT;
1451                 } else {
//否则,一定是申请锁转换
1452                         lockres->l_action = OCFS2_AST_CONVERT;
1453                         lkm_flags |= DLM_LKF_CONVERT;
1454                 }
1455 
1456                 lockres->l_requested = level;
1457                 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1458                 gen = lockres_set_pending(lockres);
1459                 spin_unlock_irqrestore(&lockres->l_lock, flags);
1460 
1461                 BUG_ON(level == DLM_LOCK_IV);
1462                 BUG_ON(level == DLM_LOCK_NL);
1463 
1464                 mlog(ML_BASTS, "lockres %s, convert from %d to %d\n",
1465                      lockres->l_name, lockres->l_level, level);
1467                 /* call dlm_lock to upgrade lock now */
1468                 ret = ocfs2_dlm_lock(osb->cconn,
1469                                      level,
1470                                      &lockres->l_lksb,
1471                                      lkm_flags,
1472                                      lockres->l_name,
1473                                      OCFS2_LOCK_ID_MAX_LEN - 1);
1474                 lockres_clear_pending(lockres, gen, osb);
1475                 if (ret) {
1476                         if (!(lkm_flags & DLM_LKF_NOQUEUE) ||
1477                             (ret != -EAGAIN)) {
1478                                 ocfs2_log_dlm_error("ocfs2_dlm_lock",
1479                                                     ret, lockres);
1480                         }
1481                         ocfs2_recover_from_dlm_error(lockres, 1);
1482                         goto out;
1483                 }
1484 
1485                 mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
1486                      lockres->l_name);
1487 
1488                 /* At this point we've gone inside the dlm and need to
1489                  * complete our work regardless. */
1490                 catch_signals = 0;
1491 
1492                 /* wait for busy to clear and carry on */
1493                 goto again;
1494         }

割...

1496 update_holders:
1497         /* Ok, if we get here then we're good to go. */
//能走到这一步,说明已经成功拿到了想要的锁
1498         ocfs2_inc_holders(lockres, level);
1499 
1500         ret = 0;
1501 unlock:
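//#1502行,不清楚要干什么?猜测:OCFS2_LOCK_UPCONVERT_FINISHING是锁升级(upconvert)刚完成时置上的标记(参见#1408行处的注释),凡是走到unlock标号的请求都会在这里把它清掉,有待确认。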
1502         lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1503 
1504         spin_unlock_irqrestore(&lockres->l_lock, flags);
1505 out:
1506         /*
1507          * This is helping work around a lock inversion between the page lock
1508          * and dlm locks.  One path holds the page lock while calling aops
1509          * which block acquiring dlm locks.  The voting thread holds dlm
1510          * locks while acquiring page locks while down converting data locks.
1511          * This block is helping an aop path notice the inversion and back
1512          * off to unlock its page lock before trying the dlm lock again.
1513          */
//因为arg_flags=0,这个if语句不会成立,直接无视
1514         if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
1515             mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
1516                 wait = 0;
1517                 if (lockres_remove_mask_waiter(lockres, &mw))
1518                         ret = -EAGAIN;
1519                 else
1520                         goto again;
1521         }
//资源被占着,而且锁不兼容,只能慢慢等了!!!
1522         if (wait) {
1523                 ret = ocfs2_wait_for_mask(&mw);
1524                 if (ret == 0)
1525                         goto again;
1526                 mlog_errno(ret);
1527         }
1528         ocfs2_update_lock_stats(lockres, level, &mw, ret);
1542         return ret;
1543 }





你可能感兴趣的:(__ocfs2_cluster_lock())