android synchronized原理分析(二) Monitor

android synchronized原理分析(一) java HotSpot

概述

/**
 * Demo class exercising the three forms of {@code synchronized}: a synchronized
 * block, a synchronized instance method and a synchronized static method.
 */
public class SyncLock {
    /**
     * Starts a thread named "thread1" that locks this instance and sleeps for one
     * second, then locks the same instance on the calling thread and sleeps for
     * one second as well, so both threads use the same lock object.
     */
    public void testBolokCode(){
        Thread thread1 = new Thread(new Runnable() {
            @Override
            public void run() {
                // Worker thread: lock on the enclosing SyncLock instance.
                synchronized (SyncLock.this){
                    try{
                        Thread.sleep(1000);
                    }catch(Exception e){
                        e.printStackTrace();
                    }
                }
            }
        },"thread1");
        thread1.start();
        // Calling thread: lock on the same SyncLock instance as the worker.
        synchronized (SyncLock.this){
            try{
                Thread.sleep(1000);
            }catch(Exception e){
                e.printStackTrace();
            }
        }
    }

    /** Empty synchronized instance method; the lock object is {@code this}. */
    public synchronized void testBolokMethod(){
    }
    /** Empty synchronized static method; the lock object is the {@code SyncLock} class object. */
    public static synchronized void testStaticBolokMethod(){
    }
}

用java -jar baksmali-2.4.0.jar d classes2.dex -o classes2 命令转换为smali后

# Static synchronized method: the monitor is taken on the SyncLock class object.
.method public static declared-synchronized testStaticBolokMethod()V
    .registers 1

    # Load the SyncLock class object into v0.
    const-class v0, Lcom/kpgn/appone/SyncLock;

    # Acquire the monitor of the class object.
    monitor-enter v0

    .line 31
    # The method body is empty, so the monitor is released right away.
    monitor-exit v0

    return-void
.end method


# virtual methods
# Creates and starts "thread1", then runs the synchronized block on p0 (this).
# monitor-exit is emitted on both the normal path (:goto_1c) and the
# exceptional path (:goto_1e), so the monitor is released either way.
.method public testBolokCode()V
    .registers 4

    .line 5
    new-instance v0, Ljava/lang/Thread;

    # v1 = the anonymous Runnable (SyncLock$1), constructed with p0 as its outer instance.
    new-instance v1, Lcom/kpgn/appone/SyncLock$1;

    invoke-direct {v1, p0}, Lcom/kpgn/appone/SyncLock$1;-><init>(Lcom/kpgn/appone/SyncLock;)V

    const-string v2, "thread1"

    invoke-direct {v0, v1, v2}, Ljava/lang/Thread;-><init>(Ljava/lang/Runnable;Ljava/lang/String;)V

    .line 17
    .local v0, "thread1":Ljava/lang/Thread;
    invoke-virtual {v0}, Ljava/lang/Thread;->start()V

    .line 18
    # Start of synchronized (SyncLock.this): acquire the monitor of p0.
    monitor-enter p0

    .line 20
    # 0x3e8 == 1000, the argument passed to Thread.sleep.
    const-wide/16 v1, 0x3e8

    :try_start_12
    invoke-static {v1, v2}, Ljava/lang/Thread;->sleep(J)V
    :try_end_15
    .catch Ljava/lang/Exception; {:try_start_12 .. :try_end_15} :catch_18
    .catchall {:try_start_12 .. :try_end_15} :catchall_16

    .line 23
    goto :goto_1c

    .line 24
    # Catch-all handler: keep the pending exception in v1 and go release the monitor.
    :catchall_16
    move-exception v1

    goto :goto_1e

    .line 21
    # Handler for the Java catch(Exception e) clause.
    :catch_18
    move-exception v1

    .line 22
    .local v1, "e":Ljava/lang/Exception;
    :try_start_19
    invoke-virtual {v1}, Ljava/lang/Exception;->printStackTrace()V

    .line 24
    .end local v1    # "e":Ljava/lang/Exception;
    # Normal exit of the synchronized block: release the monitor and return.
    :goto_1c
    monitor-exit p0

    .line 25
    return-void

    .line 24
    # Exceptional exit: release the monitor, then rethrow the exception held in v1.
    :goto_1e
    monitor-exit p0
    :try_end_1f
    .catchall {:try_start_19 .. :try_end_1f} :catchall_16

    throw v1
.end method

# Synchronized instance method: the monitor is taken on p0 (this).
.method public declared-synchronized testBolokMethod()V
    .registers 1

    # Acquire the monitor of the receiver.
    monitor-enter p0

    .line 28
    # The method body is empty, so the monitor is released right away.
    monitor-exit p0

    return-void
.end method

以上可以看到对应synchronized的虚拟机指令就是monitor-enter 和monitor-exit 。对应art/runtime/monitor.cc 中的Monitor::MonitorEnter和Monitor::MonitorExit 函数实现,调用堆栈为

art::Monitor::MonitorEnter(art::Thread*, art::ObjPtr<art::mirror::Object>, bool)
art::ObjectLock<art::mirror::Class>::ObjectLock(art::Thread*, art::Handle<art::mirror::Class>)
* art/runtime/object_lock.cc
// Stores the thread and the object handle, then enters the object's monitor on behalf of `self`.
ObjectLock<T>::ObjectLock(Thread* self, Handle<T> object) : self_(self), obj_(object) {
  // The handle must reference an object before it is dereferenced below.
  CHECK(object != nullptr);
  obj_->MonitorEnter(self_);
}

* art/runtime/mirror/object-inl.h
// Enters this object's monitor for `self` by forwarding to Monitor::MonitorEnter with trylock
// disabled, and returns whatever that call returns.
inline ObjPtr<mirror::Object> Object::MonitorEnter(Thread* self) {
  return Monitor::MonitorEnter(self, this, /*trylock=*/false);
}

* art/runtime/monitor.cc
// Acquires the monitor of `obj` for thread `self`.
//
// Loops on the object's lock word and dispatches on its state:
//   - kUnlocked:   try to CAS in a thin lock owned by this thread with count 0.
//   - kThinLocked: if this thread is the owner, bump the recursion count (inflating when the
//                  count would exceed kThinLockMaxCount); otherwise call sched_yield() and
//                  retry, inflating once the number of failed attempts exceeds
//                  GetMaxSpinsBeforeThinLockInflation().
//   - kFatLocked:  delegate to the Monitor referenced by the lock word.
//   - kHashCode:   inflate, carrying over the stored hash code, then retry.
//
// Returns the locked object on success. When `trylock` is true and the thin lock is owned by
// another thread, or the fat lock's TryLock fails, returns nullptr instead of waiting.
ObjPtr<mirror::Object> Monitor::MonitorEnter(Thread* self, ObjPtr<mirror::Object> obj, bool trylock) {
  DCHECK(self != nullptr);
  DCHECK(obj != nullptr);
  self->AssertThreadSuspensionIsAllowable();
  obj = FakeLock(obj);
  uint32_t thread_id = self->GetThreadId();
  // Number of failed attempts on a thin lock owned by another thread.
  size_t contention_count = 0;
  StackHandleScope<1> hs(self);
  Handle<mirror::Object> h_obj(hs.NewHandle(obj));
  while (true) {
    // We initially read the lockword with ordinary Java/relaxed semantics. When stronger
    // semantics are needed, we address it below. Since GetLockWord bottoms out to a relaxed load,
    // we can fix it later, in an infrequently executed case, with a fence.
    LockWord lock_word = h_obj->GetLockWord(false);
    switch (lock_word.GetState()) {
      case LockWord::kUnlocked: {
        // No ordering required for preceding lockword read, since we retest.
        LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0, lock_word.GCState()));
        if (h_obj->CasLockWord(lock_word, thin_locked, CASMode::kWeak, std::memory_order_acquire)) {
          AtraceMonitorLock(self, h_obj.Get(), /* is_wait= */ false);
          return h_obj.Get();  // Success!
        }
        continue;  // Go again.
      }
      case LockWord::kThinLocked: {
        uint32_t owner_thread_id = lock_word.ThinLockOwner();
        if (owner_thread_id == thread_id) {
          // No ordering required for initial lockword read.
          // We own the lock, increase the recursion count.
          uint32_t new_count = lock_word.ThinLockCount() + 1;
          if (LIKELY(new_count <= LockWord::kThinLockMaxCount)) {
            LockWord thin_locked(LockWord::FromThinLockId(thread_id,
                                                          new_count,
                                                          lock_word.GCState()));
            // Only this thread pays attention to the count. Thus there is no need for stronger
            // than relaxed memory ordering.
            if (!kUseReadBarrier) {
              h_obj->SetLockWord(thin_locked, /* as_volatile= */ false);
              AtraceMonitorLock(self, h_obj.Get(), /* is_wait= */ false);
              return h_obj.Get();  // Success!
            } else {
              // Use CAS to preserve the read barrier state.
              if (h_obj->CasLockWord(lock_word,
                                     thin_locked,
                                     CASMode::kWeak,
                                     std::memory_order_relaxed)) {
                AtraceMonitorLock(self, h_obj.Get(), /* is_wait= */ false);
                return h_obj.Get();  // Success!
              }
            }
            continue;  // Go again.
          } else {
            // We'd overflow the recursion count, so inflate the monitor.
            InflateThinLocked(self, h_obj, lock_word, 0);
          }
        } else {
          // Thin lock owned by another thread: a try-lock gives up immediately.
          if (trylock) {
            return nullptr;
          }
          // Contention.
          contention_count++;
          Runtime* runtime = Runtime::Current();
          if (contention_count <= runtime->GetMaxSpinsBeforeThinLockInflation()) {
            // TODO: Consider switching the thread state to kWaitingForLockInflation when we are
            // yielding.  Use sched_yield instead of NanoSleep since NanoSleep can wait much longer
            // than the parameter you pass in. This can cause thread suspension to take excessively
            // long and make long pauses. See b/16307460.
            // TODO: We should literally spin first, without sched_yield. Sched_yield either does
            // nothing (at significant expense), or guarantees that we wait at least microseconds.
            // If the owner is running, I would expect the median lock hold time to be hundreds
            // of nanoseconds or less.
            sched_yield();
          } else {
            contention_count = 0;
            // No ordering required for initial lockword read. Install rereads it anyway.
            InflateThinLocked(self, h_obj, lock_word, 0);
          }
        }
        continue;  // Start from the beginning.
      }
      case LockWord::kFatLocked: {
        // We should have done an acquire read of the lockword initially, to ensure
        // visibility of the monitor data structure. Use an explicit fence instead.
        std::atomic_thread_fence(std::memory_order_acquire);
        Monitor* mon = lock_word.FatLockMonitor();
        if (trylock) {
          return mon->TryLock(self) ? h_obj.Get() : nullptr;
        } else {
          mon->Lock(self);
          return h_obj.Get();  // Success!
        }
      }
      case LockWord::kHashCode:
        // Inflate with the existing hashcode.
        // Again no ordering required for initial lockword read, since we don't rely
        // on the visibility of any prior computation.
        Inflate(self, nullptr, h_obj.Get(), lock_word.GetHashCode());
        continue;  // Start from the beginning.
      default: {
        LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
        UNREACHABLE();
      }
    }
  }
}

Android运行时初始化时会创建一个monitor池

frameworks/base/core/jni/AndroidRuntime.cpp
libnativehelper/JniInvocation.cpp
art/runtime/jni_internal.cc
art/runtime/runtime.cc
AndroidRuntime::startVm->JNI_CreateJavaVM->Runtime::Create->Runtime::Init->MonitorPool::Create()

  • art/runtime/runtime.cc(Runtime::Init中;MonitorPool本身定义在art/runtime/monitor_pool.h / monitor_pool.cc)
    monitor_pool_ = MonitorPool::Create();

monitor pool使用一个chunk对应一个monitor。num_chunks_ 记录了当前chunk的数量,capacity_记录了当前chunk的容量,first_free_记录了当前第一个可用的chunk地址。初始化monitor池的时候,会调用AllocateChunk分配一个chunk,以后每次需要使用新的monitor的时候,也会调用AllocateChunk分配一个chunk。

Object类的定义。这个Object类是ART运行时里面的所有对象的基类,它有一个monitor_成员变量,如下所示:

* art/runtime/mirror/object.h
// C++ mirror of java.lang.Object
// (Excerpt: every member other than monitor_ is elided with "...".)
class MANAGED LOCKABLE Object {
...
// Monitor and hash code information.
// Raw 32-bit lock word; LockWord (lock_word.h) describes how its bits are interpreted.
  uint32_t monitor_;
...
}

这个32位的monitor_成员变量的责任重大,除了用来描述对象的Monitor和Hash Code信息之外,还包括对象的移动信息。这个monitor_成员变量通过封装成一个LockWord对象来描述

* art/runtime/lock_word.h
/* The lock value itself as stored in mirror::Object::monitor_.  The two most significant bits
 * encode the state. The four possible states are fat locked, thin/unlocked, hash code, and
 * forwarding address.
 *
 * When the lock word is in the "thin" state and its bits are formatted as follows:
 *
 *  |33|2|2|222222221111|1111110000000000|
 *  |10|9|8|765432109876|5432109876543210|
 *  |00|m|r| lock count |thread id owner |
 *
 * When the lock word is in the "fat" state and its bits are formatted as follows:
 *
 *  |33|2|2|2222222211111111110000000000|
 *  |10|9|8|7654321098765432109876543210|
 *  |01|m|r| MonitorId                  |
 *
 * When the lock word is in hash state and its bits are formatted as follows:
 *
 *  |33|2|2|2222222211111111110000000000|
 *  |10|9|8|7654321098765432109876543210|
 *  |10|m|r| HashCode                   |
 *
 * When the lock word is in forwarding address state and its bits are formatted as follows:
 *
 *  |33|2|22222222211111111110000000000|
 *  |10|9|87654321098765432109876543210|
 *  |11|0| ForwardingAddress           |
 *
 * The `r` bit stores the read barrier state.
 * The `m` bit stores the mark bit state.
 */
// (Excerpt: "..." marks parts of the class that are elided.)
class LockWord {
 public:
  enum SizeShiftsAndMasks : uint32_t {  // private marker to avoid generate-operator-out.py from processing.
    // Number of bits to encode the state, currently just fat or thin/unlocked or hash code.
    kStateSize = 2,
    kReadBarrierStateSize = 1,
    kMarkBitStateSize = 1,
    // Number of bits to encode the thin lock owner.
    kThinLockOwnerSize = 16,
    // Remaining bits are the recursive lock count.
    // With the sizes above this is 32 - 16 - 2 - 1 - 1 == 12 bits.
    kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize - kReadBarrierStateSize -
        kMarkBitStateSize,

    // Thin lock bits. Owner in lowest bits.
    kThinLockOwnerShift = 0,
    kThinLockOwnerMask = (1 << kThinLockOwnerSize) - 1,
    kThinLockOwnerMaskShifted = kThinLockOwnerMask << kThinLockOwnerShift,
    kThinLockMaxOwner = kThinLockOwnerMask,
    // Count in higher bits.
    kThinLockCountShift = kThinLockOwnerSize + kThinLockOwnerShift,
    kThinLockCountMask = (1 << kThinLockCountSize) - 1,
    // Largest recursion count a thin lock can hold: (1 << 12) - 1 == 4095.
    kThinLockMaxCount = kThinLockCountMask,
    kThinLockCountOne = 1 << kThinLockCountShift,  // == 65536 (0x10000)
    kThinLockCountMaskShifted = kThinLockCountMask << kThinLockCountShift,

    // State in the highest bits.
    // Evaluates to 1 + 12 + 16 + 1 == 30, i.e. the state occupies bits 31..30.
    kStateShift = kReadBarrierStateSize + kThinLockCountSize + kThinLockCountShift +
        kMarkBitStateSize,
    kStateMask = (1 << kStateSize) - 1,
    kStateMaskShifted = kStateMask << kStateShift,
    // Raw 2-bit state encodings; thin-locked and unlocked share encoding 0.
    kStateThinOrUnlocked = 0,
    kStateFat = 1,
    kStateHash = 2,
    kStateForwardingAddress = 3,
    kStateForwardingAddressShifted = kStateForwardingAddress << kStateShift,
    kStateForwardingAddressOverflow = (1 + kStateMask - kStateForwardingAddress) << kStateShift,

    // Read barrier bit.
    // Shift evaluates to 12 + 16 == 28 (bit 28, the `r` bit in the layouts above).
    kReadBarrierStateShift = kThinLockCountSize + kThinLockCountShift,
    kReadBarrierStateMask = (1 << kReadBarrierStateSize) - 1,
    kReadBarrierStateMaskShifted = kReadBarrierStateMask << kReadBarrierStateShift,
    kReadBarrierStateMaskShiftedToggled = ~kReadBarrierStateMaskShifted,

    // Mark bit.
    // Shift evaluates to 1 + 28 == 29 (bit 29, the `m` bit in the layouts above).
    kMarkBitStateShift = kReadBarrierStateSize + kReadBarrierStateShift,
    kMarkBitStateMask = (1 << kMarkBitStateSize) - 1,
    kMarkBitStateMaskShifted = kMarkBitStateMask << kMarkBitStateShift,
    kMarkBitStateMaskShiftedToggled = ~kMarkBitStateMaskShifted,

    // GC state is mark bit and read barrier state.
    kGCStateSize = kReadBarrierStateSize + kMarkBitStateSize,
    kGCStateShift = kReadBarrierStateShift,
    kGCStateMaskShifted = kReadBarrierStateMaskShifted | kMarkBitStateMaskShifted,
    kGCStateMaskShiftedToggled = ~kGCStateMaskShifted,

    // When the state is kHashCode, the non-state bits hold the hashcode.
    // Note Object.hashCode() has the hash code layout hardcoded.
    kHashShift = 0,
    // Evaluates to 32 - 2 - 1 - 1 == 28 bits.
    kHashSize = 32 - kStateSize - kReadBarrierStateSize - kMarkBitStateSize,
    kHashMask = (1 << kHashSize) - 1,
    kMaxHash = kHashMask,

    // Forwarding address shift.
    kForwardingAddressShift = kObjectAlignmentShift,

    // The monitor id reuses the position and width of the hash code field (28 bits at shift 0).
    kMonitorIdShift = kHashShift,
    kMonitorIdSize = kHashSize,
    kMonitorIdMask = kHashMask,
    // Evaluates to 32 - 28 == 4, so kMonitorIdAlignment == 16.
    kMonitorIdAlignmentShift = 32 - kMonitorIdSize,
    kMonitorIdAlignment = 1 << kMonitorIdAlignmentShift,
    kMaxMonitorId = kMaxHash
  };
  ...
  // Logical lock states. There are five values here but only four raw 2-bit encodings above,
  // because kUnlocked and kThinLocked both use kStateThinOrUnlocked.
  enum LockState {
    kUnlocked,    // No lock owners.
    kThinLocked,  // Single uncontended owner.
    kFatLocked,   // See associated monitor.
    kHashCode,    // Lock word contains an identity hash.
    kForwardingAddress,  // Lock word contains the forwarding address of an object.
  };
  ...
  // Only Object should be converting LockWords to/from uints.
  friend class mirror::Object;

  // The encoded value holding all the state.
  uint32_t value_;
}

这里的monitor_id_,在Monitor类中也有存储,一个synchronized(Obj)拥有一个monitor_id_,即对同一个Obj的锁,拥有相同的monitor_id_,在MonitorEnter锁膨胀时,通过MonitorPool中取出monitor_id_对应的Monitor,来执行Lock方法

锁变换

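Object对象的成员变量monitor_的高2位描述的是状态。2位只能编码4种取值(kStateThinOrUnlocked、kStateFat、kStateHash、kStateForwardingAddress),其中kUnlocked和kThinLocked共用00这一编码,因此LockState枚举共有kUnlocked、kThinLocked、kFatLocked、kHashCode和kForwardingAddress五种状态。处于不同状态时,低30位有不同的描述。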

对于thinlock,LockWord头两位是00,其后依次是1位mark bit和1位read barrier状态位,接着12位是加锁次数,最后16位是归属的线程id。对于fatlock,LockWord头两位是01,其后同样是mark bit和read barrier状态位,剩下的28位是对应的monitor的id。LockWord是0的时候,表示该object未被加锁,这是每个object的monitor初始化的状态。

thinlock的加锁过程:进入到MonitorEnter后,说明即将要对该object进行加锁。LockWord在初始化时是0,于是通过线程id号和加锁次数(0,表示首次加锁)生成一个LockWord,通过CAS(Compare And Set)将LockWord设置成新生成的LockWord。这个过程就是thinlock的加锁过程。

thinlock的访问过程:如果访问该object的是thinlock的归属线程,将加锁次数加1后,更新LockWord。加锁次数有限制,当加1后的次数超过上限kThinLockMaxCount(2^12-1)时,调用InflateThinLocked通过锁膨胀将thinlock升级为fatlock。如果访问该object的是其他线程,将会调用sched_yield放弃处理器,让CPU选择合适的其他线程执行。contention_count记录了该线程尝试访问该object但未能成功的次数,当contention_count超过某个阈值时,会调用InflateThinLocked通过锁膨胀将thinlock升级为fatlock。这个阈值默认是50,定义在monitor.h中 size_t kDefaultMaxSpinsBeforeThinLockInflation = 50;也可以通过
“-XX:MaxSpinsBeforeThinLockInflation=”指定这个阈值。

可以看出,thinlock是一个自旋锁。在等待锁释放的过程中,线程并不会睡眠,只是暂时让出处理器,然后通过continue重新执行循环,检查LockWord对应的状态是否是kUnlocked(释放锁)。在锁被短时间占用的情况下,自旋锁是比较好的选择。但当contention_count超过一定程度时,说明该锁被长时间占用,使用自旋锁会带来额外的开销(CAS操作和忙等待),就会将thinlock升级为fatlock。

thinlock主要是调用原子类CompareAndSet,如果当前值 == 预期值,则以原子方式将该值设置为给定的更新值

与thinlock不同的是,非持有者线程在访问fatlock锁住的代码块时,是通过条件变量monitor_contenders_ 实现同步的。fatlock是个重量级锁,不持有锁的线程会被阻塞,直到锁释放将其唤醒。准确地说,thinlock并没有用到monitor,用到monitor的是fatlock

* art/runtime/mirror/object-readbarrier-inl.h
// Atomically compares the 32-bit field at `field_offset` with `old_value` and, if they match,
// stores `new_value`; returns the result of the underlying CompareAndSet call.
//
// Template flags: kCheckTransaction enables a debug check that kTransactionActive matches the
// runtime's transaction state; kTransactionActive records the write with the runtime before
// the CAS; kVerifyFlags & kVerifyThis verifies `this` first.
// `mode` and `memory_order` are passed through unchanged to CompareAndSet.
template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
inline bool Object::CasField32(MemberOffset field_offset,
                               int32_t old_value,
                               int32_t new_value,
                               CASMode mode,
                               std::memory_order memory_order) {
  if (kCheckTransaction) {
    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
  }
  if (kTransactionActive) {
    Runtime::Current()->RecordWriteField32(this, field_offset, old_value, true);
  }
  if (kVerifyFlags & kVerifyThis) {
    VerifyObject(this);
  }
  // Compute the field's address from the object base plus the byte offset and treat it as an
  // atomic integer.
  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
  AtomicInteger* atomic_addr = reinterpret_cast<AtomicInteger*>(raw_addr);

  return atomic_addr->CompareAndSet(old_value, new_value, mode, memory_order);
}

// Atomically replaces the object's lock word with `new_val` if it currently equals `old_val`.
// Implemented as a 32-bit CAS on the field at MonitorOffset(), using the raw values of both
// lock words; returns the result of CasField32.
inline bool Object::CasLockWord(LockWord old_val,
                                LockWord new_val,
                                CASMode mode,
                                std::memory_order memory_order) {
  // Force use of non-transactional mode and do not check.
  return CasField32<false, false>(MonitorOffset(),
                                  old_val.GetValue(),
                                  new_val.GetValue(),
                                  mode,
                                  memory_order);
}

锁膨胀

如果当前线程就是持有锁的线程,直接执行锁膨胀操作。如果当前线程不是持有锁的线程,先要阻塞持有锁的线程,再进行锁膨胀操作。

MonitorPool::CreateMonitor会创建一个新的monitor。接下来的Monitor::Install会通过CAS将加锁的object的LockWord改写成fatlock对应的LockWord,即头部标记“01”和刚创建的monitor id组合而成的LockWord。这样,当读取这个object的锁时,会发现这是个fatlock,于是进入到Monitor::Lock的流程中。

* art/runtime/monitor.cc
// Inflates the thin lock held on `obj` into a Monitor.
//
// If the calling thread owns the thin lock, it inflates directly. Otherwise it suspends the
// owner thread, re-reads the lock word to confirm the object is still thin-locked by that same
// owner, inflates with that owner, and resumes it. If the owner could not be suspended
// (SuspendThreadByThreadId returned nullptr), nothing is inflated.
//
// `lock_word` is the caller's snapshot of the lock word and must be in the kThinLocked state.
// `hash_code` is forwarded to Inflate.
void Monitor::InflateThinLocked(Thread* self, Handle<mirror::Object> obj, LockWord lock_word,
                                uint32_t hash_code) {
  DCHECK_EQ(lock_word.GetState(), LockWord::kThinLocked);
  uint32_t owner_thread_id = lock_word.ThinLockOwner();
  if (owner_thread_id == self->GetThreadId()) {
    // We own the monitor, we can easily inflate it.
    Inflate(self, self, obj.Get(), hash_code);
  } else {
    ThreadList* thread_list = Runtime::Current()->GetThreadList();
    // Suspend the owner, inflate. First change to blocked and give up mutator_lock_.
    self->SetMonitorEnterObject(obj.Get());
    bool timed_out;
    Thread* owner;
    {
      // The thread state is kWaitingForLockInflation only for the duration of this scope.
      ScopedThreadSuspension sts(self, kWaitingForLockInflation);
      owner = thread_list->SuspendThreadByThreadId(owner_thread_id,
                                                   SuspendReason::kInternal,
                                                   &timed_out);
    }
    if (owner != nullptr) {
      // We succeeded in suspending the thread, check the lock's status didn't change.
      lock_word = obj->GetLockWord(true);
      if (lock_word.GetState() == LockWord::kThinLocked &&
          lock_word.ThinLockOwner() == owner_thread_id) {
        // Go ahead and inflate the lock.
        Inflate(self, owner, obj.Get(), hash_code);
      }
      // Resume the owner whether or not inflation happened.
      bool resumed = thread_list->Resume(owner, SuspendReason::kInternal);
      DCHECK(resumed);
    }
    self->SetMonitorEnterObject(nullptr);
  }
}

// Creates a Monitor for `obj` via MonitorPool::CreateMonitor and tries to install it.
// If Install succeeds, the monitor is added to the runtime's monitor list and the object's lock
// word is checked to be in the kFatLocked state; otherwise the monitor is handed back through
// MonitorPool::ReleaseMonitor.
//
// `owner` may be nullptr; in that case the log message reports `hash_code` instead of an owner
// thread id.
void Monitor::Inflate(Thread* self, Thread* owner, ObjPtr<mirror::Object> obj, int32_t hash_code) {
  DCHECK(self != nullptr);
  DCHECK(obj != nullptr);
  // Allocate and acquire a new monitor.
  Monitor* m = MonitorPool::CreateMonitor(self, owner, obj, hash_code);
  DCHECK(m != nullptr);
  if (m->Install(self)) {
    if (owner != nullptr) {
      VLOG(monitor) << "monitor: thread" << owner->GetThreadId()
          << " created monitor " << m << " for object " << obj;
    } else {
      VLOG(monitor) << "monitor: Inflate with hashcode " << hash_code
          << " created monitor " << m << " for object " << obj;
    }
    Runtime::Current()->GetMonitorList()->Add(m);
    CHECK_EQ(obj->GetLockWord(true).GetState(), LockWord::kFatLocked);
  } else {
    // Installation failed, so the freshly created monitor is not used.
    MonitorPool::ReleaseMonitor(self, m);
  }
}


// Acquires this monitor for `self`, waiting while the acquisition attempt fails.
//
// Each loop iteration calls TryLockLocked() under monitor_lock_. On failure the thread bumps
// num_waiters_, drops monitor_lock_, switches to kBlocked, and waits on monitor_contenders_ if
// owner_ is still set. After waking it optionally logs the contention, re-takes monitor_lock_
// and retries. The MonitorContendedLocking / MonitorContendedLocked runtime callbacks are only
// invoked when `reason` is LockReason::kForLock, and at most once per call.
//
// NOTE(review): this is an abridged excerpt; "..." marks elided code, and `sample_percent` is
// not declared in the visible part.
template <LockReason reason>
void Monitor::Lock(Thread* self) {
  ScopedAssertNotHeld sanh(self, monitor_lock_);
  bool called_monitors_callback = false;
  monitor_lock_.Lock(self);
  while (true) {
    if (TryLockLocked(self)) {
      break;
    }
    // Contended.
    const bool log_contention = (lock_profiling_threshold_ != 0);
    uint64_t wait_start_ms = log_contention ? MilliTime() : 0;
    // Snapshot the owner's locking location while monitor_lock_ is still held.
    ArtMethod* owners_method = locking_method_;
    uint32_t owners_dex_pc = locking_dex_pc_;
    // Do this before releasing the lock so that we don't get deflated.
    size_t num_waiters = num_waiters_;
    ++num_waiters_;

    // If systrace logging is enabled, first look at the lock owner. Acquiring the monitor's
    // lock and then re-acquiring the mutator lock can deadlock.
    bool started_trace = false;
    if (ATraceEnabled()) {
      ...
    }

    monitor_lock_.Unlock(self);  // Let go of locks in order.
    // Call the contended locking cb once and only once. Also only call it if we are locking for
    // the first time, not during a Wait wakeup.
    if (reason == LockReason::kForLock && !called_monitors_callback) {
      called_monitors_callback = true;
      Runtime::Current()->GetRuntimeCallbacks()->MonitorContendedLocking(this);
    }
    self->SetMonitorEnterObject(GetObject().Ptr());
    {
      ScopedThreadSuspension tsc(self, kBlocked);  // Change to blocked and give up mutator_lock_.
      // Stays 0 unless this thread actually waited on monitor_contenders_.
      uint32_t original_owner_thread_id = 0u;
      {
        // Reacquire monitor_lock_ without mutator_lock_ for Wait.
        MutexLock mu2(self, monitor_lock_);
        if (owner_ != nullptr) {  // Did the owner_ give the lock up?
          original_owner_thread_id = owner_->GetThreadId();
          monitor_contenders_.Wait(self);  // Still contended so wait.
        }
      }
      if (original_owner_thread_id != 0u) {
        // Woken from contention.
        if (log_contention) {
          uint64_t wait_ms = MilliTime() - wait_start_ms;
          
          if (sample_percent != 0 && (static_cast<uint32_t>(rand() % 100) < sample_percent)) {
       
            // Reacquire mutator_lock_ for logging.
            ScopedObjectAccess soa(self);

            bool owner_alive = false;
            pid_t original_owner_tid = 0;
            std::string original_owner_name;

            const bool should_dump_stacks = stack_dump_lock_profiling_threshold_ > 0 &&
                wait_ms > stack_dump_lock_profiling_threshold_;
            std::string owner_stack_dump;
             ...
            // If we found the owner (and thus have owner data), go and log now.
            if (owner_alive) {
              // Give the detailed traces for really long contention.
              ...
              LogContentionEvent(self,
                                wait_ms,
                                // MIUI MOD: START
                                // sample_percent,
                                original_owner_tid,
                                // END
                                owners_method,
                                owners_dex_pc);
            }
          }
        }
      }
    }
    if (started_trace) {
      ATraceEnd();
    }
    self->SetMonitorEnterObject(nullptr);
    monitor_lock_.Lock(self);  // Reacquire locks in order.
    --num_waiters_;
  }
  monitor_lock_.Unlock(self);
  // We need to pair this with a single contended locking call. NB we match the RI behavior and call
  // this even if MonitorEnter failed.
  if (called_monitors_callback) {
    CHECK(reason == LockReason::kForLock);
    Runtime::Current()->GetRuntimeCallbacks()->MonitorContendedLocked(this);
  }
}

这里的monitor_lock_ 为Mutex 类型(注意:下面贴出的ExclusiveLock实现属于ReaderWriterMutex类,这里用它来说明基于futex的加锁流程)

* art/runtime/base/mutex.cc
// Acquires the lock for `self` by delegating to ExclusiveLock.
void Lock(Thread* self) ACQUIRE() {  ExclusiveLock(self); }
// Acquires the mutex exclusively for `self`, retrying until it succeeds.
//
// With ART_USE_FUTEXES: repeatedly loads state_; when it reads 0 it attempts a weak
// acquire-CAS from 0 to -1. Otherwise it counts itself in num_pending_writers_ and calls
// futex(FUTEX_WAIT_PRIVATE) on state_ with the value it observed, then retries. A futex error
// other than EAGAIN/EINTR is logged as fatal.
// Without ART_USE_FUTEXES it calls pthread_rwlock_wrlock instead.
// In both cases the caller is then recorded as the exclusive owner.
void ReaderWriterMutex::ExclusiveLock(Thread* self) {
  DCHECK(self == nullptr || self == Thread::Current());
  AssertNotExclusiveHeld(self);
#if ART_USE_FUTEXES
  bool done = false;
  do {
    int32_t cur_state = state_.load(std::memory_order_relaxed);
    if (LIKELY(cur_state == 0)) {
      // Change state from 0 to -1 and impose load/store ordering appropriate for lock acquisition.
      done = state_.CompareAndSetWeakAcquire(0 /* cur_state*/, -1 /* new state */);
    } else {
      // Failed to acquire, hang up.
      ScopedContentionRecorder scr(this, SafeGetTid(self), GetExclusiveOwnerTid());
      ++num_pending_writers_;
      if (UNLIKELY(should_respond_to_empty_checkpoint_request_)) {
        self->CheckEmptyCheckpointFromMutex();
      }
      // Wait on state_, passing the value observed above as the expected value.
      if (futex(state_.Address(), FUTEX_WAIT_PRIVATE, cur_state, nullptr, nullptr, 0) != 0) {
        // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
        // We don't use TEMP_FAILURE_RETRY so we can intentionally retry to acquire the lock.
        if ((errno != EAGAIN) && (errno != EINTR)) {
          PLOG(FATAL) << "futex wait failed for " << name_;
        }
      }
      --num_pending_writers_;
    }
  } while (!done);
  DCHECK_EQ(state_.load(std::memory_order_relaxed), -1);
#else
  CHECK_MUTEX_CALL(pthread_rwlock_wrlock, (&rwlock_));
#endif
  // Record ownership: there must be no current owner, then store the caller's tid.
  DCHECK_EQ(GetExclusiveOwnerTid(), 0);
  exclusive_owner_.store(SafeGetTid(self), std::memory_order_relaxed);
  RegisterAsLocked(self);
  AssertExclusiveHeld(self);
}

* art/runtime/base/mutex-inl.h
// Thin wrapper that issues the SYS_futex system call with the given arguments, in the same
// order, and returns the syscall's result unchanged.
static inline int futex(volatile int *uaddr, int op, int val, const struct timespec *timeout,
                        volatile int *uaddr2, int val3) {
  return syscall(SYS_futex, uaddr, op, val, timeout, uaddr2, val3);
}

Monitor 锁的功能实现依赖linux 的futex功能和原子操作的原理, Futex主要的作用有两点:支持一种粒度锁的睡眠与唤醒操作,其次是管理进程挂起时的等待队列

  • FUTEX_WAKE 参数:唤醒val个在state_.Address指向的锁变量上挂起等待的线程。如下唤醒1个线程
    futex(state_.Address(), FUTEX_WAKE, 1, nullptr, nullptr, 0);
  • FUTEX_WAIT参数:当state_.Address处的值和val值相等时,进入等待状态。对应线程将不再执行,必须唤醒后才能执行后面的操作
    futex(state_.Address(), FUTEX_WAIT, cur_state, nullptr, nullptr, 0)
  • FUTEX_CMP_REQUEUE参数:当sequence_.Address()处的值和cur_sequence相等时,将在sequence_.Address()上挂起的等待队列转移到guard_.state_.Address()的等待队列上。
    futex(sequence_.Address(), FUTEX_CMP_REQUEUE, 0, reinterpret_cast<const timespec*>(std::numeric_limits<int32_t>::max()), guard_.state_.Address(), cur_sequence)

Lock过程

  1. 开启while true循环,调用TryLockLocked尝试获取锁,获取成功,则退出循环,获取失败,则进入步骤2
    TryLockLocked先检查锁的持有者
    如果持有者为null,则获取锁成功
    如果持有者为当前线程,则锁计数+1,返回成功
    如果持有者不为null,且不为当前线程,则获取失败
  2. 打印trace,当前线程持有monitor_lock_锁,并调用monitor_contenders_无限等待,monitor_contenders_为ConditionVariable类型的互斥条件变量,通过调用futex(sequence_.Address(), FUTEX_WAIT_PRIVATE, cur_sequence, nullptr, nullptr, 0)而进入等待。
  3. 如果锁持有者调用Monitor::Unlock释放锁,进而通过SignalContendersAndReleaseMonitorLock调用到monitor_contenders_.Signal时,则选择一个等待线程,唤醒它。

释放锁

Monitor::MonitorExit:对于thinlock,若LockWord中记录的加锁次数不为0,就将LockWord中记录的加锁次数减1。若LockWord中记录的加锁次数为0,则将LockWord清0,这样以后有线程在获取这个object的锁时,会发现这个锁是
kUnlocked状态的,可以直接占有这个锁。对于fatlock,就是通过条件变量ConditionVariable类型的monitor_contenders_.signal函数唤醒一个阻塞在这个锁的线程。

// Releases one level of the monitor of `obj` held by `self`.
//
// Dispatches on the lock word state:
//   - kHashCode / kUnlocked: the object is not locked; report via FailedUnlock, return false.
//   - kThinLocked: if `self` is not the owner, report via FailedUnlock and return false.
//                  Otherwise decrement the recursion count, or, when the count is already 0,
//                  reset the lock word to the default value while keeping the GC state bits.
//   - kFatLocked:  delegate to Monitor::Unlock on the monitor referenced by the lock word.
//
// Returns true on success and false when the unlock was rejected.
bool Monitor::MonitorExit(Thread* self, ObjPtr<mirror::Object> obj) {
  DCHECK(self != nullptr);
  DCHECK(obj != nullptr);
  self->AssertThreadSuspensionIsAllowable();
  obj = FakeUnlock(obj);
  StackHandleScope<1> hs(self);
  Handle<mirror::Object> h_obj(hs.NewHandle(obj));
  while (true) {
    LockWord lock_word = obj->GetLockWord(true);
    switch (lock_word.GetState()) {
      case LockWord::kHashCode:
        // Fall-through.
      case LockWord::kUnlocked:
        FailedUnlock(h_obj.Get(), self->GetThreadId(), 0u, nullptr);
        return false;  // Failure.
      case LockWord::kThinLocked: {
        uint32_t thread_id = self->GetThreadId();
        uint32_t owner_thread_id = lock_word.ThinLockOwner();
        if (owner_thread_id != thread_id) {
          FailedUnlock(h_obj.Get(), thread_id, owner_thread_id, nullptr);
          return false;  // Failure.
        } else {
          // We own the lock, decrease the recursion count.
          LockWord new_lw = LockWord::Default();
          if (lock_word.ThinLockCount() != 0) {
            uint32_t new_count = lock_word.ThinLockCount() - 1;
            new_lw = LockWord::FromThinLockId(thread_id, new_count, lock_word.GCState());
          } else {
            // Outermost unlock: go back to the default lock word, keeping the GC state.
            new_lw = LockWord::FromDefault(lock_word.GCState());
          }
          if (!kUseReadBarrier) {
            DCHECK_EQ(new_lw.ReadBarrierState(), 0U);
            // TODO: This really only needs memory_order_release, but we currently have
            // no way to specify that. In fact there seem to be no legitimate uses of SetLockWord
            // with a final argument of true. This slows down x86 and ARMv7, but probably not v8.
            h_obj->SetLockWord(new_lw, true);
            AtraceMonitorUnlock();
            // Success!
            return true;
          } else {
            // Use CAS to preserve the read barrier state.
            if (h_obj->CasLockWord(lock_word, new_lw, CASMode::kWeak, std::memory_order_release)) {
              AtraceMonitorUnlock();
              // Success!
              return true;
            }
          }
          continue;  // Go again.
        }
      }
      case LockWord::kFatLocked: {
        Monitor* mon = lock_word.FatLockMonitor();
        return mon->Unlock(self);
      }
      default: {
        LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
        UNREACHABLE();
      }
    }
  }
}

通常,锁膨胀的操作是单向的,即thinlock可以膨胀成fatlock,但是fatlock不能收缩成thinlock。但是在后台进程进行堆裁剪时,会将所有的fatlock收缩成thinlock。

当为kFatLocked锁时,则获取当前对象的monitor,并调用Monitor::Unlock释放锁

// Releases one level of this monitor on behalf of `self`.
//
// Under monitor_lock_: if `self` is the owner and lock_count_ is 0, clears the ownership fields
// and calls SignalContendersAndReleaseMonitorLock (which also releases monitor_lock_); if
// lock_count_ is non-zero, it only decrements the count. Returns true in both cases.
// If `self` is not the owner, reports the error via FailedUnlock and returns false.
bool Monitor::Unlock(Thread* self) {
  DCHECK(self != nullptr);
  // Owner thread id for the error report; stays 0 when there is no owner.
  uint32_t owner_thread_id = 0u;
  DCHECK(!monitor_lock_.IsExclusiveHeld(self));
  monitor_lock_.Lock(self);
  Thread* owner = owner_;
  if (owner != nullptr) {
    owner_thread_id = owner->GetThreadId();
  }
  if (owner == self) {
    // We own the monitor, so nobody else can be in here.
    AtraceMonitorUnlock();
    if (lock_count_ == 0) {
      owner_ = nullptr;
      locking_method_ = nullptr;
      locking_dex_pc_ = 0;
      SignalContendersAndReleaseMonitorLock(self);
      return true;
    } else {
      --lock_count_;
      monitor_lock_.Unlock(self);
      return true;
    }
  }
  // We don't own this, so we're not allowed to unlock it.
  // The JNI spec says that we should throw IllegalMonitorStateException in this case.
  FailedUnlock(GetObject(), self->GetThreadId(), owner_thread_id, this);
  monitor_lock_.Unlock(self);
  return false;
}
  • 先判断当前线程是否是锁的持有者,如果不是则是错误状态
  • 当前线程的重入计数lock_count_为0时,则调用SignalContendersAndReleaseMonitorLock进行释放锁
// Wakes at most one thread and releases monitor_lock_.
//
// Threads on wake_set_ are popped one by one. The first one whose GetWaitMonitor() is still
// non-null is signalled on its own wait condition variable, after monitor_lock_ has been
// released, and the function returns. If no such thread is found, one waiter on
// monitor_contenders_ is signalled and monitor_lock_ is released afterwards.
void Monitor::SignalContendersAndReleaseMonitorLock(Thread* self) {
  // We want to signal one thread to wake up, to acquire the monitor that
  // we are releasing. This could either be a Thread waiting on its own
  // ConditionVariable, or a thread waiting on monitor_contenders_.
  // A non-null wake_set_ is the Object.wait() path.
  while (wake_set_ != nullptr) {
    // No risk of waking ourselves here; since monitor_lock_ is not released until we're ready to
    // return, notify can't move the current thread from wait_set_ to wake_set_ until this
    // method is done checking wake_set_.
    Thread* thread = wake_set_;
    wake_set_ = thread->GetWaitNext();
    thread->SetWaitNext(nullptr);
    // Check to see if the thread is still waiting.
    {
      // In the case of wait(), we'll be acquiring another thread's GetWaitMutex with
      // self's GetWaitMutex held. This does not risk deadlock, because we only acquire this lock
      // for threads in the wake_set_. A thread can only enter wake_set_ from Notify or NotifyAll,
      // and those hold monitor_lock_. Thus, the threads whose wait mutexes we acquire here must
      // have already been released from wait(), since we have not released monitor_lock_ until
      // after we've chosen our thread to wake, so there is no risk of the following lock ordering
      // leading to deadlock:
      // Thread 1 waits
      // Thread 2 waits
      // Thread 3 moves threads 1 and 2 from wait_set_ to wake_set_
      // Thread 1 enters this block, and attempts to acquire Thread 2's GetWaitMutex to wake it
      // Thread 2 enters this block, and attempts to acquire Thread 1's GetWaitMutex to wake it
      //
      // Since monitor_lock_ is not released until the thread-to-be-woken-up's GetWaitMutex is
      // acquired, two threads cannot attempt to acquire each other's GetWaitMutex while holding
      // their own and cause deadlock.
      MutexLock wait_mu(self, *thread->GetWaitMutex());
      if (thread->GetWaitMonitor() != nullptr) {
        // Release the lock, so that a potentially awakened thread will not
        // immediately contend on it. The lock ordering here is:
        // monitor_lock_, self->GetWaitMutex, thread->GetWaitMutex
        monitor_lock_.Unlock(self);
        thread->GetWaitConditionVariable()->Signal(self);
        return;
      }
    }
  }
  // If we didn't wake any threads that were originally waiting on us,
  // wake a contender.
  // Call Signal to wake one thread.
  monitor_contenders_.Signal(self);
  monitor_lock_.Unlock(self);
}
* art/runtime/base/mutex.cc
// Requeues up to `count` threads blocked on this condition variable onto the
// guard mutex's futex word, so that they are woken when that mutex is released
// rather than immediately. Does nothing when no thread is waiting.
void ConditionVariable::RequeueWaiters(int32_t count) {
  if (!(num_waiters_ > 0)) {
    return;  // Nobody is blocked on this condition variable.
  }

  sequence_++;  // Record that a signal occurred.

  // FUTEX_REQUEUE wakes zero threads here and only moves waiters from
  // sequence_ to the guard's state word. The requeue count is passed through
  // the syscall's timeout slot, hence the pointer cast.
  const int rc = futex(sequence_.Address(),
                       FUTEX_REQUEUE_PRIVATE,
                       /* Threads to wake */ 0,
                       /* Threads to requeue */ reinterpret_cast<const timespec*>(count),
                       guard_.state_and_contenders_.Address(),
                       0);
  if (rc != -1) {
    return;
  }

  // EAGAIN and EINTR are tolerated; any other failure is fatal.
  if (errno == EAGAIN || errno == EINTR) {
    return;
  }
  PLOG(FATAL) << "futex requeue failed for " << name_;
}
  • wake_set_ 不为 null 时,说明有线程调用过 Object.wait 并且已经被 notify/notifyAll 移入唤醒队列;此时从唤醒队列中取出一个仍在等待的线程,并通过它自己的条件变量唤醒它
  • 普通状态下(没有唤醒任何 wait 线程),调用 monitor_contenders_.Signal,进而调用到 ConditionVariable::RequeueWaiters(1),然后调用
    futex(sequence_.Address(),
          FUTEX_REQUEUE_PRIVATE,
          /* Threads to wake */ 0,
          /* Threads to requeue */ reinterpret_cast<const timespec*>(count),
          guard_.state_and_contenders_.Address(),
          0)
    把一个等待线程从条件变量的 futex 转移到 guard_ 互斥锁的 futex 等待队列上,待该互斥锁释放时它才会被唤醒

wait notify

此外,我们常用Object.wait()和Object.notify()来进行线程的同步操作。这两个方法必须使用在以同一个Object为加锁对象的synchronized语句块中,而且都是native方法

* art/runtime/native/java_lang_Object.cc
// Native implementation behind java.lang.Object's wait with a millisecond and
// nanosecond timeout. Decodes the JNI reference `java_this` to a
// mirror::Object and forwards to its Wait() with the calling thread.
static void Object_waitJI(JNIEnv* env, jobject java_this, jlong ms, jint ns) {
  // Scoped helper used to decode the jobject and obtain the current Thread*.
  ScopedFastNativeObjectAccess soa(env);
  soa.Decode<mirror::Object>(java_this)->Wait(soa.Self(), ms, ns);
}

* art/runtime/mirror/object-inl.h
// Waits on this object's monitor on behalf of `self` for up to `ms`
// milliseconds plus `ns` nanoseconds. Forwards to the static Monitor::Wait
// with interruptShouldThrow = true and an initial wait reason of kTimedWaiting.
inline void Object::Wait(Thread* self, int64_t ms, int32_t ns) {
  Monitor::Wait(self, this, ms, ns, true, kTimedWaiting);
}

* art/runtime/monitor.cc
// Static entry point for waiting on `obj`.
//
// Checks the object's lock word: if the object is unlocked, only carries a
// hash code, or is thin-locked by a different thread, an
// IllegalMonitorStateException is raised via ThrowIllegalMonitorStateExceptionF
// and the function returns. If it is thin-locked by `self`, the lock is
// inflated so that a Monitor exists. Once the lock word is in the fat-locked
// state, the call is delegated to the instance method Monitor::Wait.
//
// Parameters:
//   self                 - the calling thread; must not be null.
//   obj                  - the object to wait on; must not be null.
//   ms, ns               - timeout in milliseconds / nanoseconds.
//   interruptShouldThrow - forwarded unchanged to the instance Wait().
//   why                  - thread state to use while waiting; forwarded unchanged.
void Monitor::Wait(Thread* self,
                   ObjPtr<mirror::Object> obj,
                   int64_t ms,
                   int32_t ns,
                   bool interruptShouldThrow,
                   ThreadState why) {
  DCHECK(self != nullptr);
  DCHECK(obj != nullptr);
  // Keep the object in a handle for the rest of the function.
  StackHandleScope<1> hs(self);
  Handle<mirror::Object> h_obj(hs.NewHandle(obj));

  // Tell the runtime callbacks that a wait is starting; bail out if that left
  // an exception pending on this thread.
  Runtime::Current()->GetRuntimeCallbacks()->ObjectWaitStart(h_obj, ms);
  if (UNLIKELY(self->ObserveAsyncException() || self->IsExceptionPending())) {
    // See b/65558434 for information on handling of exceptions here.
    return;
  }

  // Loop until the lock word is fat-locked; every other state either fails or
  // triggers an inflation attempt followed by a re-read.
  LockWord lock_word = h_obj->GetLockWord(true);
  while (lock_word.GetState() != LockWord::kFatLocked) {
    switch (lock_word.GetState()) {
      case LockWord::kHashCode:
        // Fall-through.
      case LockWord::kUnlocked:
        ThrowIllegalMonitorStateExceptionF("object not locked by thread before wait()");
        return;  // Failure.
      case LockWord::kThinLocked: {
        uint32_t thread_id = self->GetThreadId();
        uint32_t owner_thread_id = lock_word.ThinLockOwner();
        if (owner_thread_id != thread_id) {
          // Thin-locked, but by some other thread.
          ThrowIllegalMonitorStateExceptionF("object not locked by thread before wait()");
          return;  // Failure.
        } else {
          // We own the lock, inflate to enqueue ourself on the Monitor. May fail spuriously so
          // re-load.
          Inflate(self, self, h_obj.Get(), 0);
          lock_word = h_obj->GetLockWord(true);
        }
        break;
      }
      case LockWord::kFatLocked:  // Unreachable given the loop condition above. Fall-through.
      default: {
        LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
        UNREACHABLE();
      }
    }
  }
  // The lock word is fat-locked here: hand over to the Monitor instance.
  Monitor* mon = lock_word.FatLockMonitor();
  mon->Wait(self, ms, ns, interruptShouldThrow, why);
}

wait()的内部实现中会对object的Monitor的LockWord进行检查。若不是加锁状态,会直接抛出异常”object not locked by thread before wait()”,说明使用wait()函数前需要使用synchronized进行加锁,这也是我们看到的Object.wait()是使用在synchronized语句内部的原因。

如果LockWord表明加的锁是ThinLock,若锁的所属线程不是当前线程,也会抛出异常”object not locked by thread before wait()”。若锁的所属线程是当前线程,将ThinLock锁膨胀为FatLock。由于膨胀过程需要用到CAS,所以可能会”fail spuriously”,于是重新执行while循环再次进行锁膨胀。锁膨胀成功后,调用Monitor的重载版本的wait函数。

// Instance-level wait on this (fat) monitor.
//
// Verifies that `self` owns the monitor and that the timeout is in range,
// saves and clears the ownership state (owner, recursion count, locking
// method / dex pc), enqueues `self` on wait_set_, then blocks on the thread's
// own condition variable. After waking it clears the thread's wait monitor,
// optionally throws InterruptedException, re-acquires the monitor and restores
// the saved ownership state.
//
// Parameters:
//   self                 - the calling thread; must not be null.
//   ms, ns               - timeout; ms must be >= 0 and ns in [0, 999999],
//                          otherwise IllegalArgumentException is thrown.
//   interruptShouldThrow - when true and the thread was interrupted, the
//                          interrupted flag is cleared and InterruptedException
//                          is thrown.
//   why                  - kTimedWaiting, kWaiting or kSleeping; kTimedWaiting
//                          with a zero timeout is converted to kWaiting.
void Monitor::Wait(Thread* self, int64_t ms, int32_t ns,
                   bool interruptShouldThrow, ThreadState why) {
  DCHECK(self != nullptr);
  DCHECK(why == kTimedWaiting || why == kWaiting || why == kSleeping);
  // monitor_lock_ is a mutex; it is taken here and released explicitly on
  // each early-return path below.
  monitor_lock_.Lock(self);

  // Make sure that we hold the lock.
  if (owner_ != self) {
    monitor_lock_.Unlock(self);
    ThrowIllegalMonitorStateExceptionF("object not locked by thread before wait()");
    return;
  }

  // A timed wait with a zero timeout is treated as an untimed wait (kWaiting).
  if (why == kTimedWaiting && (ms == 0 && ns == 0)) {
    why = kWaiting;
  }
  // Enforce the timeout range.
  if (ms < 0 || ns < 0 || ns > 999999) {
    monitor_lock_.Unlock(self);
    self->ThrowNewExceptionF("Ljava/lang/IllegalArgumentException;",
                             "timeout arguments out of range: ms=%" PRId64 " ns=%d", ms, ns);
    return;
  }

  /*
   * Release our hold - we need to let it go even if we're a few levels
   * deep in a recursive lock, and we need to restore that later.
   */
  int prev_lock_count = lock_count_;
  lock_count_ = 0;
  owner_ = nullptr;
  ArtMethod* saved_method = locking_method_;
  locking_method_ = nullptr;
  uintptr_t saved_dex_pc = locking_dex_pc_;
  locking_dex_pc_ = 0;

  AtraceMonitorUnlock();  // For the implicit Unlock() just above. This will only end the deepest
                          // nesting, but that is enough for the visualization, and corresponds to
                          // the single Lock() we do afterwards.
  AtraceMonitorLock(self, GetObject(), /* is_wait= */ true);

  bool was_interrupted = false;
  bool timed_out = false;
  {
    // Update the thread's state to `why` (via ScopedThreadSuspension) for this scope.
    ScopedThreadSuspension sts(self, why);
    MutexLock mu(self, *self->GetWaitMutex());
    // Append the current thread to wait_set_.
    AppendToWaitSet(self);
    // One more waiter: this thread is about to block.
    ++num_waiters_;
    DCHECK(self->GetWaitMonitor() == nullptr);
    // Record this monitor as the one the thread is blocked on.
    self->SetWaitMonitor(this);

    // Wake a thread blocked on monitor_contenders_ (threads that try to take
    // an already-held fat lock block on that condition variable).
    // NOTE(review): per its name this helper also releases monitor_lock_,
    // which was taken at the top of this function - confirm against its body.
    SignalContendersAndReleaseMonitorLock(self);

    // Handle the case where the thread was interrupted before we called wait().
    if (self->IsInterrupted()) {
      was_interrupted = true;
    } else {
      // Wait for a notification or a timeout to occur.
      if (why == kWaiting) {
        // The actual blocking happens here, on the thread's own condition variable.
        self->GetWaitConditionVariable()->Wait(self);
      } else {
        DCHECK(why == kTimedWaiting || why == kSleeping) << why;
        timed_out = self->GetWaitConditionVariable()->TimedWait(self, ms, ns);
      }
      was_interrupted = self->IsInterrupted();
    }
  }

  {
    // We reset the thread's wait_monitor_ field after transitioning back to runnable so
    // that a thread in a waiting/sleeping state has a non-null wait_monitor_ for debugging
    // and diagnostic purposes. (If you reset this earlier, stack dumps will claim that threads
    // are waiting on "null".)
    MutexLock mu(self, *self->GetWaitMutex());
    DCHECK(self->GetWaitMonitor() != nullptr);
    // Clear the thread's wait monitor.
    self->SetWaitMonitor(nullptr);
  }

  // Consume the interrupt and surface it as an exception if the caller asked for that.
  if (was_interrupted && interruptShouldThrow) {
    self->SetInterrupted(false);
    self->ThrowNewException("Ljava/lang/InterruptedException;", nullptr);
  }

  AtraceMonitorUnlock();  // End Wait().

  // We just slept, tell the runtime callbacks about this.
  Runtime::Current()->GetRuntimeCallbacks()->MonitorWaitFinished(this, timed_out);

  // Re-acquire the monitor, then take monitor_lock_ to update its fields.
  Lock<LockReason::kForWait>(self);
  monitor_lock_.Lock(self);
  self->GetWaitMutex()->AssertNotHeld(self);

  // Restore the saved ownership state, as if the lock had never been released.
  owner_ = self;
  lock_count_ = prev_lock_count;
  locking_method_ = saved_method;
  locking_dex_pc_ = saved_dex_pc;
  --num_waiters_;
  RemoveFromWaitSet(self);

  monitor_lock_.Unlock(self);
}

Monitor::Wait中会释放FatLock锁,让竞争线程拿到锁执行。而Notify会从wait_set_的头部取出一个线程(即因为Object.wait()或其他wait重载版本而阻塞的线程),把它移入wake_set_等待唤醒。所以,Object.notify()是按进入阻塞状态的先后顺序来选取线程的,谁先阻塞,就会先被选中。但是,Object.notify()并不表明被唤醒的线程能立刻拿回锁,要等notify所在的synchronized语句块执行完,被唤醒的线程才能重新加锁,否则会再次阻塞在Monitor::Lock上。就我所见,一般notify调用都是synchronized语句块的最后一句

// Moves the first thread of wait_set_ (if any) onto the front of wake_set_.
// The caller must own this monitor; otherwise an IllegalMonitorStateException
// is raised via ThrowIllegalMonitorStateExceptionF and nothing is moved.
void Monitor::Notify(Thread* self) {
  DCHECK(self != nullptr);
  MutexLock mu(self, monitor_lock_);

  // Only the owner of the monitor may notify.
  if (owner_ != self) {
    ThrowIllegalMonitorStateExceptionF("object not locked by thread before notify()");
    return;
  }

  Thread* const first_waiter = wait_set_;
  if (first_waiter == nullptr) {
    return;  // No thread is waiting.
  }

  // Unlink the head of wait_set_ and push it onto the head of wake_set_.
  wait_set_ = first_waiter->GetWaitNext();
  first_waiter->SetWaitNext(wake_set_);
  wake_set_ = first_waiter;
}

如果我们把monitor_enter/monitor_exit看成是Fat Lock方式,则可以把Thin Lock看成是一种基于CAS(Compare and Swap)的简易实现。
这两种锁,简单一点理解,就是:
基于CAS方式的实现中,线程发生竞争时,未获得锁的线程会处于自旋状态(也称之为Spin Mode),这是一种类似 while(锁未释放) { 空转 } 的Busy-Wait方式,比较耗CPU;而Fat Lock方式下,一个线程获得锁的时候,其他线程可以先sleep,等锁释放后,再被唤醒(Notify)。
CAS的优点是快,如果没有线程竞争的情况下,因为CAS只需要一个指令便获得锁,所以称之为Thin Lock,缺点也是很明显的,即如果频繁发生线程竞争,CAS是低效,主要表现为,排斥在锁之外的线程是Busy Wait状态;而monitor_enter/monitor_exit/monitor_notify方式,则是重量级的,在线程产生竞争的时候,Fat Lock在OS mutex方式下,可以实现no busy-wait

Notify

// Moves the first thread of wait_set_ (if any) onto the front of wake_set_.
// The caller must own this monitor; otherwise an IllegalMonitorStateException
// is raised via ThrowIllegalMonitorStateExceptionF and nothing is moved.
void Monitor::Notify(Thread* self) {
  DCHECK(self != nullptr);
  printfTest(self, "Notify");  // Debug trace hook, called before monitor_lock_ is taken.
  MutexLock mu(self, monitor_lock_);

  // Only the owner of the monitor may notify.
  if (owner_ != self) {
    ThrowIllegalMonitorStateExceptionF("object not locked by thread before notify()");
    return;
  }

  Thread* const first_waiter = wait_set_;
  if (first_waiter == nullptr) {
    return;  // No thread is waiting.
  }

  // Unlink the head of wait_set_ and push it onto the head of wake_set_.
  wait_set_ = first_waiter->GetWaitNext();
  first_waiter->SetWaitNext(wake_set_);
  wake_set_ = first_waiter;
}
  • 假如调用notify的线程不是持锁线程,则抛出异常
  • 假如wait_set_等待列表不为空,则移出当前等待线程,并将wait_set_指向下一个线程
    并把当前等待线程放入wake_set_唤醒列表的头部,并将wake_set_指向它

NotifyAll

// Moves every thread in wait_set_ to wake_set_, appending them after any
// threads already in wake_set_. The caller must own this monitor; otherwise
// an IllegalMonitorStateException is raised via
// ThrowIllegalMonitorStateExceptionF and nothing is moved.
void Monitor::NotifyAll(Thread* self) {
  DCHECK(self != nullptr);
  printfTest(self, "NotifyAll");  // Debug trace hook, called before monitor_lock_ is taken.
  MutexLock mu(self, monitor_lock_);

  // Only the owner of the monitor may notify.
  if (owner_ != self) {
    ThrowIllegalMonitorStateExceptionF("object not locked by thread before notifyAll()");
    return;
  }

  Thread* const waiters = wait_set_;
  if (waiters == nullptr) {
    return;  // No thread is waiting.
  }
  wait_set_ = nullptr;

  // Empty wake set: the whole waiter chain becomes the wake set.
  if (wake_set_ == nullptr) {
    wake_set_ = waiters;
    return;
  }

  // Otherwise find the last node of wake_set_ and hang the waiter chain off it.
  Thread* tail = wake_set_;
  for (Thread* next = tail->GetWaitNext(); next != nullptr; next = tail->GetWaitNext()) {
    tail = next;
  }
  tail->SetWaitNext(waiters);
}
  • 假如调用notifyAll的线程不是持锁线程,则抛出异常
  • 假如wake_set_为空,则wait_set_作为wake_set_
  • 假如wake_set_不为空,则把wait_set_追加到wake_set_尾部

FUTEX

经研究发现,很多同步是无竞争的,即某个进程从进入互斥区到离开互斥区的这段时间里,常常没有其他进程也要进入这个互斥区或者请求同一个同步变量。但是在这种情况下,这个进程仍然要陷入内核去看看有没有人和它竞争,退出的时候还要陷入内核去看看有没有进程等待在同一同步变量上。这些不必要的系统调用(或者说内核陷入)造成了大量的性能开销。为了解决这个问题,Futex应运而生。Futex是一种用户态和内核态混合的同步机制。首先,同步的进程间通过mmap共享一段内存,futex变量就位于这段共享的内存中,且对它的操作是原子的。当进程尝试进入互斥区或者退出互斥区的时候,先去查看共享内存中的futex变量:如果没有竞争发生,则只修改futex变量,而不用再执行系统调用;如果futex变量表明有竞争发生,则还是得执行系统调用去完成相应的处理(wait 或者 wake up)。简单地说,futex的出发点(motivation)就是先在用户态做检查,如果了解到没有竞争就不用陷入内核,从而大大提高了low-contention时候的效率。Linux从2.5.7开始支持Futex

* art/runtime/base/mutex-inl.h
// Thin wrapper around the raw SYS_futex system call. All six arguments are
// passed through unchanged; the syscall's result is returned as an int
// (-1 on failure, with errno set by syscall()).
static inline int futex(volatile int *uaddr, int op, int val, const struct timespec *timeout,
                        volatile int *uaddr2, int val3) {
  const long rc = syscall(SYS_futex, uaddr, op, val, timeout, uaddr2, val3);
  return static_cast<int>(rc);
}

Monitor 锁的功能实现依赖linux 的futex功能和原子操作的原理, Futex主要的作用有两点:支持一种粒度锁的睡眠与唤醒操作,其次是管理进程挂起时的等待队列
uaddr就是用户态下共享内存的地址,里面存放的是一个对齐的整型计数器

Futex同步机制

所有的futex同步操作都应该从用户空间开始,首先创建一个futex同步变量,也就是位于共享内存的一个整型计数器。
当进程尝试持有锁或者要进入互斥区的时候,对futex执行"down"操作,即原子性地给futex同步变量减1。如果同步变量变为0,则没有竞争发生,进程照常执行。如果同步变量是个负数,则意味着有竞争发生,需要调用futex系统调用的futex_wait操作休眠当前进程。
当进程释放锁或者要离开互斥区的时候,对futex进行"up"操作,即原子性地给futex同步变量加1。如果同步变量由0变成1,则没有竞争发生,进程照常执行。如果加之前同步变量是负数,则意味着有竞争发生,需要调用futex系统调用的futex_wake操作唤醒一个或者多个等待进程。

这里的原子性加减通常是用CAS(Compare and Swap)完成的,与平台相关。CAS的基本形式是:CAS(addr,old,new),当addr中存放的值等于old时,用new对其替换。在x86平台上有专门的一条指令来完成它: cmpxchg。

可见: futex是从用户态开始,由用户态和核心态协调完成的

进程或者线程都可以利用futex来进行同步。
对于线程,情况比较简单,因为线程共享虚拟内存空间,虚拟地址就可以唯一的标识出futex变量,即线程用同样的虚拟地址来访问futex变量。
对于进程,情况相对复杂,因为进程有独立的虚拟内存空间,只有通过mmap()让它们共享一段地址空间才能使用futex变量。每个进程用来访问futex的虚拟地址可以是不一样的,只要系统知道所有这些虚拟地址都映射到同一个物理内存地址,并用物理内存地址来唯一标识futex变量即可

  • FUTEX_WAKE 参数:唤醒val个在state_.Address指向的锁变量上挂起等待的线程。如下唤醒1个线程
    futex(state_.Address(), FUTEX_WAKE, 1, nullptr, nullptr, 0);
  • FUTEX_WAIT参数:当state_.Address处的值和val值相等时,进入等待状态。对应线程将不再执行,必须唤醒后才能执行后面的操作
    futex(state_.Address(), FUTEX_WAIT, cur_state, nullptr, nullptr, 0)
  • FUTEX_CMP_REQUEUE参数:当sequence_.Address地址处的值和cur_sequence相等时,将sequence_.Address处挂起的等待队列,转移到guard_.state_.Address()的等待队列上。
    futex(sequence_.Address(), FUTEX_CMP_REQUEUE, 0, reinterpret_cast<const timespec*>(std::numeric_limits<int32_t>::max()), guard_.state_.Address(), cur_sequence)
    requeue操作在用户态的调用形式为

futex(sequence_.Address(),
      FUTEX_REQUEUE_PRIVATE,
      /* Threads to wake */ 0,
      /* Threads to requeue */ reinterpret_cast<const timespec*>(count),
      guard_.state_and_contenders_.Address(),
      0)
对应的内核函数原型为
static int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, int nr_wake, int nr_requeue, u32 *cmpval, int requeue_pi);
这个函数的功能是,将uaddr1对应的futex等待队列出队最多 (nr_wake + nr_requeue) 个futex_q,先对出队的futex_q进行唤醒nr_wake个,剩下的才进行requeue到futex2等待队列。而pthread_cond_broadcast固定设定参数 nr_wake = 1,nr_requeue = INT_MAX。就是说在broadcast的时候只会唤醒一个线程去竞争futex2,其余阻塞在futex1的线程都会出队再入队到futex2的等待队列。
futex_requeue函数只会将可以马上获得pi-futex锁的线程唤醒,也就是说对每一个requeue到pi-futex的waiter都先尝试锁pi-futex,尝试成功的才能被唤醒,直到有线程被唤醒,其余waiter就直接requeue到pi-futex,而不再进行锁尝试

你可能感兴趣的:(技术总结)