android synchronized原理分析(一) java HotSpot
/**
 * Demo class exercising the three forms of {@code synchronized}: a block that locks an
 * instance, a synchronized instance method, and a synchronized static method.
 */
public class SyncLock {
/**
 * Starts a thread named "thread1" whose body sleeps for 1000 ms inside
 * {@code synchronized (SyncLock.this)}, then does the same on the calling thread.
 * Both threads therefore lock the same SyncLock instance.
 */
public void testBolokCode(){
Thread thread1 = new Thread(new Runnable() {
@Override
public void run() {
// Lock the enclosing SyncLock instance, not the anonymous Runnable.
synchronized (SyncLock.this){
try{
Thread.sleep(1000);
}catch(Exception e){
e.printStackTrace();
}
}
}
},"thread1");
thread1.start();
// The calling thread locks the same object as thread1.
synchronized (SyncLock.this){
try{
Thread.sleep(1000);
}catch(Exception e){
e.printStackTrace();
}
}
}
/** Empty synchronized instance method. */
public synchronized void testBolokMethod(){
}
/** Empty synchronized static method. */
public static synchronized void testStaticBolokMethod(){
}
}
用java -jar baksmali-2.4.0.jar d classes2.dex -o classes2 命令转换为smali后
# Synchronized static method: the monitor is taken on the SyncLock class object.
.method public static declared-synchronized testStaticBolokMethod()V
.registers 1
# v0 = SyncLock.class
const-class v0, Lcom/kpgn/appone/SyncLock;
monitor-enter v0
.line 31
monitor-exit v0
return-void
.end method
# virtual methods
# Creates and starts "thread1", then sleeps 1000 ms while holding the monitor of p0 (this).
.method public testBolokCode()V
.registers 4
.line 5
# v0 = new Thread(new SyncLock$1(this), "thread1")
new-instance v0, Ljava/lang/Thread;
new-instance v1, Lcom/kpgn/appone/SyncLock$1;
invoke-direct {v1, p0}, Lcom/kpgn/appone/SyncLock$1;-><init>(Lcom/kpgn/appone/SyncLock;)V
const-string v2, "thread1"
invoke-direct {v0, v1, v2}, Ljava/lang/Thread;-><init>(Ljava/lang/Runnable;Ljava/lang/String;)V
.line 17
.local v0, "thread1":Ljava/lang/Thread;
invoke-virtual {v0}, Ljava/lang/Thread;->start()V
.line 18
# Start of the synchronized block: acquire the monitor of this.
monitor-enter p0
.line 20
# 0x3e8 = 1000 (sleep argument)
const-wide/16 v1, 0x3e8
:try_start_12
invoke-static {v1, v2}, Ljava/lang/Thread;->sleep(J)V
:try_end_15
.catch Ljava/lang/Exception; {:try_start_12 .. :try_end_15} :catch_18
.catchall {:try_start_12 .. :try_end_15} :catchall_16
.line 23
goto :goto_1c
.line 24
# Catch-all handler: jumps to :goto_1e, which releases the monitor and rethrows.
:catchall_16
move-exception v1
goto :goto_1e
.line 21
# Handler for the Java catch (Exception e) clause.
:catch_18
move-exception v1
.line 22
.local v1, "e":Ljava/lang/Exception;
:try_start_19
invoke-virtual {v1}, Ljava/lang/Exception;->printStackTrace()V
.line 24
.end local v1 # "e":Ljava/lang/Exception;
# Normal exit path: release the monitor and return.
:goto_1c
monitor-exit p0
.line 25
return-void
.line 24
# Exceptional exit path: release the monitor, then rethrow the exception held in v1.
:goto_1e
monitor-exit p0
:try_end_1f
.catchall {:try_start_19 .. :try_end_1f} :catchall_16
throw v1
.end method
# Synchronized instance method: the monitor is taken on p0 (this).
.method public declared-synchronized testBolokMethod()V
.registers 1
monitor-enter p0
.line 28
monitor-exit p0
return-void
.end method
以上可以看到对应synchronized的虚拟机指令就是monitor-enter 和monitor-exit 。对应art/runtime/monitor.cc 中的Monitor::MonitorEnter和Monitor::MonitorExit 函数实现,调用堆栈为
art::Monitor::MonitorEnter(art::Thread*, art::ObjPtr<art::mirror::Object>, bool)
art::ObjectLock<art::mirror::Class>::ObjectLock(art::Thread*, art::Handle<art::mirror::Class>)
* art/runtime/object_lock.cc
// Constructor: stores the thread and the object handle, then enters the object's monitor
// on behalf of `self`. Aborts (CHECK) if `object` is null.
ObjectLock<T>::ObjectLock(Thread* self, Handle<T> object) : self_(self), obj_(object) {
CHECK(object != nullptr);
obj_->MonitorEnter(self_);
}
* art/runtime/mirror/object-inl.h
// Forwards to the static Monitor::MonitorEnter for this object with trylock disabled.
inline ObjPtr<mirror::Object> Object::MonitorEnter(Thread* self) {
return Monitor::MonitorEnter(self, this, /*trylock=*/false);
}
* art/runtime/monitor.cc
// Acquires the lock of `obj` for thread `self`, dispatching on the state of the object's
// lock word:
//   - kUnlocked:   CAS in a thin lock owned by `self` with count 0.
//   - kThinLocked: if owned by `self`, bump the recursion count (inflating on overflow);
//                  otherwise yield up to GetMaxSpinsBeforeThinLockInflation() times and
//                  then inflate.
//   - kFatLocked:  delegate to the Monitor referenced by the lock word.
//   - kHashCode:   inflate, carrying over the existing hash code, and retry.
// Returns the (possibly updated) object on success. When `trylock` is true, returns nullptr
// instead of waiting if the lock is held by another thread.
ObjPtr<mirror::Object> Monitor::MonitorEnter(Thread* self, ObjPtr<mirror::Object> obj, bool trylock) {
DCHECK(self != nullptr);
DCHECK(obj != nullptr);
self->AssertThreadSuspensionIsAllowable();
obj = FakeLock(obj);
uint32_t thread_id = self->GetThreadId();
// Number of failed attempts against a thin lock held by another thread.
size_t contention_count = 0;
StackHandleScope<1> hs(self);
Handle<mirror::Object> h_obj(hs.NewHandle(obj));
while (true) {
// We initially read the lockword with ordinary Java/relaxed semantics. When stronger
// semantics are needed, we address it below. Since GetLockWord bottoms out to a relaxed load,
// we can fix it later, in an infrequently executed case, with a fence.
LockWord lock_word = h_obj->GetLockWord(false);
switch (lock_word.GetState()) {
case LockWord::kUnlocked: {
// No ordering required for preceding lockword read, since we retest.
LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0, lock_word.GCState()));
if (h_obj->CasLockWord(lock_word, thin_locked, CASMode::kWeak, std::memory_order_acquire)) {
AtraceMonitorLock(self, h_obj.Get(), /* is_wait= */ false);
return h_obj.Get(); // Success!
}
continue; // Go again.
}
case LockWord::kThinLocked: {
uint32_t owner_thread_id = lock_word.ThinLockOwner();
if (owner_thread_id == thread_id) {
// No ordering required for initial lockword read.
// We own the lock, increase the recursion count.
uint32_t new_count = lock_word.ThinLockCount() + 1;
if (LIKELY(new_count <= LockWord::kThinLockMaxCount)) {
LockWord thin_locked(LockWord::FromThinLockId(thread_id,
new_count,
lock_word.GCState()));
// Only this thread pays attention to the count. Thus there is no need for stronger
// than relaxed memory ordering.
if (!kUseReadBarrier) {
h_obj->SetLockWord(thin_locked, /* as_volatile= */ false);
AtraceMonitorLock(self, h_obj.Get(), /* is_wait= */ false);
return h_obj.Get(); // Success!
} else {
// Use CAS to preserve the read barrier state.
if (h_obj->CasLockWord(lock_word,
thin_locked,
CASMode::kWeak,
std::memory_order_relaxed)) {
AtraceMonitorLock(self, h_obj.Get(), /* is_wait= */ false);
return h_obj.Get(); // Success!
}
}
continue; // Go again.
} else {
// We'd overflow the recursion count, so inflate the monitor.
InflateThinLocked(self, h_obj, lock_word, 0);
}
} else {
// Thin lock is held by a different thread.
if (trylock) {
return nullptr;
}
// Contention.
contention_count++;
Runtime* runtime = Runtime::Current();
if (contention_count <= runtime->GetMaxSpinsBeforeThinLockInflation()) {
// TODO: Consider switching the thread state to kWaitingForLockInflation when we are
// yielding. Use sched_yield instead of NanoSleep since NanoSleep can wait much longer
// than the parameter you pass in. This can cause thread suspension to take excessively
// long and make long pauses. See b/16307460.
// TODO: We should literally spin first, without sched_yield. Sched_yield either does
// nothing (at significant expense), or guarantees that we wait at least microseconds.
// If the owner is running, I would expect the median lock hold time to be hundreds
// of nanoseconds or less.
sched_yield();
} else {
// Spin budget exhausted: reset the counter and inflate the thin lock.
contention_count = 0;
// No ordering required for initial lockword read. Install rereads it anyway.
InflateThinLocked(self, h_obj, lock_word, 0);
}
}
continue; // Start from the beginning.
}
case LockWord::kFatLocked: {
// We should have done an acquire read of the lockword initially, to ensure
// visibility of the monitor data structure. Use an explicit fence instead.
std::atomic_thread_fence(std::memory_order_acquire);
Monitor* mon = lock_word.FatLockMonitor();
if (trylock) {
return mon->TryLock(self) ? h_obj.Get() : nullptr;
} else {
mon->Lock(self);
return h_obj.Get(); // Success!
}
}
case LockWord::kHashCode:
// Inflate with the existing hashcode.
// Again no ordering required for initial lockword read, since we don't rely
// on the visibility of any prior computation.
Inflate(self, nullptr, h_obj.Get(), lock_word.GetHashCode());
continue; // Start from the beginning.
default: {
LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
UNREACHABLE();
}
}
}
}
Android运行时初始化时会创建一个monitor池
frameworks/base/core/jni/AndroidRuntime.cpp
libnativehelper/JniInvocation.cpp
art/runtime/jni_internal.cc
art/runtime/runtime.cc
AndroidRuntime::startVm->JNI_CreateJavaVM->Runtime::Create->Runtime::Init->MonitorPool::Create()
- art/runtime/monitor_pool.cc
monitor_pool_ = MonitorPool::Create();
monitor pool使用一个chunk对应一个monitor。num_chunks_ 记录了当前chunk的数量,capacity_记录了当前chunk的容量,first_free_记录了当前第一个可用的chunk地址。初始化monitor池的时候,会调用AllocateChunk分配一个chunk,以后每次需要使用新的monitor的时候,也会调用AllocateChunk分配一个chunk。
Object类的定义。这个Object类是ART运行时里面的所有对象的基类,它有一个monitor_成员变量,如下所示:
* art/runtime/mirror/object.h
// C++ mirror of java.lang.Object
// (Excerpt: members other than monitor_ are elided with "...".)
class MANAGED LOCKABLE Object {
...
// Monitor and hash code information.
// Raw 32-bit lock value; LockWord (lock_word.h) defines how its bits are interpreted.
uint32_t monitor_;
...
}
这个32位的monitor_成员变量的责任重大,除了用来描述对象的Monitor和Hash Code信息之外,还包括对象的移动信息。这个monitor_成员变量通过封装成一个LockWord对象来描述
* art/runtime/lock_word.h
/* The lock value itself as stored in mirror::Object::monitor_. The two most significant bits
* encode the state. The four possible states are fat locked, thin/unlocked, hash code, and
* forwarding address.
*
* When the lock word is in the "thin" state and its bits are formatted as follows:
*
* |33|2|2|222222221111|1111110000000000|
* |10|9|8|765432109876|5432109876543210|
* |00|m|r| lock count |thread id owner |
*
* When the lock word is in the "fat" state and its bits are formatted as follows:
*
* |33|2|2|2222222211111111110000000000|
* |10|9|8|7654321098765432109876543210|
* |01|m|r| MonitorId |
*
* When the lock word is in hash state and its bits are formatted as follows:
*
* |33|2|2|2222222211111111110000000000|
* |10|9|8|7654321098765432109876543210|
* |10|m|r| HashCode |
*
* When the lock word is in forwarding address state and its bits are formatted as follows:
*
* |33|2|22222222211111111110000000000|
* |10|9|87654321098765432109876543210|
* |11|0| ForwardingAddress |
*
* The `r` bit stores the read barrier state.
* The `m` bit stores the mark bit state.
*/
// Wrapper around the 32-bit lock value stored in mirror::Object::monitor_.
// (Excerpt: parts of the class are elided with "...".)
// With the sizes below the layout is: state in bits 31-30, mark bit in bit 29, read barrier
// bit in bit 28, and the remaining 28 bits hold either {12-bit thin lock count, 16-bit owner
// thread id}, a monitor id, or a hash code, depending on the state.
class LockWord {
public:
enum SizeShiftsAndMasks : uint32_t { // private marker to avoid generate-operator-out.py from processing.
// Number of bits to encode the state, currently just fat or thin/unlocked or hash code.
kStateSize = 2,
kReadBarrierStateSize = 1,
kMarkBitStateSize = 1,
// Number of bits to encode the thin lock owner.
kThinLockOwnerSize = 16,
// Remaining bits are the recursive lock count.
// 32 - 16 - 2 - 1 - 1 = 12 bits.
kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize - kReadBarrierStateSize -
kMarkBitStateSize,
// Thin lock bits. Owner in lowest bits.
kThinLockOwnerShift = 0,
kThinLockOwnerMask = (1 << kThinLockOwnerSize) - 1,
kThinLockOwnerMaskShifted = kThinLockOwnerMask << kThinLockOwnerShift,
kThinLockMaxOwner = kThinLockOwnerMask,
// Count in higher bits.
// Shift is 16; the mask, and therefore the maximum count, is (1 << 12) - 1 = 4095.
kThinLockCountShift = kThinLockOwnerSize + kThinLockOwnerShift,
kThinLockCountMask = (1 << kThinLockCountSize) - 1,
kThinLockMaxCount = kThinLockCountMask,
kThinLockCountOne = 1 << kThinLockCountShift, // == 65536 (0x10000)
kThinLockCountMaskShifted = kThinLockCountMask << kThinLockCountShift,
// State in the highest bits.
// 1 + 12 + 16 + 1 = 30, i.e. the state occupies bits 31-30.
kStateShift = kReadBarrierStateSize + kThinLockCountSize + kThinLockCountShift +
kMarkBitStateSize,
kStateMask = (1 << kStateSize) - 1,
kStateMaskShifted = kStateMask << kStateShift,
// Encoded state values; unlocked and thin-locked share the value 0.
kStateThinOrUnlocked = 0,
kStateFat = 1,
kStateHash = 2,
kStateForwardingAddress = 3,
kStateForwardingAddressShifted = kStateForwardingAddress << kStateShift,
kStateForwardingAddressOverflow = (1 + kStateMask - kStateForwardingAddress) << kStateShift,
// Read barrier bit.
// 12 + 16 = 28, i.e. bit 28.
kReadBarrierStateShift = kThinLockCountSize + kThinLockCountShift,
kReadBarrierStateMask = (1 << kReadBarrierStateSize) - 1,
kReadBarrierStateMaskShifted = kReadBarrierStateMask << kReadBarrierStateShift,
kReadBarrierStateMaskShiftedToggled = ~kReadBarrierStateMaskShifted,
// Mark bit.
// 1 + 28 = 29, i.e. bit 29.
kMarkBitStateShift = kReadBarrierStateSize + kReadBarrierStateShift,
kMarkBitStateMask = (1 << kMarkBitStateSize) - 1,
kMarkBitStateMaskShifted = kMarkBitStateMask << kMarkBitStateShift,
kMarkBitStateMaskShiftedToggled = ~kMarkBitStateMaskShifted,
// GC state is mark bit and read barrier state.
kGCStateSize = kReadBarrierStateSize + kMarkBitStateSize,
kGCStateShift = kReadBarrierStateShift,
kGCStateMaskShifted = kReadBarrierStateMaskShifted | kMarkBitStateMaskShifted,
kGCStateMaskShiftedToggled = ~kGCStateMaskShifted,
// When the state is kHashCode, the non-state bits hold the hashcode.
// Note Object.hashCode() has the hash code layout hardcoded.
// 32 - 2 - 1 - 1 = 28 bits of hash code, starting at bit 0.
kHashShift = 0,
kHashSize = 32 - kStateSize - kReadBarrierStateSize - kMarkBitStateSize,
kHashMask = (1 << kHashSize) - 1,
kMaxHash = kHashMask,
// Forwarding address shift.
kForwardingAddressShift = kObjectAlignmentShift,
// The monitor id reuses the hash code field: same shift, size and mask.
kMonitorIdShift = kHashShift,
kMonitorIdSize = kHashSize,
kMonitorIdMask = kHashMask,
kMonitorIdAlignmentShift = 32 - kMonitorIdSize,
kMonitorIdAlignment = 1 << kMonitorIdAlignmentShift,
kMaxMonitorId = kMaxHash
};
...
// Logical lock states as reported to callers (distinct from the 2-bit encoded values above).
enum LockState {
kUnlocked, // No lock owners.
kThinLocked, // Single uncontended owner.
kFatLocked, // See associated monitor.
kHashCode, // Lock word contains an identity hash.
kForwardingAddress, // Lock word contains the forwarding address of an object.
};
...
// Only Object should be converting LockWords to/from uints.
friend class mirror::Object;
// The encoded value holding all the state.
uint32_t value_;
}
这里的monitor_id_,在Monitor类中也有存储,一个synchronized(Obj)拥有一个monitor_id_,即对同一个Obj的锁,拥有相同的monitor_id_,在MonitorEnter锁膨胀时,通过MonitorPool中取出monitor_id_对应的Monitor,来执行Lock方法
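Object对象的成员变量monitor_的高2位描述的是状态编码,共四种:thin/unlocked(00)、fat locked(01)、hash code(10)和forwarding address(11);对应到LockState枚举则有kUnlocked、kThinLocked、kFatLocked、kHashCode和kForwardingAddress五种状态(kUnlocked与kThinLocked共用编码00)。处于不同状态时,低30位有不同的描述。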
对于thinlock,LockWord头两位是00,随后是1位mark bit和1位read barrier状态位,再其后12位是加锁次数,最低16位是归属的线程id。对于fatlock,LockWord头两位是01,同样跟着mark bit和read barrier状态位,剩下的28位是对应的monitor的id。LockWord是0的时候,表示该object未被加锁,这是每个object的monitor初始化的状态。
thinlock的加锁过程:进入到MonitorEnter后,说明即将要对该object进行加锁。LockWord在初始化时是0,于是通过线程id号和加锁次数(0,表示首次加锁)生成一个LockWord,通过CAS(Compare And Set)将LockWord设置成新生成的LockWord。这个过程就是thinlock的加锁过程。
thinlock的访问过程:如果访问该object的是thinlock的归属线程,将加锁次数加1后,更新LockWord。加锁次数有限制,当到达2^12-1时,调用InflateThinLocked通过锁膨胀将thinlock升级为fatlock。如果访问该object的是其他线程,将会调用sched_yield放弃处理器,让CPU选择合适的其他线程执行。contention_count记录了该线程尝试访问该object但未能成功的次数,但当contention_count超过某个阈值时,会调用InflateThinLocked通过锁膨胀将thinlock升级为fatlock。这个阈值默认是50,定义在monitor.h中 size_t kDefaultMaxSpinsBeforeThinLockInflation = 50;也可以通过
“-XX:MaxSpinsBeforeThinLockInflation=”指定这个阈值。
可以看出,thinlock是一个自旋锁。在等待锁释放的过程中,线程并不会睡眠,只是暂时让出处理器,然后通过continue重新执行循环,检查LockWord对应的状态是否是kUnlocked(释放锁)。在锁被短时间占用的情况下,自旋锁是比较好的选择。但当contention_count超过一定程度时,说明该锁被长时间占用,使用自旋锁会带来额外的开销(CAS操作和忙等待),就会将thinlock升级为fatlock。
thinlock主要是调用原子类CompareAndSet,如果当前值 == 预期值,则以原子方式将该值设置为给定的更新值
与thinlock不同的是,非持有者线程在访问fatlock锁住的代码块时,是通过条件变量monitor_contenders_ 实现同步的。fatlock是个重量级锁,不持有锁的线程会被阻塞,直到锁释放将其唤醒。准确地说,thinlock并没有用到monitor,用到monitor的是fatlock
* art/runtime/mirror/object-readbarrier-inl.h
// Atomically compares the 32-bit field at `field_offset` of this object with `old_value`
// and, if equal, replaces it with `new_value`, using the given CAS mode and memory order.
// Returns the result of the underlying CompareAndSet.
// When kTransactionActive is set, the write is first recorded with the runtime; when
// kVerifyFlags contains kVerifyThis, this object is verified first.
template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
inline bool Object::CasField32(MemberOffset field_offset,
int32_t old_value,
int32_t new_value,
CASMode mode,
std::memory_order memory_order) {
if (kCheckTransaction) {
DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
}
if (kTransactionActive) {
Runtime::Current()->RecordWriteField32(this, field_offset, old_value, true);
}
if (kVerifyFlags & kVerifyThis) {
VerifyObject(this);
}
// Compute the field's address and reinterpret it as an atomic integer.
uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
AtomicInteger* atomic_addr = reinterpret_cast<AtomicInteger*>(raw_addr);
return atomic_addr->CompareAndSet(old_value, new_value, mode, memory_order);
}
// Atomically replaces this object's lock word with `new_val` if it currently equals
// `old_val`, by running CasField32 on the field at MonitorOffset().
// Returns whether the compare-and-set succeeded.
inline bool Object::CasLockWord(LockWord old_val,
LockWord new_val,
CASMode mode,
std::memory_order memory_order) {
// Force use of non-transactional mode and do not check.
return CasField32<false, false>(MonitorOffset(),
old_val.GetValue(),
new_val.GetValue(),
mode,
memory_order);
}
如果当前线程就是持有锁的线程,直接执行锁膨胀操作。如果当前线程不是持有锁的线程,先要挂起(suspend)持有锁的线程,确认锁的状态没有变化后再进行锁膨胀操作,最后恢复(resume)该线程。
MonitorPool::CreateMonitor会创建一个新的monitor。接下来的Monitor::Install会通过CAS将加锁的object的LockWord改写成fatlock对应的LockWord,即头部标记”01”和刚创建的monitor id组合而成的LockWord。这样,当读取这个object的锁时,会发现这是个fatlock,于是进入到Monitor::Lock的流程中。
* art/runtime/monitor.cc
// Inflates the thin lock on `obj` (described by `lock_word`) into a Monitor.
// If `self` owns the thin lock, it inflates directly. Otherwise it suspends the owning
// thread, re-reads the lock word, inflates only if the lock is still thin-locked by the same
// owner, and then resumes the owner.
void Monitor::InflateThinLocked(Thread* self, Handle<mirror::Object> obj, LockWord lock_word,
uint32_t hash_code) {
DCHECK_EQ(lock_word.GetState(), LockWord::kThinLocked);
uint32_t owner_thread_id = lock_word.ThinLockOwner();
if (owner_thread_id == self->GetThreadId()) {
// We own the monitor, we can easily inflate it.
Inflate(self, self, obj.Get(), hash_code);
} else {
ThreadList* thread_list = Runtime::Current()->GetThreadList();
// Suspend the owner, inflate. First change to blocked and give up mutator_lock_.
self->SetMonitorEnterObject(obj.Get());
bool timed_out;
Thread* owner;
{
// `self` is in state kWaitingForLockInflation for the duration of this scope.
ScopedThreadSuspension sts(self, kWaitingForLockInflation);
owner = thread_list->SuspendThreadByThreadId(owner_thread_id,
SuspendReason::kInternal,
&timed_out);
}
// A null owner means the suspension did not succeed; nothing is inflated in that case.
if (owner != nullptr) {
// We succeeded in suspending the thread, check the lock's status didn't change.
lock_word = obj->GetLockWord(true);
if (lock_word.GetState() == LockWord::kThinLocked &&
lock_word.ThinLockOwner() == owner_thread_id) {
// Go ahead and inflate the lock.
Inflate(self, owner, obj.Get(), hash_code);
}
bool resumed = thread_list->Resume(owner, SuspendReason::kInternal);
DCHECK(resumed);
}
self->SetMonitorEnterObject(nullptr);
}
}
// Creates a Monitor for `obj` from the MonitorPool and tries to install it.
// On successful install the monitor is added to the runtime's monitor list and the object's
// lock word is checked to be kFatLocked; on failure the monitor is released back to the pool.
// `owner` may be null (used when inflating from a hash code state).
void Monitor::Inflate(Thread* self, Thread* owner, ObjPtr<mirror::Object> obj, int32_t hash_code) {
DCHECK(self != nullptr);
DCHECK(obj != nullptr);
// Allocate and acquire a new monitor.
Monitor* m = MonitorPool::CreateMonitor(self, owner, obj, hash_code);
DCHECK(m != nullptr);
if (m->Install(self)) {
if (owner != nullptr) {
VLOG(monitor) << "monitor: thread" << owner->GetThreadId()
<< " created monitor " << m << " for object " << obj;
} else {
VLOG(monitor) << "monitor: Inflate with hashcode " << hash_code
<< " created monitor " << m << " for object " << obj;
}
Runtime::Current()->GetMonitorList()->Add(m);
CHECK_EQ(obj->GetLockWord(true).GetState(), LockWord::kFatLocked);
} else {
// Install failed: return the unused monitor to the pool.
MonitorPool::ReleaseMonitor(self, m);
}
}
// Acquires this monitor for `self`. Loops calling TryLockLocked under monitor_lock_; while
// that fails, it registers as a waiter, drops monitor_lock_, and waits on
// monitor_contenders_ (if the monitor still has an owner) before retrying.
// The contended-locking runtime callbacks are issued at most once per call and only when
// `reason` is LockReason::kForLock.
// (Excerpt: tracing and contention-logging details are elided with "...".)
// NOTE(review): `sample_percent` is used below but its declaration is not visible in this
// excerpt -- presumably it sits in an elided section or was affected by the MIUI change.
template <LockReason reason>
void Monitor::Lock(Thread* self) {
ScopedAssertNotHeld sanh(self, monitor_lock_);
bool called_monitors_callback = false;
monitor_lock_.Lock(self);
while (true) {
if (TryLockLocked(self)) {
break;
}
// Contended.
const bool log_contention = (lock_profiling_threshold_ != 0);
uint64_t wait_start_ms = log_contention ? MilliTime() : 0;
// Snapshot the current owner's locking method and dex pc for contention logging below.
ArtMethod* owners_method = locking_method_;
uint32_t owners_dex_pc = locking_dex_pc_;
// Do this before releasing the lock so that we don't get deflated.
size_t num_waiters = num_waiters_;
++num_waiters_;
// If systrace logging is enabled, first look at the lock owner. Acquiring the monitor's
// lock and then re-acquiring the mutator lock can deadlock.
bool started_trace = false;
if (ATraceEnabled()) {
...
}
monitor_lock_.Unlock(self); // Let go of locks in order.
// Call the contended locking cb once and only once. Also only call it if we are locking for
// the first time, not during a Wait wakeup.
if (reason == LockReason::kForLock && !called_monitors_callback) {
called_monitors_callback = true;
Runtime::Current()->GetRuntimeCallbacks()->MonitorContendedLocking(this);
}
self->SetMonitorEnterObject(GetObject().Ptr());
{
ScopedThreadSuspension tsc(self, kBlocked); // Change to blocked and give up mutator_lock_.
// Stays 0 unless the monitor still had an owner and we actually waited.
uint32_t original_owner_thread_id = 0u;
{
// Reacquire monitor_lock_ without mutator_lock_ for Wait.
MutexLock mu2(self, monitor_lock_);
if (owner_ != nullptr) { // Did the owner_ give the lock up?
original_owner_thread_id = owner_->GetThreadId();
monitor_contenders_.Wait(self); // Still contended so wait.
}
}
if (original_owner_thread_id != 0u) {
// Woken from contention.
if (log_contention) {
uint64_t wait_ms = MilliTime() - wait_start_ms;
if (sample_percent != 0 && (static_cast<uint32_t>(rand() % 100) < sample_percent)) {
// Reacquire mutator_lock_ for logging.
ScopedObjectAccess soa(self);
bool owner_alive = false;
pid_t original_owner_tid = 0;
std::string original_owner_name;
const bool should_dump_stacks = stack_dump_lock_profiling_threshold_ > 0 &&
wait_ms > stack_dump_lock_profiling_threshold_;
std::string owner_stack_dump;
...
// If we found the owner (and thus have owner data), go and log now.
if (owner_alive) {
// Give the detailed traces for really long contention.
...
LogContentionEvent(self,
wait_ms,
// MIUI MOD: START
// sample_percent,
original_owner_tid,
// END
owners_method,
owners_dex_pc);
}
}
}
}
}
if (started_trace) {
ATraceEnd();
}
self->SetMonitorEnterObject(nullptr);
monitor_lock_.Lock(self); // Reacquire locks in order.
--num_waiters_;
}
monitor_lock_.Unlock(self);
// We need to pair this with a single contended locking call. NB we match the RI behavior and call
// this even if MonitorEnter failed.
if (called_monitors_callback) {
CHECK(reason == LockReason::kForLock);
Runtime::Current()->GetRuntimeCallbacks()->MonitorContendedLocked(this);
}
}
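这里的monitor_lock_ 为Mutex 类型。注意:下面贴出的Lock只是转调ExclusiveLock的内联封装,而贴出的ExclusiveLock实现属于ReaderWriterMutex(Mutex::ExclusiveLock应当同样基于futex等待,细节请以源码为准)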
* art/runtime/base/mutex.cc
// Convenience wrapper: locking is an exclusive lock.
void Lock(Thread* self) ACQUIRE() { ExclusiveLock(self); }
// Acquires the lock exclusively for `self`.
// With ART_USE_FUTEXES: repeatedly tries to CAS state_ from 0 to -1; whenever state_ is
// non-zero it futex-waits on state_ (FUTEX_WAIT_PRIVATE) and retries afterwards.
// Without futexes: delegates to pthread_rwlock_wrlock.
// On success records the caller as the exclusive owner.
void ReaderWriterMutex::ExclusiveLock(Thread* self) {
DCHECK(self == nullptr || self == Thread::Current());
AssertNotExclusiveHeld(self);
#if ART_USE_FUTEXES
bool done = false;
do {
int32_t cur_state = state_.load(std::memory_order_relaxed);
if (LIKELY(cur_state == 0)) {
// Change state from 0 to -1 and impose load/store ordering appropriate for lock acquisition.
done = state_.CompareAndSetWeakAcquire(0 /* cur_state*/, -1 /* new state */);
} else {
// Failed to acquire, hang up.
ScopedContentionRecorder scr(this, SafeGetTid(self), GetExclusiveOwnerTid());
++num_pending_writers_;
if (UNLIKELY(should_respond_to_empty_checkpoint_request_)) {
self->CheckEmptyCheckpointFromMutex();
}
// Sleep in the kernel while state_ still equals cur_state.
if (futex(state_.Address(), FUTEX_WAIT_PRIVATE, cur_state, nullptr, nullptr, 0) != 0) {
// EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
// We don't use TEMP_FAILURE_RETRY so we can intentionally retry to acquire the lock.
if ((errno != EAGAIN) && (errno != EINTR)) {
PLOG(FATAL) << "futex wait failed for " << name_;
}
}
--num_pending_writers_;
}
} while (!done);
DCHECK_EQ(state_.load(std::memory_order_relaxed), -1);
#else
CHECK_MUTEX_CALL(pthread_rwlock_wrlock, (&rwlock_));
#endif
DCHECK_EQ(GetExclusiveOwnerTid(), 0);
exclusive_owner_.store(SafeGetTid(self), std::memory_order_relaxed);
RegisterAsLocked(self);
AssertExclusiveHeld(self);
}
* art/runtime/base/mutex-inl.h
// Thin wrapper that issues the SYS_futex system call with the given arguments and returns
// the syscall's result unchanged.
static inline int futex(volatile int *uaddr, int op, int val, const struct timespec *timeout,
volatile int *uaddr2, int val3) {
return syscall(SYS_futex, uaddr, op, val, timeout, uaddr2, val3);
}
Monitor 锁的功能实现依赖Linux 的futex功能和原子操作。Futex主要的作用有两点:一是为锁提供睡眠与唤醒操作,二是管理线程挂起时的等待队列
- FUTEX_WAKE 参数:唤醒val个在state_.Address指向的锁变量上挂起等待的线程。如下唤醒1个线程
futex(state_.Address(), FUTEX_WAKE, 1, nullptr, nullptr, 0);
- FUTEX_WAIT参数:当state_.Address处的值和val值相等时,进入等待状态。对应线程将不再执行,必须唤醒后才能执行后面的操作
futex(state_.Address(), FUTEX_WAIT, cur_state, nullptr, nullptr, 0)
- FUTEX_CMP_REQUEUE参数:当sequence_.Address地址处的值和cur_sequence相等时,将sequence_.Address处挂起的等待队列,转移到guard_.state_.Address()的等待队列上。
futex(sequence_.Address(), FUTEX_CMP_REQUEUE, 0, reinterpret_cast<const timespec*>(std::numeric_limits<int32_t>::max()), guard_.state_.Address(), cur_sequence)
Monitor::Lock的流程如下:
1. 开启while true循环,调用TryLockLocked尝试获取锁,获取成功,则退出循环,获取失败,则进入步骤2
TryLockLocked先检查锁的持有者
如果持有者为null,则获取锁成功
如果持有者为当前线程,则锁计数+1,返回成功
如果持有者不为null,且不为当前线程,则获取失败
2. 打印trace,当前线程重新持有monitor_lock_锁,并调用monitor_contenders_.Wait无限等待,monitor_contenders_为ConditionVariable类型的条件变量,通过调用futex(sequence_.Address(), FUTEX_WAIT_PRIVATE, cur_sequence, nullptr, nullptr, 0)而进入等待。
3. 如果锁持有者调用Monitor::Unlock释放锁,进而通过SignalContendersAndReleaseMonitorLock调用到monitor_contenders_.Signal时,则选择一个等待线程,唤醒它。
Monitor::MonitorExit:对于thinlock,若LockWord中记录的加锁次数不为0,就将LockWord中记录的加锁次数减1。若LockWord中记录的加锁次数为0,则将LockWord清0(保留GC状态位),这样以后有线程在获取这个object的锁时,会发现这个锁是kUnlocked状态的,可以直接占有这个锁。对于fatlock,则调用Monitor::Unlock,当锁计数为0时通过ConditionVariable类型的条件变量monitor_contenders_的Signal函数唤醒一个阻塞在这个锁上的线程。
// Releases one level of the lock on `obj` held by `self`.
//   - kHashCode / kUnlocked: nothing is locked, so report a failed unlock.
//   - kThinLocked: fail if `self` is not the owner; otherwise decrement the recursion count,
//     or reset the lock word to the default state (keeping the GC bits) when the count is 0.
//   - kFatLocked: delegate to Monitor::Unlock on the referenced monitor.
// Returns true on success and false after calling FailedUnlock.
bool Monitor::MonitorExit(Thread* self, ObjPtr<mirror::Object> obj) {
DCHECK(self != nullptr);
DCHECK(obj != nullptr);
self->AssertThreadSuspensionIsAllowable();
obj = FakeUnlock(obj);
StackHandleScope<1> hs(self);
Handle<mirror::Object> h_obj(hs.NewHandle(obj));
while (true) {
LockWord lock_word = obj->GetLockWord(true);
switch (lock_word.GetState()) {
case LockWord::kHashCode:
// Fall-through.
case LockWord::kUnlocked:
FailedUnlock(h_obj.Get(), self->GetThreadId(), 0u, nullptr);
return false; // Failure.
case LockWord::kThinLocked: {
uint32_t thread_id = self->GetThreadId();
uint32_t owner_thread_id = lock_word.ThinLockOwner();
if (owner_thread_id != thread_id) {
FailedUnlock(h_obj.Get(), thread_id, owner_thread_id, nullptr);
return false; // Failure.
} else {
// We own the lock, decrease the recursion count.
LockWord new_lw = LockWord::Default();
if (lock_word.ThinLockCount() != 0) {
uint32_t new_count = lock_word.ThinLockCount() - 1;
new_lw = LockWord::FromThinLockId(thread_id, new_count, lock_word.GCState());
} else {
// Count is already 0: this exit fully releases the thin lock.
new_lw = LockWord::FromDefault(lock_word.GCState());
}
if (!kUseReadBarrier) {
DCHECK_EQ(new_lw.ReadBarrierState(), 0U);
// TODO: This really only needs memory_order_release, but we currently have
// no way to specify that. In fact there seem to be no legitimate uses of SetLockWord
// with a final argument of true. This slows down x86 and ARMv7, but probably not v8.
h_obj->SetLockWord(new_lw, true);
AtraceMonitorUnlock();
// Success!
return true;
} else {
// Use CAS to preserve the read barrier state.
if (h_obj->CasLockWord(lock_word, new_lw, CASMode::kWeak, std::memory_order_release)) {
AtraceMonitorUnlock();
// Success!
return true;
}
}
continue; // Go again.
}
}
case LockWord::kFatLocked: {
Monitor* mon = lock_word.FatLockMonitor();
return mon->Unlock(self);
}
default: {
LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
UNREACHABLE();
}
}
}
}
通常,锁膨胀的操作是单向的,即thinlock可以膨胀成fatlock,但是fatlock不能收缩成thinlock。但是在后台进程进行堆裁剪时,会将所有的fatlock收缩成thinlock。
当为kFatLocked锁时,则获取当前对象的monitor,并调用Monitor::Unlock释放锁
// Releases one level of this monitor for `self`.
// If `self` is the owner: when lock_count_ is 0 the ownership fields are cleared and a
// waiter is signalled (which also releases monitor_lock_); otherwise lock_count_ is
// decremented. Returns true in both cases.
// If `self` is not the owner, FailedUnlock is called and false is returned.
bool Monitor::Unlock(Thread* self) {
DCHECK(self != nullptr);
uint32_t owner_thread_id = 0u;
DCHECK(!monitor_lock_.IsExclusiveHeld(self));
monitor_lock_.Lock(self);
Thread* owner = owner_;
if (owner != nullptr) {
owner_thread_id = owner->GetThreadId();
}
if (owner == self) {
// We own the monitor, so nobody else can be in here.
AtraceMonitorUnlock();
if (lock_count_ == 0) {
owner_ = nullptr;
locking_method_ = nullptr;
locking_dex_pc_ = 0;
// Releases monitor_lock_ as part of signalling, so no explicit Unlock here.
SignalContendersAndReleaseMonitorLock(self);
return true;
} else {
--lock_count_;
monitor_lock_.Unlock(self);
return true;
}
}
// We don't own this, so we're not allowed to unlock it.
// The JNI spec says that we should throw IllegalMonitorStateException in this case.
FailedUnlock(GetObject(), self->GetThreadId(), owner_thread_id, this);
monitor_lock_.Unlock(self);
return false;
}
- 先判断当前线程是否是锁的持有者,如果不是则是错误状态
- 当前线程的锁计数(lock_count_)为0时,则调用SignalContendersAndReleaseMonitorLock进行释放锁
// Wakes at most one thread so it can acquire the monitor being released, and releases
// monitor_lock_ on every path.
// Threads in wake_set_ are tried first: each is popped, and the first one that still has a
// wait monitor is signalled on its own condition variable. If none qualifies, one thread
// waiting on monitor_contenders_ is signalled instead.
void Monitor::SignalContendersAndReleaseMonitorLock(Thread* self) {
// We want to signal one thread to wake up, to acquire the monitor that
// we are releasing. This could either be a Thread waiting on its own
// ConditionVariable, or a thread waiting on monitor_contenders_.
// A non-null wake_set_ is the Object.wait path.
while (wake_set_ != nullptr) {
// No risk of waking ourselves here; since monitor_lock_ is not released until we're ready to
// return, notify can't move the current thread from wait_set_ to wake_set_ until this
// method is done checking wake_set_.
Thread* thread = wake_set_;
wake_set_ = thread->GetWaitNext();
thread->SetWaitNext(nullptr);
// Check to see if the thread is still waiting.
{
// In the case of wait(), we'll be acquiring another thread's GetWaitMutex with
// self's GetWaitMutex held. This does not risk deadlock, because we only acquire this lock
// for threads in the wake_set_. A thread can only enter wake_set_ from Notify or NotifyAll,
// and those hold monitor_lock_. Thus, the threads whose wait mutexes we acquire here must
// have already been released from wait(), since we have not released monitor_lock_ until
// after we've chosen our thread to wake, so there is no risk of the following lock ordering
// leading to deadlock:
// Thread 1 waits
// Thread 2 waits
// Thread 3 moves threads 1 and 2 from wait_set_ to wake_set_
// Thread 1 enters this block, and attempts to acquire Thread 2's GetWaitMutex to wake it
// Thread 2 enters this block, and attempts to acquire Thread 1's GetWaitMutex to wake it
//
// Since monitor_lock_ is not released until the thread-to-be-woken-up's GetWaitMutex is
// acquired, two threads cannot attempt to acquire each other's GetWaitMutex while holding
// their own and cause deadlock.
MutexLock wait_mu(self, *thread->GetWaitMutex());
if (thread->GetWaitMonitor() != nullptr) {
// Release the lock, so that a potentially awakened thread will not
// immediately contend on it. The lock ordering here is:
// monitor_lock_, self->GetWaitMutex, thread->GetWaitMutex
monitor_lock_.Unlock(self);
thread->GetWaitConditionVariable()->Signal(self);
return;
}
}
}
// If we didn't wake any threads that were originally waiting on us,
// wake a contender.
// Call Signal to wake one thread.
monitor_contenders_.Signal(self);
monitor_lock_.Unlock(self);
}
* art/runtime/base/mutex.cc
// If there are waiters, bumps sequence_ to mark that a signal occurred and asks the kernel
// (FUTEX_REQUEUE_PRIVATE) to wake 0 threads and move up to `count` waiters from this
// condition variable's futex to the guard mutex's futex.
// Aborts if the futex call fails with an errno other than EAGAIN or EINTR.
void ConditionVariable::RequeueWaiters(int32_t count) {
if (num_waiters_ > 0) {
sequence_++; // Indicate a signal occurred.
// Move waiters from the condition variable's futex to the guard's futex,
// so that they will be woken up when the mutex is released.
// The requeue count travels in the futex timeout-pointer argument, hence the cast.
bool done = futex(sequence_.Address(),
FUTEX_REQUEUE_PRIVATE,
/* Threads to wake */ 0,
/* Threads to requeue*/ reinterpret_cast<const timespec*>(count),
guard_.state_and_contenders_.Address(),
0) != -1;
if (!done && errno != EAGAIN && errno != EINTR) {
PLOG(FATAL) << "futex requeue failed for " << name_;
}
}
}
- wake_set_ 不为null,是调用Object.wait的逻辑,从唤醒队列中获取一个等待的线程,唤醒它
- 普通状态下,调用monitor_contenders_.Signal唤醒,进而调用到ConditionVariable::RequeueWaiters(1),然后调用
futex(sequence_.Address(),
FUTEX_REQUEUE_PRIVATE,
/* Threads to wake */ 0,
/* Threads to requeue */ reinterpret_cast<const timespec*>(count),
guard_.state_and_contenders_.Address(),
0)
将一个等待线程从条件变量的futex转移到guard互斥锁的futex上,该线程会在互斥锁释放时被唤醒
此外,我们常用Object.wait()和Object.notify()来进行线程的同步操作。这两个方法必须使用在以同一个Object为加锁对象的synchronized语句块中,而且都是native方法
* art/runtime/native/java_lang_Object.cc
// Native implementation behind Object.wait(long, int): decodes `java_this` and calls
// mirror::Object::Wait on it with the given millisecond/nanosecond timeout.
static void Object_waitJI(JNIEnv* env, jobject java_this, jlong ms, jint ns) {
ScopedFastNativeObjectAccess soa(env);
soa.Decode<mirror::Object>(java_this)->Wait(soa.Self(), ms, ns);
}
* art/runtime/mirror/object-inl.h
// Forwards to the static Monitor::Wait for this object, with interruptShouldThrow = true
// and the thread state reason kTimedWaiting.
inline void Object::Wait(Thread* self, int64_t ms, int32_t ns) {
Monitor::Wait(self, this, ms, ns, true, kTimedWaiting);
}
* art/runtime/monitor.cc
// Static entry point for Object.wait(): makes sure `obj` is fat-locked by `self`, then waits
// on its Monitor.
// Returns early if an exception is pending after the ObjectWaitStart callback. Throws
// IllegalMonitorStateException if the object is unlocked, only carries a hash code, or is
// thin-locked by another thread. A thin lock owned by `self` is inflated, retrying until the
// lock word reads kFatLocked.
void Monitor::Wait(Thread* self,
ObjPtr<mirror::Object> obj,
int64_t ms,
int32_t ns,
bool interruptShouldThrow,
ThreadState why) {
DCHECK(self != nullptr);
DCHECK(obj != nullptr);
StackHandleScope<1> hs(self);
Handle<mirror::Object> h_obj(hs.NewHandle(obj));
Runtime::Current()->GetRuntimeCallbacks()->ObjectWaitStart(h_obj, ms);
if (UNLIKELY(self->ObserveAsyncException() || self->IsExceptionPending())) {
// See b/65558434 for information on handling of exceptions here.
return;
}
LockWord lock_word = h_obj->GetLockWord(true);
// Loop until the lock word refers to an inflated monitor.
while (lock_word.GetState() != LockWord::kFatLocked) {
switch (lock_word.GetState()) {
case LockWord::kHashCode:
// Fall-through.
case LockWord::kUnlocked:
ThrowIllegalMonitorStateExceptionF("object not locked by thread before wait()");
return; // Failure.
case LockWord::kThinLocked: {
uint32_t thread_id = self->GetThreadId();
uint32_t owner_thread_id = lock_word.ThinLockOwner();
if (owner_thread_id != thread_id) {
ThrowIllegalMonitorStateExceptionF("object not locked by thread before wait()");
return; // Failure.
} else {
// We own the lock, inflate to enqueue ourself on the Monitor. May fail spuriously so
// re-load.
Inflate(self, self, h_obj.Get(), 0);
lock_word = h_obj->GetLockWord(true);
}
break;
}
case LockWord::kFatLocked: // Unreachable given the loop condition above. Fall-through.
default: {
LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
UNREACHABLE();
}
}
}
Monitor* mon = lock_word.FatLockMonitor();
mon->Wait(self, ms, ns, interruptShouldThrow, why);
}
wait()的内部实现中会对object的Monitor的LockWord进行检查。若不是加锁状态,会直接抛出异常”object not locked by thread before wait()”,说明使用wait()函数前需要使用synchronized进行加锁,这也是我们看到的Object.wait()是使用在synchronized语句内部的原因。
如果LockWord表明加的锁是ThinLock,若锁的所属线程不是当前线程,也会抛出异常”object not locked by thread before wait()”。若锁的所属线程是当前线程,将ThinLock锁膨胀为FatLock。由于膨胀过程需要用到CAS,所以可能会”fail spuriously”,于是重新执行while循环再次进行锁膨胀。锁膨胀成功后,调用Monitor的重载版本的wait函数。
// Implements the blocking part of a wait on this monitor for the thread `self`.
//
// The calling thread must currently own the monitor (owner_ == self); otherwise an
// IllegalMonitorStateException is raised and the function returns. The function saves the
// monitor's ownership state, gives the monitor up, blocks on the thread's own wait condition
// variable, and on wake-up re-acquires the monitor and restores the saved state.
//
// Parameters:
//   self                 - the calling thread; must not be null.
//   ms, ns               - timeout in milliseconds plus nanoseconds; rejected with an
//                          IllegalArgumentException if ms < 0, ns < 0 or ns > 999999.
//   interruptShouldThrow - if true and the thread was interrupted, the interrupt flag is
//                          cleared and an InterruptedException is raised.
//   why                  - thread state to wait in; must be kTimedWaiting, kWaiting or kSleeping.
void Monitor::Wait(Thread* self, int64_t ms, int32_t ns,
bool interruptShouldThrow, ThreadState why) {
DCHECK(self != nullptr);
DCHECK(why == kTimedWaiting || why == kWaiting || why == kSleeping);
// monitor_lock_ is a mutex; explicit Lock()/Unlock() calls bracket the code it protects.
monitor_lock_.Lock(self);
// Make sure that we hold the lock.
if (owner_ != self) {
monitor_lock_.Unlock(self);
ThrowIllegalMonitorStateExceptionF("object not locked by thread before wait()");
return;
}
// A timed wait with a zero timeout is treated as an untimed (indefinite) wait: kWaiting.
if (why == kTimedWaiting && (ms == 0 && ns == 0)) {
why = kWaiting;
}
// Enforce the timeout range.
if (ms < 0 || ns < 0 || ns > 999999) {
monitor_lock_.Unlock(self);
self->ThrowNewExceptionF("Ljava/lang/IllegalArgumentException;",
"timeout arguments out of range: ms=%" PRId64 " ns=%d", ms, ns);
return;
}
/*
* Release our hold - we need to let it go even if we're a few levels
* deep in a recursive lock, and we need to restore that later.
*/
int prev_lock_count = lock_count_;
lock_count_ = 0;
owner_ = nullptr;
ArtMethod* saved_method = locking_method_;
locking_method_ = nullptr;
uintptr_t saved_dex_pc = locking_dex_pc_;
locking_dex_pc_ = 0;
AtraceMonitorUnlock(); // For the implicit Unlock() just above. This will only end the deepest
// nesting, but that is enough for the visualization, and corresponds to
// the single Lock() we do afterwards.
AtraceMonitorLock(self, GetObject(), /* is_wait= */ true);
bool was_interrupted = false;
bool timed_out = false;
{
// Update the thread state to `why` for the duration of this scope.
ScopedThreadSuspension sts(self, why);
MutexLock mu(self, *self->GetWaitMutex());
// Add the current thread to wait_set_ (presumably at the tail, per the helper's name).
AppendToWaitSet(self);
// Count this thread as a waiter, since it is about to block.
++num_waiters_;
DCHECK(self->GetWaitMonitor() == nullptr);
// Record this monitor as the one the thread is blocked on.
self->SetWaitMonitor(this);
// NOTE(review): judging by its name, this signals a thread contending for the monitor and
// releases monitor_lock_ (taken at the top of this function) - confirm against the helper.
SignalContendersAndReleaseMonitorLock(self);
// Handle the case where the thread was interrupted before we called wait().
if (self->IsInterrupted()) {
was_interrupted = true;
} else {
// Wait for a notification or a timeout to occur.
if (why == kWaiting) {
// The actual blocking happens here, on the thread's own condition variable.
self->GetWaitConditionVariable()->Wait(self);
} else {
DCHECK(why == kTimedWaiting || why == kSleeping) << why;
timed_out = self->GetWaitConditionVariable()->TimedWait(self, ms, ns);
}
was_interrupted = self->IsInterrupted();
}
}
{
// We reset the thread's wait_monitor_ field after transitioning back to runnable so
// that a thread in a waiting/sleeping state has a non-null wait_monitor_ for debugging
// and diagnostic purposes. (If you reset this earlier, stack dumps will claim that threads
// are waiting on "null".)
MutexLock mu(self, *self->GetWaitMutex());
DCHECK(self->GetWaitMonitor() != nullptr);
// Clear the thread's wait monitor.
self->SetWaitMonitor(nullptr);
}
if (was_interrupted && interruptShouldThrow) {
self->SetInterrupted(false);
self->ThrowNewException("Ljava/lang/InterruptedException;", nullptr);
}
AtraceMonitorUnlock(); // End Wait().
// We just slept, tell the runtime callbacks about this.
Runtime::Current()->GetRuntimeCallbacks()->MonitorWaitFinished(this, timed_out);
// Re-acquire the monitor, then take monitor_lock_ to update the bookkeeping below.
Lock<LockReason::kForWait>(self);
monitor_lock_.Lock(self);
self->GetWaitMutex()->AssertNotHeld(self);
// Restore the state saved before waiting, as if nothing had happened.
owner_ = self;
lock_count_ = prev_lock_count;
locking_method_ = saved_method;
locking_dex_pc_ = saved_dex_pc;
--num_waiters_;
RemoveFromWaitSet(self);
monitor_lock_.Unlock(self);
}
Monitor::Wait中会释放FatLock锁,让竞争线程拿到锁执行。Monitor::Notify则会在wait_set_的头部拿出一个线程,如果该线程是因为Object.wait()(或其他wait重载版本)阻塞的话,唤醒它。所以,Object.notify()是按进入阻塞状态的先后顺序来决定唤醒的先后顺序的,谁先阻塞,就会被先唤醒。但是,Object.notify()并不表明被唤醒的线程能立刻拿回锁,要在notify所在的synchronized语句块执行完,被唤醒的线程才能重新加锁,否则会再次阻塞在Monitor::Lock上。就我所见,一般notify调用都是synchronized语句块的最后一句。
// Moves the thread at the head of wait_set_ onto the head of wake_set_.
// Raises IllegalMonitorStateException (and does nothing else) if `self` does not own
// this monitor. Does nothing when no thread is waiting.
void Monitor::Notify(Thread* self) {
  DCHECK(self != nullptr);
  MutexLock mu(self, monitor_lock_);
  // Only the owner of the monitor may notify.
  if (owner_ != self) {
    ThrowIllegalMonitorStateExceptionF("object not locked by thread before notify()");
    return;
  }
  Thread* const first_waiter = wait_set_;
  if (first_waiter == nullptr) {
    return;  // Nobody is waiting.
  }
  // Unlink the first waiter and push it onto the front of the wake set.
  wait_set_ = first_waiter->GetWaitNext();
  first_waiter->SetWaitNext(wake_set_);
  wake_set_ = first_waiter;
}
如果我们把monitor_enter/monitor_exit看成是Fat Lock方式,则可以把Thin Lock看成是一种基于CAS(Compare and Swap)的简易实现。
这两种锁,简单一点理解,就是:
而基于CAS方式的实现,线程进入竞争状态的,获得锁的线程,会让其他线程处于自旋状态(也称之为Spin Mode,即自旋),这是一种while(Lock_release) doStuff()的Busy-Wait方式,是一种耗CPU的方式;而Fat Lock方式下,一个线程获得锁的时候,其他线程可以先sleep,等锁释放后,再唤醒(Notify)。
CAS的优点是快,如果没有线程竞争的情况下,因为CAS只需要一个指令便获得锁,所以称之为Thin Lock,缺点也是很明显的,即如果频繁发生线程竞争,CAS是低效,主要表现为,排斥在锁之外的线程是Busy Wait状态;而monitor_enter/monitor_exit/monitor_notify方式,则是重量级的,在线程产生竞争的时候,Fat Lock在OS mutex方式下,可以实现no busy-wait
Notify
// Instrumented variant of Notify: logs via printfTest, then moves the thread at the head
// of wait_set_ onto the head of wake_set_.
// Raises IllegalMonitorStateException (and does nothing else) if `self` does not own
// this monitor. Does nothing when no thread is waiting.
void Monitor::Notify(Thread* self) {
  DCHECK(self != nullptr);
  printfTest(self, "Notify");
  MutexLock mu(self, monitor_lock_);
  // Only the owner of the monitor may notify.
  if (owner_ != self) {
    ThrowIllegalMonitorStateExceptionF("object not locked by thread before notify()");
    return;
  }
  Thread* const first_waiter = wait_set_;
  if (first_waiter == nullptr) {
    return;  // Nobody is waiting.
  }
  // Unlink the first waiter and push it onto the front of the wake set.
  wait_set_ = first_waiter->GetWaitNext();
  first_waiter->SetWaitNext(wake_set_);
  wake_set_ = first_waiter;
}
- 假如调用notify的线程不是持锁线程,则抛出异常
- 假如wait_set_等待列表不为空,则移出当前等待线程,并将wait_set_指向下一个线程
并把当前等待线程放入wake_set_唤醒列表的头部,并将wake_set_指向它
NotifyAll
// Moves every thread in wait_set_ into wake_set_, leaving wait_set_ empty.
// If wake_set_ is empty the whole wait list becomes the wake list; otherwise the wait list
// is linked after the last element of wake_set_.
// Raises IllegalMonitorStateException (and does nothing else) if `self` does not own
// this monitor.
void Monitor::NotifyAll(Thread* self) {
  DCHECK(self != nullptr);
  printfTest(self, "NotifyAll");
  MutexLock mu(self, monitor_lock_);
  // Only the owner of the monitor may notify.
  if (owner_ != self) {
    ThrowIllegalMonitorStateExceptionF("object not locked by thread before notifyAll()");
    return;
  }
  Thread* const waiters = wait_set_;
  if (waiters == nullptr) {
    return;  // Nobody is waiting.
  }
  wait_set_ = nullptr;
  if (wake_set_ == nullptr) {
    // Nothing pending wake-up yet: the waiters become the wake set as-is.
    wake_set_ = waiters;
    return;
  }
  // Walk to the last element of the existing wake set and link the waiters after it.
  Thread* tail = wake_set_;
  while (tail->GetWaitNext() != nullptr) {
    tail = tail->GetWaitNext();
  }
  tail->SetWaitNext(waiters);
}
- 假如调用notifyAll的线程不是持锁线程,则抛出异常
- 假如wake_set_为空,则wait_set_作为wake_set_
- 假如wake_set_不为空,则把wait_set_追加到wake_set_尾部
经研究发现,很多同步是无竞争的,即某个进程进入互斥区,到再从某个互斥区出来这段时间,常常是没有进程也要进这个互斥区或者请求同一同步变量的。但是在这种情况下,这个进程也要陷入内核去看看有没有人和它竞争,退出的时候还要陷入内核去看看有没有进程等待在同一同步变量上。这些不必要的系统调用(或者说内核陷入)造成了大量的性能开销。为了解决这个问题,Futex就应运而生,Futex是一种用户态和内核态混合的同步机制。首先,同步的进程间通过mmap共享一段内存,futex变量就位于这段共享的内存中且操作是原子的,当进程尝试进入互斥区或者退出互斥区的时候,先去查看共享内存中的futex变量,如果没有竞争发生,则只修改futex,而不用再执行系统调用了。当通过访问futex变量告诉进程有竞争发生,则还是得执行系统调用去完成相应的处理(wait 或者 wake up)。简单的说,futex就是通过在用户态的检查,(motivation)如果了解到没有竞争就不用陷入内核了,大大提高了low-contention时候的效率。Linux从2.5.7开始支持Futex。
* art/runtime/base/mutex-inl.h
// Thin wrapper that forwards its arguments unchanged to the SYS_futex system call and
// returns the call's result narrowed to int.
static inline int futex(volatile int *uaddr, int op, int val, const struct timespec *timeout,
                        volatile int *uaddr2, int val3) {
  const long rc = syscall(SYS_futex, uaddr, op, val, timeout, uaddr2, val3);
  return static_cast<int>(rc);
}
Monitor 锁的功能实现依赖linux 的futex功能和原子操作的原理, Futex主要的作用有两点:支持一种粒度锁的睡眠与唤醒操作,其次是管理进程挂起时的等待队列
uaddr就是用户态下共享内存的地址,里面存放的是一个对齐的整型计数器
Futex同步机制
所有的futex同步操作都应该从用户空间开始,首先创建一个futex同步变量,也就是位于共享内存的一个整型计数器。
当进程尝试持有锁或者要进入互斥区的时候,对futex执行"down"操作,即原子性的给futex同步变量减1。如果同步变量变为0,则没有竞争发生,进程照常执行。如果同步变量是个负数,则意味着有竞争发生,需要调用futex系统调用的futex_wait操作休眠当前进程。
当进程释放锁或者要离开互斥区的时候,对futex进行"up"操作,即原子性的给futex同步变量加1。如果同步变量由0变成1,则没有竞争发生,进程照常执行。如果加之前同步变量是负数,则意味着有竞争发生,需要调用futex系统调用的futex_wake操作唤醒一个或者多个等待进程。
这里的原子性加减通常是用CAS(Compare and Swap)完成的,与平台相关。CAS的基本形式是:CAS(addr,old,new),当addr中存放的值等于old时,用new对其替换。在x86平台上有专门的一条指令来完成它: cmpxchg。
可见: futex是从用户态开始,由用户态和核心态协调完成的
进程或者线程都可以利用futex来进行同步。
对于线程,情况比较简单,因为线程共享虚拟内存空间,虚拟地址就可以唯一的标识出futex变量,即线程用同样的虚拟地址来访问futex变量。
对于进程,情况相对复杂,因为进程有独立的虚拟内存空间,只有通过mmap()让它们共享一段地址空间来使用futex变量。每个进程用来访问futex的虚拟地址可以是不一样的,只要系统知道所有的这些虚拟地址都映射到同一个物理内存地址,并用物理内存地址来唯一标识futex变量。
- FUTEX_WAKE参数:唤醒val个在state_.Address指向的锁变量上挂起等待的线程。如下唤醒1个线程
futex(state_.Address(), FUTEX_WAKE, 1, nullptr, nullptr, 0);
- FUTEX_WAIT参数:当state_.Address处的值和val值相等时,进入等待状态。对应线程将不再执行,必须唤醒后才能执行后面的操作
futex(state_.Address(), FUTEX_WAIT, cur_state, nullptr, nullptr, 0)
- FUTEX_CMP_REQUEUE参数:当sequence_.Address地址处的值和cur_sequence相等时,将sequence_.Address处挂起的等待队列,转移到guard_.state_.Address()的等待队列上。
futex(sequence_.Address(), FUTEX_CMP_REQUEUE, 0, reinterpret_cast<const timespec*>(std::numeric_limits<int32_t>::max()), guard_.state_.Address(), cur_sequence)
futex_requeue函数原型为
futex(sequence_.Address(),
FUTEX_REQUEUE_PRIVATE,
/* Threads to wake */ 0,
/* Threads to requeue */ reinterpret_cast<const timespec*>(count),
guard_.state_and_contenders_.Address(),
0)
对应的为
static int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, int nr_wake, int nr_requeue, u32 *cmpval, int requeue_pi);
这个函数的功能是,将uaddr1对应的futex等待队列出队最多 (nr_wake + nr_requeue) 个futex_q,先对出队的futex_q进行唤醒nr_wake个,剩下的才进行requeue到futex2等待队列。而pthread_cond_broadcast固定设定参数 nr_wake = 1,nr_requeue = INT_MAX。就是说在broadcast的时候只会唤醒一个线程去竞争futex2,其余阻塞在futex1的线程都会出队再入队到futex2的等待队列。
futex_requeue函数只会将可以马上获得pi-futex锁的线程唤醒,也就是说对每一个requeue到pi-futex的waiter都先尝试锁pi-futex,尝试成功的才能被唤醒,直到有线程被唤醒,其余waiter就直接requeue到pi-futex,而不再进行锁尝试