// Usage sketch: two loops share one Exchanger. The filling loop exchanges its buffer
// once it is full, the emptying loop once it is empty, and each continues with the
// buffer it receives back.
// NOTE(review): this listing has no closing braces and no statements that actually fill
// or drain a buffer; they appear to have been dropped from the excerpt. Confirm against
// the original example.
class FillAndEmpty {
Exchanger exchanger = new Exchanger();
DataBuffer initialEmptyBuffer = ... a made-up type
DataBuffer initialFullBuffer = ...
// Filling side: starts out holding the initially empty buffer.
class FillingLoop implements Runnable {
public void run() {
DataBuffer currentBuffer = initialEmptyBuffer;
try {
// The loop ends when an exchange hands back null.
while (currentBuffer != null) {
// A full buffer is swapped for whatever the other side offers.
if (currentBuffer.isFull())
currentBuffer = exchanger.exchange(currentBuffer);
} catch (InterruptedException ex) { ... handle ... }
// Emptying side: starts out holding the initially full buffer.
class EmptyingLoop implements Runnable {
public void run() {
DataBuffer currentBuffer = initialFullBuffer;
try {
// The loop ends when an exchange hands back null.
while (currentBuffer != null) {
// An empty buffer is swapped for whatever the other side offers.
if (currentBuffer.isEmpty())
currentBuffer = exchanger.exchange(currentBuffer);
} catch (InterruptedException ex) { ... handle ...}
// Runs each loop on its own newly created thread.
void start() {
new Thread(new FillingLoop()).start();
new Thread(new EmptyingLoop()).start();
// Pseudocode (not compilable Java) for the core algorithm on a single exchange slot:
// a thread that finds the slot empty offers its item in a Node and waits to be released;
// a thread that finds a Node in the slot removes it, swaps items with it and releases
// the waiting thread. Any failed CAS simply retries the loop.
for (;;) {
if (slot is empty) { // offer
place item in a Node;
if (can CAS slot from empty to node) {
wait for release;
return matching item in node;
else if (can CAS slot from node to empty) { // release
get the item in node;
set matching item in node;
release waiting thread;
// else retry on CAS failure
这种方式在原则上是高效的,但实际上,与许多在单个位置上进行原子更新的算法一样,当大量参与者使用同一个Exchanger时,它存在严重的伸缩性问题。因此,我们的实现引入了一个消去数组(elimination array,即arena),安排不同的线程使用不同的slot以降低竞争,同时保证最终仍会成对地交换数据。这意味着我们无法完全控制线程在各个slot之间的分布,但我们会在竞争激烈时扩大arena的范围、在竞争减少时缩小arena的范围,以此分配线程可用的下标。为了达到这个效果,我们通过ThreadLocal来定义Node,并在Node中保存每个线程的下标index以及相关的跟踪状态。(每个线程可以重用自己私有的Node,而不必每次重新创建,因为slot只会通过CAS操作在Node与null之间交替变化,所以不会遇到ABA问题。当然,每次使用前需要重新设置item。)
为了实现一个高效的arena(场地),我们只在探测到竞争时才分配它(当然,在单CPU时不会启用arena)。一开始我们通过只使用单个slot的slotExchange方法来交换数据;探测到竞争后,再通过arena把线程安排到不同位置的slot上,并保证任何两个slot都不会落在同一个缓存行(cache line)上。由于目前没有办法确定缓存行的大小,我们使用一个对任何平台来说都足够大的值。此外,我们还通过其他手段来避免伪共享/非预期的共享并增强局部性,比如为Node添加填充(通过sun.misc.Contended)、把"bound"作为Exchanger的字段,以及使用一套区别于LockSupport版本、重新安排了部分park/unpark操作的实现。
在这种类型的并发代码中,方法的大部分逻辑都依赖于读取保存在局部变量中的字段值,因此这些方法都很庞大、难以拆分(这里主要是连在一起的 spin->yield->block/cancel 代码),并且严重依赖Unsafe内建机制来使用内联的CAS操作及相关的内存访问操作。注意Node.item没有声明为volatile,尽管它会被释放线程读取:因为释放线程只会在CAS操作完成之后才去读取它,而拥有该Node的线程对它的所有其他使用,也都已被其他操作以可接受的方式排好了次序。(当然,这里也可以使用CAS操作来设置match,但这样会减慢速度。)
/**
 * Per-thread record used during an exchange: it carries the item this thread offers,
 * the item a partner hands back, and the thread's arena bookkeeping.
 * Only {@code match} and {@code parked} are declared volatile; {@code item} is a plain field.
 */
@sun.misc.Contended static final class Node {
int index; // Arena index
int bound; // Last recorded value of Exchanger.bound
int collides; // Number of CAS failures at current bound
int hash; // Pseudo-random for spins
Object item; // This thread's current item
volatile Object match; // Item provided by releasing thread
volatile Thread parked; // Set to this thread when parked, else null
/** The corresponding thread local class */
static final class Participant extends ThreadLocal {
public Node initialValue() { return new Node(); }
* Per-thread state
private final Participant participant;
* Elimination array; null until enabled (within slotExchange).
* Element accesses use emulation of volatile gets and CAS.
private volatile Node[] arena;
* Slot used until contention detected.
private volatile Node slot;
// Excerpts showing how arena entries are sized and addressed. According to the
// surrounding text, the ASHIFT spacing keeps slots from sharing a cache line.
// NOTE(review): these statements look like they come from different places in the class
// (a constant, an allocation, a slot read, static initialisation); confirm against upstream.

// FULL is MMASK when NCPU is at least twice MMASK, otherwise half of NCPU.
static final int FULL = (NCPU >= (MMASK << 1)) ? MMASK : NCPU >>> 1;
// The arena array is allocated with length (FULL + 2) << ASHIFT.
arena = new Node[(FULL + 2) << ASHIFT];
// Volatile read of the slot for index i; j is the raw offset (i << ASHIFT) + ABASE.
Node q = (Node)U.getObjectVolatile(a, j = (i << ASHIFT) + ABASE);
U = sun.misc.Unsafe.getUnsafe();
Class<?> ak = Node[].class;
// ABASE is the array's base offset advanced by a further (1 << ASHIFT).
ABASE = U.arrayBaseOffset(ak) + (1 << ASHIFT);
/**
 * Timed exchange: offers {@code x} and waits, for at most the given timeout, for
 * another thread's item.
 *
 * @param x the object to exchange; {@code null} is carried internally as NULL_ITEM
 * @param timeout the maximum time to wait
 * @param unit the unit of {@code timeout}
 * @return the partner's object, with NULL_ITEM translated back to {@code null}
 * @throws InterruptedException if the thread is found interrupted, or arenaExchange
 *         returns null
 * @throws TimeoutException if the exchange helper reports TIMED_OUT
 */
public V exchange(V x, long timeout, TimeUnit unit)
throws InterruptedException, TimeoutException {
Object v;
// A null item is replaced by a sentinel so that a null result can mean "no exchange".
Object item = (x == null) ? NULL_ITEM : x;
long ns = unit.toNanos(timeout);
// While no arena exists, try slotExchange first. If the arena already exists, or
// slotExchange returns null, recheck (and clear) the interrupt status and then fall
// through to arenaExchange.
// NOTE(review): the same ns value is passed to both helpers, so time already spent
// waiting inside slotExchange is not deducted before arenaExchange starts waiting.
if ((arena != null ||
(v = slotExchange(item, true, ns)) == null) &&
((Thread.interrupted() ||
(v = arenaExchange(item, true, ns)) == null)))
throw new InterruptedException();
if (v == TIMED_OUT)
throw new TimeoutException();
return (v == NULL_ITEM) ? null : (V)v;
事实上,这个限时版本存在一个微妙的bug。注意传给slotExchange和arenaExchange的时间参数ns是同一个值:假如一次调用先经过slotExchange,再经过arenaExchange,那么就很可能发生等待时间过长的情况,因为在slotExchange中已经消耗的等待时间并不会从传给arenaExchange的ns中扣除。之所以会走到这一步,原因是U.park(false, ns)这个操作存在没有明显原因就返回(即虚假返回)的可能,根据注释:
/**
 * Exchange function used until arenas enabled. See above for explanation.
 *
 * @param item the item to exchange
 * @param timed true if the wait is timed
 * @param ns if timed, the maximum wait time, else 0L
 * @return the other thread's item; or null if either the arena
 * was enabled or the thread was interrupted before completion; or
 * TIMED_OUT if timed and timed out
 */
// Exchange through the single shared slot. A thread that finds a node in the slot claims
// it and completes the exchange; otherwise it installs its own node and waits until a
// partner fills in p.match, the wait is cancelled, or the arena takes over.
// NOTE(review): closing braces and a few statements appear to be missing from this
// listing (see the notes below); confirm against the upstream source before relying on it.
private final Object slotExchange(Object item, boolean timed, long ns) {
Node p = participant.get(); // this thread's own node
Thread t = Thread.currentThread();
if (t.isInterrupted()) // preserve interrupt status so caller can recheck
return null;
for (Node q;;) {
if ((q = slot) != null) {
// A node is waiting in the slot: claim it by swinging the slot back to null.
if (U.compareAndSwapObject(this, SLOT, q, null)) {
Object v = q.item; // the waiter's item becomes our result
q.match = item; // hand our item to the waiter
Thread w = q.parked;
// NOTE(review): as listed, v is returned only when a waiter is parked; the statement
// guarded by this check (presumably the wake-up of w) seems to have been lost.
if (w != null)
return v;
// create arena on contention, but continue until slot null
if (NCPU > 1 && bound == 0 &&
U.compareAndSwapInt(this, BOUND, 0, SEQ))
arena = new Node[(FULL + 2) << ASHIFT];
else if (arena != null)
return null; // caller must reroute to arenaExchange
else {
// Slot is empty and there is no arena: offer our node.
p.item = item;
// NOTE(review): as listed, the reset below is the body of this if; the statement that
// leaves the loop on a successful CAS appears to be missing.
if (U.compareAndSwapObject(this, SLOT, null, p))
p.item = null;
// await release
int h = p.hash; // spin randomisation state, saved back into p.hash on exit
long end = timed ? System.nanoTime() + ns : 0L; // deadline, only meaningful when timed
int spins = (NCPU > 1) ? SPINS : 1;
Object v;
// Loop until a releasing thread has stored its item in p.match.
while ((v = p.match) == null) {
if (spins > 0) {
h ^= h << 1; h ^= h >>> 3; h ^= h << 10;
if (h == 0)
h = SPINS | (int)t.getId();
// NOTE(review): no statement follows this condition here; the matching branch in
// arenaExchange calls Thread.yield() at this point.
else if (h < 0 && (--spins & ((SPINS >>> 1) - 1)) == 0)
// Our node has left the slot but match is not set yet: restart the spin budget.
else if (slot != p)
spins = SPINS;
// Park only while not interrupted, no arena exists, and (if timed) time remains.
else if (!t.isInterrupted() && arena == null &&
(!timed || (ns = end - System.nanoTime()) > 0L)) {
U.putObject(t, BLOCKER, this);
p.parked = t;
// Recheck that the node is still in the slot before actually parking.
if (slot == p)
U.park(false, ns);
p.parked = null;
U.putObject(t, BLOCKER, null);
// Give up: withdraw our node from the slot.
else if (U.compareAndSwapObject(this, SLOT, p, null)) {
// TIMED_OUT only if this was a timed wait that ran out without an interrupt; otherwise null.
v = timed && ns <= 0L && !t.isInterrupted() ? TIMED_OUT : null;
// Reset the node for its next use and remember the hash state.
U.putOrderedObject(p, MATCH, null);
p.item = null;
p.hash = h;
return v;
/**
 * Exchange function when arenas enabled. See above for explanation.
 *
 * @param item the (non-null) item to exchange
 * @param timed true if the wait is timed
 * @param ns if timed, the maximum wait time, else 0L
 * @return the other thread's item; or null if interrupted; or
 * TIMED_OUT if timed and timed out
 */
// Exchange through the arena. Starting at this thread's remembered index, the loop either
// releases a node found in the slot, offers its own node in an empty in-range slot and
// waits, or moves on to another index after a collision.
// NOTE(review): closing braces and a few statements appear to be missing from this
// listing (see the notes below); confirm against the upstream source before relying on it.
private final Object arenaExchange(Object item, boolean timed, long ns) {
Node[] a = arena;
Node p = participant.get(); // this thread's own node
for (int i = p.index;;) { // access slot at i
int b, m, c; long j; // j is raw array offset
Node q = (Node)U.getObjectVolatile(a, j = (i << ASHIFT) + ABASE);
// A node is waiting at this slot: claim it by CAS-ing the slot back to null.
if (q != null && U.compareAndSwapObject(a, j, q, null)) {
Object v = q.item; // release
q.match = item; // hand our item to the waiter
Thread w = q.parked;
// NOTE(review): as listed, v is returned only when a waiter is parked; the statement
// guarded by this check (presumably the wake-up of w) seems to have been lost.
if (w != null)
return v;
// b is the current bound and m its MMASK bits; offer only at an empty slot with i <= m.
else if (i <= (m = (b = bound) & MMASK) && q == null) {
p.item = item; // offer
if (U.compareAndSwapObject(a, j, null, p)) {
// A deadline is computed only for timed waits at m == 0.
long end = (timed && m == 0) ? System.nanoTime() + ns : 0L;
Thread t = Thread.currentThread(); // wait
for (int h = p.hash, spins = SPINS;;) {
Object v = p.match;
// A partner has supplied its item: reset the node and return the result.
if (v != null) {
U.putOrderedObject(p, MATCH, null);
p.item = null; // clear for next use
p.hash = h;
return v;
else if (spins > 0) {
h ^= h << 1; h ^= h >>> 3; h ^= h << 10; // xorshift
if (h == 0) // initialize hash
h = SPINS | (int)t.getId();
else if (h < 0 && // approx 50% true
(--spins & ((SPINS >>> 1) - 1)) == 0)
Thread.yield(); // two yields per wait
else if (U.getObjectVolatile(a, j) != p)
spins = SPINS; // releaser hasn't set match yet
// Park only when m == 0, not interrupted, and (if timed) time remains.
else if (!t.isInterrupted() && m == 0 &&
(!timed ||
(ns = end - System.nanoTime()) > 0L)) {
U.putObject(t, BLOCKER, this); // emulate LockSupport
p.parked = t; // minimize window
// Recheck that the node is still in the slot before actually parking.
if (U.getObjectVolatile(a, j) == p)
U.park(false, ns);
p.parked = null;
U.putObject(t, BLOCKER, null);
// Give up at this slot: withdraw our node if it is still there.
else if (U.getObjectVolatile(a, j) == p &&
U.compareAndSwapObject(a, j, p, null)) {
if (m != 0) // try to shrink
U.compareAndSwapInt(this, BOUND, b, b + SEQ - 1);
p.item = null;
p.hash = h;
i = p.index >>>= 1; // descend
// Thread.interrupted() also clears the interrupt status before null is returned.
if (Thread.interrupted())
return null;
if (timed && m == 0 && ns <= 0L)
return TIMED_OUT;
break; // expired; restart
p.item = null; // clear offer
else {
if (p.bound != b) { // stale; reset
p.bound = b;
p.collides = 0;
// Move to m, or to m - 1 when already at a non-zero m.
i = (i != m || m == 0) ? m : m - 1;
// Keep traversing while collisions are below m, the bound is at FULL, or the CAS that
// would raise the bound fails.
else if ((c = p.collides) < m || m == FULL ||
!U.compareAndSwapInt(this, BOUND, b, b + SEQ + 1)) {
p.collides = c + 1;
i = (i == 0) ? m : i - 1; // cyclically traverse
i = m + 1; // grow
p.index = i;