1 Overview
在分布式系统中,通常会避免使用分布式锁。然而在某些场景下,还是存在对分布式锁的需求。跟普通锁相比,分布式锁需要面对的问题更多,例如怎样保证某个进程在持有锁时意外终止之后,其它进程也能够正常地获得锁等等。笔者认为一个比较好的分布式锁实现是Terracotta,但是这不是本文的重点,感兴趣的读者可以参考笔者的Terracotta in Action 系列文章(http://whitesock.iteye.com/blog/351780 , http://whitesock.iteye.com/blog/352876 , http://whitesock.iteye.com/blog/354587 )。
除了Terracotta,不少其它开源项目也声称支持分布式锁,例如ZooKeeper,JGroups和Hazelcast等。在这些项目中,笔者倾向于使用ZooKeeper。ZooKeeper在其官方文档的ZooKeeper Recipes and Solutions章节中介绍了一个分布式锁的实现,本文主要对该版本进行了改良。关于Hazelcast,笔者不得不说,其官方文档文字不少但却苍白,很多内容介绍的都是浅尝辄止,难道是强迫开发人员去仔细地阅读源码,或者参加其价格不菲的培训?
2 Implementation
首先,笔者希望分布式锁能够支持Java并发包中的Lock接口,并且最好是可重入的。此外,在某个进程持有分布式锁的过程中,如果不能保证该锁不会被其它进程同时持有(例如网络故障),那么至少应该能够通知锁的持有者,以便其采取相应的应对措施。以下是笔者对分布式锁的定义:
import java.util.concurrent.locks.Lock;

/**
 * A {@link Lock} that can additionally carry a {@link Listener}.
 *
 * <p>The listener is the hook through which an implementation tells the
 * current holder that exclusive ownership of the lock can no longer be
 * guaranteed, so that the holder can react.
 */
public interface DistributedLock extends Lock {

    /**
     * Callback invoked when the lock can no longer be held exclusively.
     */
    interface Listener {

        /**
         * Reports that exclusive ownership of {@code lock} was lost.
         *
         * @param lock the lock whose ownership is no longer guaranteed
         * @param e    the failure that triggered the notification
         */
        void onAbort(DistributedLock lock, Exception e);
    }

    /**
     * Registers the listener to notify on abort.
     *
     * @param listener the listener to install
     */
    void setListener(Listener listener);

    /**
     * Returns the listener currently registered on this lock.
     *
     * @return the registered listener
     */
    Listener getListener();
}
其中Listener接口的作用是,在无法排它独占该锁时进行回调。接下来是笔者的两个实现的共通父类。
import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantLock; public abstract class AbstractDistributedLock implements DistributedLock { // protected volatile boolean verbose; protected volatile Listener listener; protected final ReentrantLock lock = new ReentrantLock(); // protected abstract void doLock(); protected abstract void doUnlock(); protected abstract boolean doTryLock(); protected abstract void doLockInterruptibly() throws InterruptedException; protected abstract boolean doTryLock(long timeout, TimeUnit unit) throws InterruptedException; /** * */ public boolean isVerbose() { return verbose; } public void setVerbose(boolean verbose) { this.verbose = verbose; } public boolean isLocked() { return this.lock.isLocked(); } public boolean isHeldByCurrentThread() { return this.lock.isHeldByCurrentThread(); } /** * */ @Override public Listener getListener() { return this.listener; } @Override public void setListener(Listener listener) { this.listener = listener; } /** * */ @Override public void lock() { // this.lock.lock(); if(this.lock.getHoldCount() > 1) return; // boolean succeed = false; try { doLock(); succeed = true; } finally { if(!succeed) { this.lock.unlock(); } } } @Override public void lockInterruptibly() throws InterruptedException { // this.lock.lockInterruptibly(); if(this.lock.getHoldCount() > 1) return; // boolean succeed = false; try { doLockInterruptibly(); succeed = true; } finally { if(!succeed) { this.lock.unlock(); } } } @Override public boolean tryLock() { // if(!this.lock.tryLock()) return false; if(this.lock.getHoldCount() > 1) return true; // boolean succeed = false; try { succeed = doTryLock(); } finally { if(!succeed) { this.lock.unlock(); } } return succeed; } @Override public boolean tryLock(long timeout, TimeUnit unit) throws InterruptedException { // final long mark = System.nanoTime(); if(!this.lock.tryLock(timeout, unit)) return false; 
if(this.lock.getHoldCount() > 1) return true; // boolean succeed = false; try { timeout = TimeUnit.NANOSECONDS.convert(timeout, unit) - (System.nanoTime() - mark); if(timeout >= 0) { succeed = doTryLock(timeout, TimeUnit.NANOSECONDS); } } finally { if(!succeed) { this.lock.unlock(); } } return succeed; } @Override public void unlock() { // if(!this.lock.isHeldByCurrentThread()) return; if(this.lock.getHoldCount() > 1) return; // try { doUnlock(); } finally { this.lock.unlock(); } } @Override public Condition newCondition() { throw new UnsupportedOperationException(); } }
2.1 MySQL Named Lock
在讨论ZooKeeper的分布式锁实现之前,先介绍一下笔者基于MySQL Named Lock的一个实现。
import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import javax.sql.DataSource; import org.apache.commons.lang.builder.ToStringBuilder; import org.apache.commons.lang.builder.ToStringStyle; import org.apache.commons.lang.exception.NestableRuntimeException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public final class MySQLNamedLock extends AbstractDistributedLock { // private static final Logger LOGGER = LoggerFactory.getLogger(MySQLNamedLock.class); // private String name; private DataSource dataSource; private long validationInterval = 1000L; private ScheduledExecutorService scheduler; private final AtomicReference<Connection> connection; private final AtomicReference<ScheduledFuture<?>> future; /** * */ public MySQLNamedLock() { this(null, null, null); } public MySQLNamedLock(String name, DataSource dataSource, ScheduledExecutorService scheduler) { this.name = name; this.scheduler = scheduler; this.dataSource = dataSource; this.connection = new AtomicReference<Connection>(); this.future = new AtomicReference<ScheduledFuture<?>>(); } /** * */ @Override public String toString() { return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) .append("name", this.name).toString(); } /** * */ public String getName() { return name; } public void setName(String name) { this.name = name; } public long getValidationInterval() { return validationInterval; } public void setValidationInterval(long interval) { this.validationInterval = interval; } public DataSource getDataSource() { return dataSource; } public void setDataSource(DataSource dataSource) { this.dataSource = dataSource; } public ScheduledExecutorService getScheduler() { return scheduler; } public void setScheduler(ScheduledExecutorService scheduler) { this.scheduler = 
scheduler; } /** * */ @Override protected void doLock() { doTryLock(Integer.MAX_VALUE, TimeUnit.SECONDS); } @Override protected void doLockInterruptibly() { doTryLock(Integer.MAX_VALUE, TimeUnit.SECONDS); } @Override protected boolean doTryLock() { return doTryLock(0, TimeUnit.SECONDS); } @Override protected boolean doTryLock(long timeout, TimeUnit unit) { // Integer r = null; ResultSet rs = null; PreparedStatement ps = null; try { this.connection.set(this.dataSource.getConnection()); ps = this.connection.get().prepareStatement("SELECT GET_LOCK(?, ?)"); ps.setString(1, this.name); ps.setInt(2, (int)TimeUnit.SECONDS.convert(timeout, unit)); rs = ps.executeQuery(); if(rs.next()) { r = rs.getInt(1); if(rs.wasNull()) r = null; } } catch(Exception e) { throw new NestableRuntimeException("failed to lock, name: " + this.name, e); } finally { JdbcUtils.closeQuietly(rs); JdbcUtils.closeQuietly(ps); } // final boolean succeed = (r != null && r == 1); if(succeed && this.listener != null) { final long interval = this.validationInterval; this.future.set(this.scheduler.scheduleWithFixedDelay(new ValidationTask(), interval, interval, TimeUnit.MILLISECONDS)); } // return succeed; } @Override protected void doUnlock() { // final ScheduledFuture<?> f = this.future.getAndSet(null); if(f != null) f.cancel(true); // Integer r = null; ResultSet rs = null; PreparedStatement ps = null; try { // ps = this.connection.get().prepareStatement("SELECT RELEASE_LOCK(?)"); ps.setString(1, this.name); rs = ps.executeQuery(); if(rs.next()) { r = rs.getInt(1); if(rs.wasNull()) r = null; } // if(r == null) { LOGGER.warn("lock does NOT exist, name: {}", this.name); } else if(r == 0) { LOGGER.warn("lock was NOT accquired by current thread, name: {}", this.name); } else { LOGGER.warn("failed to unlock, name: {}, result: {}", this.name, r); } } catch(Exception e) { throw new NestableRuntimeException("failed to unlock, name: " + this.name, e); } finally { JdbcUtils.closeQuietly(rs); 
JdbcUtils.closeQuietly(ps); JdbcUtils.closeQuietly(this.connection.getAndSet(null)); } } /** * */ private class ValidationTask implements Runnable { @Override public void run() { try { ((com.mysql.jdbc.Connection)connection.get()).ping(); } catch(Exception e) { // if(isLocked() && listener != null && connection.get() != null) { listener.onAbort(MySQLNamedLock.this, e); } // throw new NestableRuntimeException(e); // Note: suppress subsequent executions } } } }
需要注意的是,如果在该锁上注册了Listener,并且Connection在持有锁的过程中失效,那么该Listener会被回调。
2.2 ZooKeeper Lock
以下代码是笔者对ZooKeeper官方版本的改良:
import java.lang.management.ManagementFactory; import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import org.apache.commons.lang.builder.ToStringBuilder; import org.apache.commons.lang.builder.ToStringStyle; import org.apache.commons.lang.exception.NestableRuntimeException; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; import org.apache.zookeeper.ZooDefs; import org.apache.zookeeper.ZooKeeper; import org.apache.zookeeper.data.Stat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public final class ZooKeeperLock extends AbstractDistributedLock { // private static final Logger LOGGER = LoggerFactory.getLogger(ZooKeeperLock.class); // private String directory; private ZooKeeper zookeeper; private final String processName; private final AtomicReference<ZooKeeperLocker> locker; /** * */ public ZooKeeperLock() { this(null, null); } public ZooKeeperLock(ZooKeeper zookeeper, String directory) { this.zookeeper = zookeeper; this.directory = directory; this.locker = new AtomicReference<ZooKeeperLocker>(); this.processName = ManagementFactory.getRuntimeMXBean().getName(); } /** * */ @Override public String toString() { return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) .append("directory", this.directory).toString(); } /** * */ public String getDirectory() { return directory; } public void setDirectory(String directory) { this.directory = directory; } public ZooKeeper getZookeeper() { return zookeeper; } public void setZookeeper(ZooKeeper zookeeper) { this.zookeeper = zookeeper; } /** * */ @Override protected void doLock() { doTryLock(Integer.MAX_VALUE, TimeUnit.SECONDS); } @Override protected void doLockInterruptibly() { doTryLock(Integer.MAX_VALUE, TimeUnit.SECONDS); } 
@Override protected boolean doTryLock() { return doTryLock(0, TimeUnit.SECONDS); } @Override protected boolean doTryLock(long timeout, TimeUnit unit) { try { this.locker.set(new ZooKeeperLocker()); return this.locker.get().lock(timeout, unit); } catch(Exception e) { throw new NestableRuntimeException("failed to lock, directory: " + this.directory, e); } } @Override protected void doUnlock() { try { this.locker.get().unlock(); } catch(Exception e) { throw new NestableRuntimeException("failed to unlock, directory: " + this.directory, e); } finally { this.locker.set(null); } } /** * */ private class ZooKeeperLocker implements Watcher { // private volatile String name; private volatile CountDownLatch latch; /** * */ @Override public void process(WatchedEvent event) { // if(this.latch != null) { this.latch.countDown(); } // if(isVerbose() && LOGGER.isInfoEnabled()) { LOGGER.info("received an event: {}", event); } } public boolean lock(long timeout, TimeUnit unit) throws Exception { boolean succeed = false; try { do { final long mark = System.nanoTime(); timeout = TimeUnit.NANOSECONDS.convert(timeout, unit); try { succeed = doLock(timeout, TimeUnit.NANOSECONDS); break; } catch (KeeperException.ConnectionLossException e) { timeout -= (System.nanoTime() - mark); if(isVerbose() && LOGGER.isInfoEnabled()) { LOGGER.info("connection was lost, directory: {}, name: {}, message: {}", new Object[]{directory, this.name, e.getMessage()}); } } } while(timeout > 0); } finally { if(!succeed) { // Unlock quietly try { unlock(); } catch(Exception e) { LOGGER.warn("failed to unlock, directory: " + directory + ", name: " + this.name, e); } } } return succeed; } public void unlock() throws Exception { try { zookeeper.delete(directory + "/" + this.name, -1); } catch (KeeperException.NoNodeException e) { LOGGER.warn("node does NOT exist, directory: {}, name: {}, message: {}", new Object[]{directory, this.name, e.getMessage()}); } finally { this.name = null; } } /** * */ private Boolean 
doLock(long timeout, TimeUnit unit) throws Exception { boolean succeed = false; do { // final long mark = System.nanoTime(); timeout = TimeUnit.NANOSECONDS.convert(timeout, unit); // if (this.name == null) { this.name = findOrCreateChild(); } // final List<String> children = zookeeper.getChildren(directory, false); if (children.isEmpty()) { this.name = null; LOGGER.warn("could not find any child, directory: {}, name: {}", new Object[]{directory, this.name}); } else { final SequenceComparator comparator = new SequenceComparator(); Collections.sort(children, comparator); final int index = Collections.binarySearch(children, this.name, comparator); if (index > 0) { // Not the first one this.latch = new CountDownLatch(1); final String previous = children.get(index - 1); final Stat stat = zookeeper.exists(directory + "/" + previous, this); if (stat != null) { this.latch.await(timeout, TimeUnit.NANOSECONDS); this.latch = null; } else { LOGGER.warn("could not find the previous child, directory: {}, name: {}", new Object[]{directory, this.name}); } } else { final String owner = children.get(0); if (this.name != null && owner != null && this.name.equals(owner)) { succeed = true; } else { LOGGER.warn("the lock should be held by current thread, directory: {}, name: {}, owner: {}", new Object[]{directory, this.name, owner}); } } } // timeout -= (System.nanoTime() - mark); } while (!succeed && timeout >= 0); return succeed; } private String findOrCreateChild() throws Exception { // final String prefix = zookeeper.getSessionId() + "-"; final List<String> children = zookeeper.getChildren(directory, false); for (String child : children) { if (child.startsWith(prefix)) { if(isVerbose() && LOGGER.isInfoEnabled()) { LOGGER.info("found a child, directory: {}, child: {}", new Object[]{directory, child}); } return child; } } // final String data = Thread.currentThread().getId() + "@" + processName; final String path = zookeeper.create(directory + "/" + prefix, data.getBytes(), 
ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL_SEQUENTIAL); final String child = path.substring(path.lastIndexOf("/") + 1); if(isVerbose() && LOGGER.isInfoEnabled()) { LOGGER.info("created a child, directory: {}, path: {}", new Object[]{directory, child}); } return child; } } /** * */ private static class SequenceComparator implements Comparator<String> { @Override public int compare(String lhs, String rhs) { final int index1 = lhs.lastIndexOf('-'); final int index2 = rhs.lastIndexOf('-'); final int sequence1 = Integer.parseInt(lhs.substring(index1 + 1)); final int sequence2 = Integer.parseInt(rhs.substring(index2 + 1)); return sequence1 - sequence2; } } }
ZooKeeperLock是fair的,并且在Node中保存的数据是线程ID,进程ID以及主机名。需要注意的是,应该为ZooKeeper部署集群,此外还需要保证传入ZooKeeperLock构造函数中的ZooKeeper实例已经跟Server建立了连接,否则zookeeper.getSessionId()会返回0,从而导致错误。
3 Disclaimer
笔者只对以上代码进行了简单的测试,因此可能存在错误,请慎重使用。如果发现问题,感谢反馈。