SolrCloud中采用了DistributedQueue来同步节点间的状态信息。SolrCloud中总共会在3个地方保存队列信息:
/overseer/queue:保存每个shard的配置信息,以及状态信息(recovering,recovery_failed,active,down,sync)
对应的生产者为:ZKController中的overseerJobQueue
消费者:Overseer.ClusterStateUpdater中的stateUpdateQueue;
/overseer/queue-work:保存正在处理中的消息。shard的信息会先保存到/overseer/queue下面,进行处理时会移到/overseer/queue-work中,消息处理完之后再从/overseer/queue-work中删除
生产者:stateUpdateQueue
消费者:Overseer.ClusterStateUpdater中的workQueue
/overseer/collection-queue-work:只有在create,delete,reload collection的时候才会用到此队列,其中只保存相应的collection操作信息。待collection操作成功之后,还会涉及/overseer/queue和/overseer/queue-work这两个队列
生产者:ZKController中的overseerCollectionQueue
消费者:OverseerCollectionProcessor中的workQueue
DistributedQueue源码:
package org.apache.solr.cloud;

import java.nio.charset.Charset;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.TreeMap;
import java.util.concurrent.CountDownLatch;

import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.data.ACL;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A distributed FIFO queue built on ZooKeeper sequential nodes.
 *
 * <p>Every item is stored as a child znode of {@code dir} created with
 * {@link CreateMode#PERSISTENT_SEQUENTIAL}; ZooKeeper appends an increasing
 * sequence number to the node name, and consumers order the children by that
 * number to obtain queue order.
 *
 * <p>NOTE(review): the trailing {@code true} passed to every
 * {@link SolrZkClient} call is presumably its retry-on-connection-loss flag;
 * confirm against the SolrZkClient version in use.
 */
public class DistributedQueue {
  private static final Logger LOG = LoggerFactory.getLogger(DistributedQueue.class);

  /** Name prefix of every queue item node; the sequence number follows it. */
  private static final String PREFIX = "qn-";

  /** Parent znode path under which the queue items are created. */
  private final String dir;

  private final SolrZkClient zookeeper;

  /** ACL applied to every znode this queue creates. */
  private final List<ACL> acl;

  /**
   * Creates a queue handle for the given parent path.
   *
   * @param zookeeper client used for all ZooKeeper operations
   * @param dir parent znode path of the queue
   * @param acl ACL for created znodes; when {@code null},
   *     {@link ZooDefs.Ids#OPEN_ACL_UNSAFE} is used
   */
  public DistributedQueue(SolrZkClient zookeeper, String dir, List<ACL> acl) {
    this.dir = dir;
    this.acl = (acl != null) ? acl : ZooDefs.Ids.OPEN_ACL_UNSAFE;
    this.zookeeper = zookeeper;
  }

  /**
   * Lists the children of the queue directory keyed and sorted by their
   * sequence number. This ordering is what gives the queue its FIFO behavior.
   * Children whose name does not start with the prefix, or whose suffix is not
   * a number, are logged and skipped.
   *
   * @param watcher optional watcher registered on the children list, may be {@code null}
   * @return map from sequence number to child node name, ascending
   * @throws KeeperException.NoNodeException if the queue directory does not exist
   */
  private TreeMap<Long,String> orderedChildren(Watcher watcher)
      throws KeeperException, InterruptedException {
    TreeMap<Long,String> ordered = new TreeMap<Long,String>();
    List<String> childNames = zookeeper.getChildren(dir, watcher, true);

    for (String childName : childNames) {
      if (!childName.startsWith(PREFIX)) {
        LOG.warn("Found child node with improper name: " + childName);
        continue;
      }
      try {
        // The suffix after the prefix is the sequence number assigned on creation.
        ordered.put(Long.valueOf(childName.substring(PREFIX.length())), childName);
      } catch (NumberFormatException e) {
        LOG.warn("Found child node with improper format : " + childName + " " + e, e);
      }
    }
    return ordered;
  }

  /**
   * Holder for the payload of a node that was read successfully. It lets
   * callers tell "no node could be read" (a {@code null} holder) apart from
   * "a node was read and its payload is {@code null}".
   */
  private static final class Head {
    final byte[] data;

    Head(byte[] data) {
      this.data = data;
    }
  }

  /**
   * Walks the children in queue order and reads the first one that still exists.
   *
   * @param children ordered snapshot of the queue's children
   * @param delete whether to delete the node after reading it
   * @return the payload holder of the first node handled successfully, or
   *     {@code null} if every listed node had already been removed
   */
  private Head readFirstAvailable(TreeMap<Long,String> children, boolean delete)
      throws KeeperException, InterruptedException {
    for (String nodeName : children.values()) {
      String path = dir + "/" + nodeName;
      try {
        byte[] data = zookeeper.getData(path, null, null, true);
        if (delete) {
          zookeeper.delete(path, -1, true);
        }
        return new Head(data);
      } catch (KeeperException.NoNodeException e) {
        // Another client removed this node first; fall through to the next one.
      }
    }
    return null;
  }

  /**
   * Non-blocking head access shared by {@link #element()} and {@link #remove()}.
   * Re-lists the children whenever every node of the previous snapshot was
   * removed by other clients before it could be read.
   *
   * @param delete whether the head is removed from the queue
   * @throws NoSuchElementException if the queue directory is missing or has no items
   */
  private byte[] headOrThrow(boolean delete)
      throws NoSuchElementException, KeeperException, InterruptedException {
    while (true) {
      TreeMap<Long,String> children;
      try {
        children = orderedChildren(null);
      } catch (KeeperException.NoNodeException e) {
        throw new NoSuchElementException();
      }
      if (children.isEmpty()) {
        throw new NoSuchElementException();
      }
      Head head = readFirstAvailable(children, delete);
      if (head != null) {
        return head.data;
      }
    }
  }

  /**
   * Blocking head access shared by {@link #take()} and {@link #peek(boolean)}.
   * Creates the queue directory if it is missing, and waits on a children
   * watch while the queue is empty.
   *
   * @param delete whether the head is removed from the queue
   */
  private byte[] headBlocking(boolean delete) throws KeeperException, InterruptedException {
    while (true) {
      // A fresh watcher per iteration: its latch can only be released once.
      LatchChildWatcher childWatcher = new LatchChildWatcher();
      TreeMap<Long,String> children;
      try {
        children = orderedChildren(childWatcher);
      } catch (KeeperException.NoNodeException e) {
        ensureDirExists();
        continue;
      }
      if (children.isEmpty()) {
        // Nothing queued yet: block until the watcher fires, then list again.
        childWatcher.await();
        continue;
      }
      Head head = readFirstAvailable(children, delete);
      if (head != null) {
        return head.data;
      }
      // Every listed node was consumed by someone else; list the children again.
    }
  }

  /**
   * Creates the queue directory, tolerating the race in which another client
   * creates it first.
   */
  private void ensureDirExists() throws KeeperException, InterruptedException {
    try {
      zookeeper.create(dir, new byte[0], acl, CreateMode.PERSISTENT, true);
    } catch (KeeperException.NodeExistsException e) {
      // Someone else created it in the meantime; that is all we need.
    }
  }

  /**
   * Returns the data of the head of the queue without removing it.
   *
   * @return payload of the first item
   * @throws NoSuchElementException if the queue is empty or its directory does not exist
   */
  public byte[] element() throws NoSuchElementException, KeeperException, InterruptedException {
    return headOrThrow(false);
  }

  /**
   * Removes the head of the queue and returns its data.
   *
   * @return payload of the removed item
   * @throws NoSuchElementException if the queue is empty or its directory does not exist
   */
  public byte[] remove() throws NoSuchElementException, KeeperException, InterruptedException {
    return headOrThrow(true);
  }

  /**
   * Watcher that releases a one-shot latch when it fires, logging the event
   * at debug level.
   */
  private static class LatchChildWatcher implements Watcher {
    private final CountDownLatch latch = new CountDownLatch(1);

    public LatchChildWatcher() {
    }

    @Override
    public void process(WatchedEvent event) {
      LOG.debug("Watcher fired on path: " + event.getPath() + " state: "
          + event.getState() + " type " + event.getType());
      latch.countDown();
    }

    /** Blocks until {@link #process(WatchedEvent)} has been called. */
    public void await() throws InterruptedException {
      latch.await();
    }
  }

  /**
   * Removes the head of the queue and returns its data, blocking while the
   * queue is empty. The queue directory is created if it does not exist.
   *
   * @return payload of the removed item
   */
  public byte[] take() throws KeeperException, InterruptedException {
    return headBlocking(true);
  }

  /**
   * Appends an item to the queue. No client-side locking is used; the order
   * comes from the sequence number ZooKeeper assigns to the new node. The
   * queue directory is created on demand.
   *
   * @param data payload of the new item
   * @return always {@code true}
   */
  public boolean offer(byte[] data) throws KeeperException, InterruptedException {
    for (;;) {
      try {
        zookeeper.create(dir + "/" + PREFIX, data, acl, CreateMode.PERSISTENT_SEQUENTIAL, true);
        return true;
      } catch (KeeperException.NoNodeException e) {
        // Parent directory is missing: create it and retry the insert.
        ensureDirExists();
      }
    }
  }

  /**
   * Returns the data of the head of the queue without removing it.
   *
   * @return payload of the first item, or {@code null} if the queue is empty
   */
  public byte[] peek() throws KeeperException, InterruptedException {
    try {
      return element();
    } catch (NoSuchElementException e) {
      return null;
    }
  }

  /**
   * Returns the data of the head of the queue without removing it.
   *
   * @param block when {@code true}, blocks until an item is available;
   *     when {@code false}, behaves like {@link #peek()}
   * @return payload of the first item; may be {@code null} only when not blocking
   *     and the queue is empty, or when the stored payload itself is {@code null}
   */
  public byte[] peek(boolean block) throws KeeperException, InterruptedException {
    if (!block) {
      return peek();
    }
    return headBlocking(false);
  }

  /**
   * Removes the head of the queue and returns its data.
   *
   * @return payload of the removed item, or {@code null} if the queue is empty
   */
  public byte[] poll() throws KeeperException, InterruptedException {
    try {
      return remove();
    } catch (NoSuchElementException e) {
      return null;
    }
  }

  /**
   * Small manual demo: offers one item to {@code /overseer/queue} on a local
   * ZooKeeper and takes it back.
   */
  public static void main(String[] args) throws KeeperException, InterruptedException {
    // Explicit charset so the round trip does not depend on the platform default.
    Charset utf8 = Charset.forName("UTF-8");
    // NOTE(review): second argument is presumably the client timeout in milliseconds.
    SolrZkClient client = new SolrZkClient("localhost", 5 * 1000);
    try {
      DistributedQueue queue = new DistributedQueue(client, "/overseer/queue", null);
      queue.offer("test".getBytes(utf8));
      System.out.println(new String(queue.take(), utf8));
    } finally {
      // Release the ZooKeeper connection even if the demo fails.
      client.close();
    }
  }
}