高可用HA(High Availability)是分布式系统架构设计中必须考虑的因素之一。成熟的架构大部分都自带高可用实现,只需配置即可,例如基于zookeeper的storm、spark、kafka,基于数据库的quartz(不考虑Terracotta)等等。很多组件都基于zookeeper,zookeeper也成为了后端人员的必修功课之一。
各工作节点在zookeeper的同一目录下各自创建EPHEMERAL_SEQUENTIAL(临时顺序)节点,创建出序号最小的节点(最初即编号为0的节点)的工作节点成为leader。
基础提要:
1、临时节点会随着创建它的zookeeper会话(连接)的关闭而自动删除
1、DefaultWatcher
package com.ai.xiajw.watch;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import java.util.concurrent.CountDownLatch;
/**
 * Connection watcher that counts down a latch when the ZooKeeper client
 * reports a {@code SyncConnected} state together with an event of type
 * {@code None}.
 */
public class DefaultWatcher implements Watcher {

    /** Latch counted down once the connected event has been observed. */
    private CountDownLatch countDownLatch;

    /**
     * @param countDownLatch latch to count down when the connected event arrives
     */
    public DefaultWatcher(CountDownLatch countDownLatch) {
        this.countDownLatch = countDownLatch;
    }

    /**
     * Counts the latch down and prints a message for the connected event;
     * every other event is ignored.
     *
     * @param watchedEvent event delivered by the ZooKeeper client
     */
    @Override
    public void process(WatchedEvent watchedEvent) {
        Event.KeeperState sessionState = watchedEvent.getState();
        Event.EventType kind = watchedEvent.getType();

        boolean connectionEstablished =
                sessionState.equals(Event.KeeperState.SyncConnected)
                        && kind.equals(Event.EventType.None);
        if (!connectionEstablished) {
            return;
        }

        // ZooKeeper connection established
        countDownLatch.countDown();
        System.out.println("zookeeper 创建连接成功........");
    }
}
作用:监控zookeeper连接状态,防止zookeeper连接还未成功建立就进行其他zookeeper操作。
2、LeaderNodeWatcher
package com.ai.xiajw.watch;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * Watcher that signals the deletion of the watched node by incrementing a
 * shared counter.
 */
public class LeaderNodeWatcher implements Watcher {

    /** Counter shared with the owner; incremented on every NodeDeleted event. */
    private final AtomicInteger state;

    /**
     * @param state counter to increment when the watched node is deleted
     */
    public LeaderNodeWatcher(AtomicInteger state) {
        this.state = state;
    }

    /**
     * Reacts only to {@code NodeDeleted}; all other event types are ignored.
     *
     * @param watchedEvent event delivered by the ZooKeeper client
     */
    @Override
    public void process(WatchedEvent watchedEvent) {
        if (watchedEvent.getType() == Event.EventType.NodeDeleted) {
            System.out.println("leader node deleted!");
            state.getAndIncrement(); // change the state so the owner notices the deletion
        }
    }
}
作用:监控leader节点,以便leader宕机(节点被删除)后迅速做出反应
package com.ai.xiajw.app;
import com.ai.xiajw.watch.DefaultWatcher;
import com.ai.xiajw.watch.LeaderNodeWatcher;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.data.Stat;
import java.io.IOException;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicInteger;
public class MyLeaderLatch {
private ZooKeeper client;
private String connectString;
private String latchPath;
private String key;
private final static String lock_suffix = "latch-";
private CountDownLatch countDownLatch;
private boolean isLeader = false;
private String leaderPath;
private String myPath;
private AtomicInteger state = new AtomicInteger(State.NONE.getValue());
public MyLeaderLatch(String connectString,String latchPath,String key){
this.connectString = connectString;
this.latchPath = latchPath;
this.key = key;
ZooKeeper client = null;
countDownLatch = new CountDownLatch(1);
try {
client = new ZooKeeper(connectString,3000,new DefaultWatcher(countDownLatch));
} catch (IOException e) {
e.printStackTrace();
}
try {
// 等待zookeeper连接成功再进行其他操作
countDownLatch.await();
} catch (InterruptedException e) {
e.printStackTrace();
}
this.client = client;
}
public void start() throws KeeperException, InterruptedException {
Stat stat = client.exists(latchPath,false);
if(stat == null){
client.create(latchPath,new byte[0],ZooDefs.Ids.OPEN_ACL_UNSAFE,CreateMode.PERSISTENT);
}
seizeLeader();
if(checkLeaderShip()){
isLeader = true;
System.out.println("i'm the leader now");
}else{
isLeader = false;
System.out.println("i'm not the leader");
while(!isLeader){
Thread.sleep(1000);
if(state.compareAndSet(State.LEADER_DELETED.getValue(),State.NONE.getValue())){ // leader deleted
seizeLeader();
isLeader = checkLeaderShip();
}
}
}
}
private void seizeLeader() throws KeeperException, InterruptedException {
if(myPath !=null && !"".equals(myPath)){
client.delete(myPath,-1);
}
this.myPath = client.create(latchPath+"/"+lock_suffix,key.getBytes(), ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL_SEQUENTIAL);
}
/**
* 检查leader关系
*
* @return true:是leader false:非leader
*/
private boolean checkLeaderShip() throws KeeperException, InterruptedException {
List children = client.getChildren(latchPath,false);
Collections.sort(children,new ChildrenComparator());
this.leaderPath = latchPath + "/" +children.get(0);
String data = new String(client.getData(latchPath+"/"+children.get(0),new LeaderNodeWatcher(state), new Stat()));
return key.equals(data);
}
private static int getSequential(String s){
return Integer.parseInt(s.substring(s.lastIndexOf(lock_suffix)+lock_suffix.length()));
}
public boolean isLeader(){
return isLeader;
}
class ChildrenComparator implements Comparator{
@Override
public int compare(String o1, String o2) {
int oo1 = getSequential(o1);
int oo2 = getSequential(o2);
return oo1 > oo2 ? 1 : oo1 < oo2 ? -1 : 0;
}
}
enum State{
NONE(0),
LEADER_DELETED(1);
private int intValue;
State(int value){
intValue = value;
}
public int getValue(){
return intValue;
}
}
}
package com.ai.xiajw.app;
import com.ai.xiajw.util.ZkConfig;
import org.apache.zookeeper.KeeperException;
import java.net.InetAddress;
import java.net.UnknownHostException;
/**
 * Test program for the hand-written high-availability implementation.
 *
 * @author xiajw
 */
public class MyLeaderLatchTest {

    /**
     * Loads the configuration, joins the election and then reports
     * leadership in an endless loop.
     */
    public static void main(String[] args) throws UnknownHostException, InterruptedException, KeeperException {
        ZkConfig.initConf();
        String connectString = ZkConfig.connectString;
        String latchPath = ZkConfig.latchPath;

        // payload stored in this participant's znode
        String hostAddress = InetAddress.getLocalHost().getHostAddress();
        String key = "MyLeaderLatchTest#" + hostAddress;

        MyLeaderLatch leaderLatch = new MyLeaderLatch(connectString, latchPath, key);
        leaderLatch.start();

        for (;;) {
            if (!leaderLatch.isLeader()) {
                continue;
            }
            System.out.println("........leader now ..........");
            Thread.sleep(2000);
        }
    }
}
用两台虚拟机做测试,在其中一台上部署zookeeper,把程序打包上传,分别运行。
可以看到,第一台由于先启动,成为了leader并持续运行,此时第二台非leader,在等待。当我kill掉第一台的程序之后,第二台的备用节点抢占leader,接着运行,测试ok。
Curator是Netflix公司开源的一套zookeeper客户端框架(现为Apache顶级项目),封装了很多zookeeper客户端非常底层的细节开发工作。
由于使用的是封装好的依赖,这里直接贴代码;原理是一样的,不过Curator更加完善。
package com.ai.xiajw.curator;
import com.ai.xiajw.util.ZkConfig;
import org.apache.curator.RetryPolicy;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.framework.recipes.leader.LeaderLatch;
import org.apache.curator.framework.recipes.leader.LeaderLatchListener;
import org.apache.curator.retry.ExponentialBackoffRetry;
import java.net.InetAddress;
public class ZkLeaderLatch {
private static LeaderLatch leaderLatch;
private static CuratorFramework zkClient;
public ZkLeaderLatch(){
try{
final String id = String.format("zkLatchClient#%s", InetAddress.getLocalHost().getHostAddress());
System.out.println("zk:"+id+"客户端初始化....server:"+ ZkConfig.connectString+",latch path:"+ZkConfig.latchPath);
RetryPolicy retryPolicy = new ExponentialBackoffRetry(1000,3);
zkClient = CuratorFrameworkFactory.builder()
.connectString(ZkConfig.connectString)
.sessionTimeoutMs(3000)
.retryPolicy(retryPolicy)
.build();
System.out.println("zk 客户端启动.....");
zkClient.start();
leaderLatch = new LeaderLatch(zkClient,ZkConfig.latchPath,id);
LeaderLatchListener leaderLatchListener = new LeaderLatchListener() {
public void isLeader() {
System.out.println("客户端:"+id+"不是主节点");
}
public void notLeader() {
System.out.println("客户端:"+id+" 成为主节点!");
}
};
leaderLatch.addListener(leaderLatchListener);
leaderLatch.start();
} catch (Exception e) {
e.printStackTrace();
}
}
public boolean isLeader() {
return leaderLatch.hasLeadership();
}
public CuratorFramework getClient(){
return zkClient;
}
public LeaderLatch getLatch(){
return leaderLatch;
}
}
测试类:
package com.ai.xiajw.app;
import com.ai.xiajw.curator.ZkLeaderLatch;
import com.ai.xiajw.util.ZkConfig;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * Test program for the Curator-based high-availability implementation.
 */
public class ZkTest {

    /**
     * Joins the election and, while this process is the leader, prints an
     * increasing execution count once per second.
     */
    public static void main(String[] args) throws InterruptedException {
        ZkConfig.initConf();
        ZkLeaderLatch zkLeaderLatch = new ZkLeaderLatch();
        AtomicInteger ai = new AtomicInteger(0);
        while (true) {
            if (zkLeaderLatch.isLeader()) {
                System.out.println(" exec count : " + ai.getAndIncrement());
            }
            // Pause between polls so the loop neither spins a CPU core while
            // waiting for leadership nor floods stdout while holding it.
            Thread.sleep(1000);
        }
    }
}
测试结果与前面自己实现的版本基本一致,此处省略。
源码地址:https://github.com/xiajw2018/high-availability.git