分布式自增ID算法---雪花算法 (snowflake,Java版)







  • 1位,不用。二进制中最高位为1的都是负数,但是我们生成的id一般都使用正整数,所以这个最高位固定是0
  • 41位,用来记录时间戳(毫秒)。

    • 41位可以表示$2^{41}$个数字,
    • 如果只用来表示正整数(计算机中正数包含0),可以表示的数值范围是:0 至 $2^{41}-1$,减1是因为可表示的数值范围是从0开始算的,而不是1。
    • 也就是说41位最大可以表示$2^{41}-1$毫秒,转化成单位年则是$(2^{41}-1) / (1000 * 60 * 60 * 24 * 365) \approx 69$年
  • 10位,用来记录工作机器id。

    • 可以部署在$2^{10} = 1024$个节点,包括5位datacenterId和5位workerId
    • 5位(bit)可以表示的最大正整数是$2^{5}-1 = 31$,即可以用0、1、2、3、....31这32个数字,来表示不同的datacenterId或workerId
  • 12位,序列号,用来记录同毫秒内产生的不同id。

    • 12位(bit)可以表示的最大正整数是$2^{12}-1 = 4095$,即可以用0、1、2、3、....4095这4096个数字,来表示同一机器同一时间戳(毫秒)内产生的4096个ID序号



  • 所有生成的id按时间趋势递增
  • 整个分布式系统内不会产生重复id(因为有datacenterId和workerId来做区分)



  1 /** Copyright 2010-2012 Twitter, Inc.*/
  2 package com.twitter.service.snowflake
  4 import com.twitter.ostrich.stats.Stats
  5 import com.twitter.service.snowflake.gen._
  6 import java.util.Random
  7 import com.twitter.logging.Logger
  /**
   * Produces 64-bit ids for one (datacenter, worker) pair.
   *
   * An id is assembled as: (milliseconds since `twepoch`) shifted left by 22 bits,
   * OR-ed with the 5-bit datacenter id, the 5-bit worker id and a 12-bit
   * per-millisecond sequence number.
   *
   * It lives in its own class in case multiple worker threads per process ever
   * need to be supported.
   *
   * @param workerId     worker identifier; must fit in `workerIdBits` bits and be non-negative
   * @param datacenterId datacenter identifier; must fit in `datacenterIdBits` bits and be non-negative
   * @param reporter     receives an audit log entry for every id handed out by `get_id`
   * @param sequence     current per-millisecond sequence counter
   */
  class IdWorker(val workerId: Long, val datacenterId: Long, private val reporter: Reporter, var sequence: Long = 0L)
  extends Snowflake.Iface {

    /** Bumps the global and the per-agent "ids generated" statistics. */
    private[this] def genCounter(agent: String) = {
      Stats.incr("ids_generated")
      Stats.incr("ids_generated_%s".format(agent))
    }

    private[this] val exceptionCounter = Stats.getCounter("exceptions")
    private[this] val log = Logger.get
    private[this] val rand = new Random

    /** Custom epoch (milliseconds) subtracted from the wall clock before packing. */
    val twepoch = 1288834974657L

    // Width of each field, in bits.
    private[this] val workerIdBits = 5L
    private[this] val datacenterIdBits = 5L
    private[this] val sequenceBits = 12L

    // Largest value representable in each id field (all low bits set).
    private[this] val maxWorkerId = ~(-1L << workerIdBits)
    private[this] val maxDatacenterId = ~(-1L << datacenterIdBits)

    // Left-shift applied to each field when the id is assembled.
    private[this] val workerIdShift = sequenceBits
    private[this] val datacenterIdShift = workerIdShift + workerIdBits
    private[this] val timestampLeftShift = datacenterIdShift + datacenterIdBits

    // Keeps the sequence counter inside its 12 bits.
    private[this] val sequenceMask = ~(-1L << sequenceBits)

    // Timestamp used for the most recently generated id; -1 until the first id.
    private[this] var lastTimestamp = -1L

    // Reject a worker id that does not fit its field.
    if (workerId < 0 || workerId > maxWorkerId) {
      exceptionCounter.incr(1)
      throw new IllegalArgumentException("worker Id can't be greater than %d or less than 0".format(maxWorkerId))
    }

    // Reject a datacenter id that does not fit its field.
    if (datacenterId < 0 || datacenterId > maxDatacenterId) {
      exceptionCounter.incr(1)
      throw new IllegalArgumentException("datacenter Id can't be greater than %d or less than 0".format(maxDatacenterId))
    }

    log.info("worker starting. timestamp left shift %d, datacenter id bits %d, worker id bits %d, sequence bits %d, workerid %d",
      timestampLeftShift, datacenterIdBits, workerIdBits, sequenceBits, workerId)

    /**
     * Hands out the next id on behalf of `useragent`, updating statistics and
     * reporting an audit entry.
     *
     * @throws InvalidUserAgentError when `useragent` fails `validUseragent`
     */
    def get_id(useragent: String): Long = {
      if (!validUseragent(useragent)) {
        exceptionCounter.incr(1)
        throw new InvalidUserAgentError
      }

      val generated = nextId()
      genCounter(useragent)

      reporter.report(new AuditLogEntry(generated, useragent, rand.nextLong))
      generated
    }

    def get_worker_id(): Long = workerId
    def get_datacenter_id(): Long = datacenterId
    def get_timestamp(): Long = System.currentTimeMillis

    /**
     * Builds the next id while holding this worker's monitor.
     *
     * @throws InvalidSystemClock when the clock reads earlier than the last timestamp used
     */
    protected[snowflake] def nextId(): Long = synchronized {
      val now = timeGen()

      if (now < lastTimestamp) {
        exceptionCounter.incr(1)
        log.error("clock is moving backwards.  Rejecting requests until %d.", lastTimestamp)
        throw new InvalidSystemClock("Clock moved backwards.  Refusing to generate id for %d milliseconds".format(
          lastTimestamp - now))
      }

      val stamp =
        if (now != lastTimestamp) {
          // New millisecond: the sequence starts over.
          sequence = 0L
          now
        } else {
          // Same millisecond: advance the sequence; a wrap to 0 means this
          // millisecond is exhausted, so wait for the clock to move on.
          sequence = (sequence + 1) & sequenceMask
          if (sequence == 0) tilNextMillis(lastTimestamp) else now
        }

      lastTimestamp = stamp

      val timePart = (stamp - twepoch) << timestampLeftShift
      val datacenterPart = datacenterId << datacenterIdShift
      val workerPart = workerId << workerIdShift
      timePart | datacenterPart | workerPart | sequence
    }

    /** Polls `timeGen` until it returns a value strictly greater than `lastTimestamp`. */
    protected def tilNextMillis(lastTimestamp: Long): Long = {
      @scala.annotation.tailrec
      def spin(candidate: Long): Long =
        if (candidate > lastTimestamp) candidate else spin(timeGen())

      spin(timeGen())
    }

    /** Current wall-clock time in milliseconds. */
    protected def timeGen(): Long = System.currentTimeMillis()

    /** A user agent is a letter followed by any number of letters, digits or hyphens. */
    val AgentParser = """([a-zA-Z][a-zA-Z\-0-9]*)""".r

    /** True when the whole of `useragent` matches `AgentParser`. */
    def validUseragent(useragent: String): Boolean = useragent match {
      case AgentParser(_) => true
      case _ => false
    }
  }




  1 package com.test.util;
  /**
   * Twitter Snowflake id generator.
   *
   * <p>Layout of a generated id (fields separated by '-'):
   * <pre>
   * 0 - 0000000000 0000000000 0000000000 0000000000 0 - 00000 - 00000 - 000000000000
   * </pre>
   * <ul>
   *   <li>1 sign bit. {@code long} is signed in Java and the top bit is the sign bit
   *       (0 for positive, 1 for negative); ids are normally positive, so it is 0.</li>
   *   <li>41 timestamp bits (milliseconds). This is not the current time itself but the
   *       difference (current time - start time), where the start time is chosen by the
   *       program (the {@code TWEPOCH} constant below). 41 bits last about 69 years:
   *       (1L &lt;&lt; 41) / (1000L * 60 * 60 * 24 * 365) = 69.</li>
   *   <li>10 machine bits, allowing 1024 nodes: a 5-bit datacenterId and a 5-bit workerId.</li>
   *   <li>12 sequence bits, a counter within one millisecond, allowing 4096 ids per node
   *       per millisecond.</li>
   * </ul>
   * Together these make exactly 64 bits, i.e. one {@code long}.
   *
   * <p>Ids are ordered by time overall, and ids from different nodes do not collide because
   * the datacenter id and worker id differ. The original author reports roughly 260,000 ids
   * per second in testing.
   */
  public class SnowflakeIdWorker {

      // ==============================Constants========================================
      /** Start of the id epoch: 1420041600000 ms since the Unix epoch (2015-01-01 00:00 at UTC+8). */
      private static final long TWEPOCH = 1420041600000L;

      /** Number of bits used by the worker id. */
      private static final long WORKER_ID_BITS = 5L;

      /** Number of bits used by the datacenter id. */
      private static final long DATACENTER_ID_BITS = 5L;

      /** Largest supported worker id, 31 (all WORKER_ID_BITS low bits set). */
      private static final long MAX_WORKER_ID = -1L ^ (-1L << WORKER_ID_BITS);

      /** Largest supported datacenter id, 31. */
      private static final long MAX_DATACENTER_ID = -1L ^ (-1L << DATACENTER_ID_BITS);

      /** Number of bits used by the sequence. */
      private static final long SEQUENCE_BITS = 12L;

      /** The worker id is shifted left by 12 bits. */
      private static final long WORKER_ID_SHIFT = SEQUENCE_BITS;

      /** The datacenter id is shifted left by 17 bits (12 + 5). */
      private static final long DATACENTER_ID_SHIFT = SEQUENCE_BITS + WORKER_ID_BITS;

      /** The timestamp is shifted left by 22 bits (5 + 5 + 12). */
      private static final long TIMESTAMP_LEFT_SHIFT = SEQUENCE_BITS + WORKER_ID_BITS + DATACENTER_ID_BITS;

      /** Mask for the sequence, 4095 (0b111111111111 = 0xfff). */
      private static final long SEQUENCE_MASK = -1L ^ (-1L << SEQUENCE_BITS);

      // ==============================Fields===========================================
      /** Worker id (0~31); fixed at construction. */
      private final long workerId;

      /** Datacenter id (0~31); fixed at construction. */
      private final long datacenterId;

      /** Sequence within the current millisecond (0~4095). */
      private long sequence = 0L;

      /** Timestamp used for the most recently generated id; -1 before the first id. */
      private long lastTimestamp = -1L;

      //==============================Constructors=====================================
      /**
       * Creates a generator for one worker in one datacenter.
       *
       * @param workerId worker id (0~31)
       * @param datacenterId datacenter id (0~31)
       * @throws IllegalArgumentException if either id is outside 0~31
       */
      public SnowflakeIdWorker(long workerId, long datacenterId) {
          if (workerId > MAX_WORKER_ID || workerId < 0) {
              throw new IllegalArgumentException(
                      String.format("worker Id can't be greater than %d or less than 0", MAX_WORKER_ID));
          }
          if (datacenterId > MAX_DATACENTER_ID || datacenterId < 0) {
              throw new IllegalArgumentException(
                      String.format("datacenter Id can't be greater than %d or less than 0", MAX_DATACENTER_ID));
          }
          this.workerId = workerId;
          this.datacenterId = datacenterId;
      }

      // ==============================Methods==========================================
      /**
       * Returns the next id. The method is synchronized on this instance.
       *
       * @return the generated Snowflake id
       * @throws RuntimeException if the clock reads earlier than the last timestamp used
       */
      public synchronized long nextId() {
          long timestamp = timeGen();

          // A timestamp earlier than the last one used means the system clock was moved back.
          if (timestamp < lastTimestamp) {
              throw new RuntimeException(
                      String.format("Clock moved backwards.  Refusing to generate id for %d milliseconds",
                              lastTimestamp - timestamp));
          }

          if (lastTimestamp == timestamp) {
              // Same millisecond: advance the sequence.
              sequence = (sequence + 1) & SEQUENCE_MASK;
              // Sequence wrapped to 0: this millisecond is exhausted, wait for the next one.
              if (sequence == 0) {
                  timestamp = tilNextMillis(lastTimestamp);
              }
          } else {
              // New millisecond: the sequence starts over.
              sequence = 0L;
          }

          lastTimestamp = timestamp;

          // Shift each field into place and OR them together into the 64-bit id.
          return ((timestamp - TWEPOCH) << TIMESTAMP_LEFT_SHIFT)
                  | (datacenterId << DATACENTER_ID_SHIFT)
                  | (workerId << WORKER_ID_SHIFT)
                  | sequence;
      }

      /**
       * Polls the clock until it returns a timestamp later than {@code lastTimestamp}.
       *
       * @param lastTimestamp timestamp used for the most recently generated id
       * @return the new current timestamp
       */
      protected long tilNextMillis(long lastTimestamp) {
          long timestamp = timeGen();
          while (timestamp <= lastTimestamp) {
              timestamp = timeGen();
          }
          return timestamp;
      }

      /**
       * Returns the current time in milliseconds.
       *
       * @return current time (milliseconds)
       */
      protected long timeGen() {
          return System.currentTimeMillis();
      }

      //==============================Test=============================================
      /** Demo: prints 100 ids, each in binary and then in decimal. */
      public static void main(String[] args) {
          SnowflakeIdWorker idWorker = new SnowflakeIdWorker(0, 0);

          for (int i = 0; i < 100; i++) {
              long id = idWorker.nextId();
              System.out.println(Long.toBinaryString(id));
              System.out.println(id);
          }
      }
  }

