Sharding-JDBC分布式ID生成算法snowflake源码详细解读

分布式ID生成算法有很多种,Twitter的SnowFlake就是其中经典的一种。

Snowflake工作原理

对于分布式的ID生成,以Twitter Snowflake为代表的Flake 系列算法,属于划分命名空间并行生成的一种算法,生成的数据为64bit的long型数据,在数据库中应该用大于等于64bit的数字类型的字段来保存该值,比如在MySQL中应该使用BIGINT。

SnowFlake算法生成ID的结构如下图:

Sharding-JDBC分布式ID生成算法snowflake源码详细解读_第1张图片

1     符号位             等于 0

41    时间戳             从 2016/11/01 零点开始的毫秒数,支持 2 ^41 /365/24/60/60/1000=69.7年

10    工作进程编号        支持 1024 个进程

12    序列号             每毫秒从 0 开始自增,支持 4096 个编号

Sharding-jdbc实现的雪花算法核心源码解读:

源码地址:https://github.com/apache/incubator-shardingsphere/blob/dev/sharding-core/sharding-core-common/src/main/java/org/apache/shardingsphere/core/strategy/keygen/SnowflakeShardingKeyGenerator.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import com.google.common.base.Preconditions;
import lombok.Getter;
import lombok.Setter;
import lombok.SneakyThrows;

import java.util.Calendar;
import java.util.Properties;

/**
 * Snowflake distributed primary key generator.
 *
 * <p>Uses the snowflake algorithm. Every generated key is a 64-bit {@code long} laid out as:
 *
 * <pre>
 * 1 bit   sign bit, always 0.
 * 41 bits timestamp offset in milliseconds from {@link #EPOCH} (2016-11-01 00:00) to now;
 *         2^41 ms is roughly 69.7 years.
 * 10 bits worker process id, 1024 distinct workers.
 * 12 bits auto-increment sequence within one millisecond, 4096 values.
 * </pre>
 *
 * <p>Configuration is read from {@link #getProperties() properties} on every call:
 * <ul>
 *   <li>{@code worker.id} - worker id, default value is 0.</li>
 *   <li>{@code max.tolerate.time.difference.milliseconds} - how far the clock may move backwards
 *       before key generation fails, default value is 10.</li>
 * </ul>
 *
 * @author gaohongtao
 * @author panjuan
 */
public final class SnowflakeShardingKeyGenerator {

    /**
     * Start of the timestamp range in epoch milliseconds: 2016-11-01 00:00:00.000.
     *
     * <p>NOTE(review): computed with {@code Calendar.getInstance()}, i.e. in the JVM's default time zone,
     * so the value differs between JVMs configured with different time zones.
     */
    public static final long EPOCH;

    /** Number of bits used by the auto-increment sequence. */
    private static final long SEQUENCE_BITS = 12L;

    /** Number of bits used by the worker id. */
    private static final long WORKER_ID_BITS = 10L;

    /**
     * Mask that keeps the sequence inside {@link #SEQUENCE_BITS} bits: {@code (1 << 12) - 1 = 4095}.
     * Incrementing 4095 and applying the mask wraps the sequence back to 0.
     */
    private static final long SEQUENCE_MASK = (1L << SEQUENCE_BITS) - 1;

    /** Left shift applied to the worker id: it sits directly above the sequence bits. */
    private static final long WORKER_ID_LEFT_SHIFT_BITS = SEQUENCE_BITS;

    /** Left shift applied to the timestamp offset: it sits above the worker id and sequence bits. */
    private static final long TIMESTAMP_LEFT_SHIFT_BITS = WORKER_ID_LEFT_SHIFT_BITS + WORKER_ID_BITS;

    /** Exclusive upper bound of the worker id: {@code 1 << 10 = 1024}. */
    private static final long WORKER_ID_MAX_VALUE = 1L << WORKER_ID_BITS;

    /** Default worker id used when the {@code worker.id} property is absent. */
    private static final long WORKER_ID = 0;

    /** Default for the {@code max.tolerate.time.difference.milliseconds} property. */
    private static final int MAX_TOLERATE_TIME_DIFFERENCE_MILLISECONDS = 10;

    @Setter
    private static TimeService timeService = new TimeService();

    @Getter
    @Setter
    private Properties properties = new Properties();

    /** First sequence value of a fresh millisecond; flips between 0 and 1. */
    private byte sequenceOffset;

    /** Sequence value used by the most recently generated key. */
    private long sequence;

    /** Timestamp (epoch milliseconds) of the most recently generated key. */
    private long lastMilliseconds;

    static {
        Calendar calendar = Calendar.getInstance();
        calendar.set(2016, Calendar.NOVEMBER, 1);
        calendar.set(Calendar.HOUR_OF_DAY, 0);
        calendar.set(Calendar.MINUTE, 0);
        calendar.set(Calendar.SECOND, 0);
        calendar.set(Calendar.MILLISECOND, 0);
        EPOCH = calendar.getTimeInMillis();
    }

    /**
     * Returns the type name of this key generator.
     *
     * @return the constant {@code "SNOWFLAKE"}
     */
    public String getType() {
        return "SNOWFLAKE";
    }

    /**
     * Generates the next key.
     *
     * <p>The method is {@code synchronized}, so calls on the same instance are serialized.
     *
     * @return the generated key as a boxed {@code Long}
     * @throws IllegalStateException if the clock moved backwards by at least the tolerated difference
     * @throws IllegalArgumentException if the configured worker id is outside {@code [0, 1024)}
     * @throws NumberFormatException if a configured property is not a valid number
     */
    public synchronized Comparable generateKey() {
        long currentMilliseconds = timeService.getCurrentMillis();
        // If the clock moved backwards within the tolerated range, the helper sleeps; re-read the clock afterwards.
        if (waitTolerateTimeDifferenceIfNeed(currentMilliseconds)) {
            currentMilliseconds = timeService.getCurrentMillis();
        }
        if (lastMilliseconds == currentMilliseconds) {
            // Still inside the same millisecond: take the next sequence value.
            // The mask wraps 4095 + 1 to 0, which means this millisecond is exhausted,
            // so spin until the clock reaches a later millisecond.
            if (0L == (sequence = (sequence + 1) & SEQUENCE_MASK)) {
                currentMilliseconds = waitUntilNextTime(currentMilliseconds);
            }
        } else {
            // A different millisecond: restart the sequence from the alternating offset (0 or 1).
            vibrateSequenceOffset();
            sequence = sequenceOffset;
        }
        lastMilliseconds = currentMilliseconds;
        // Compose the key: [timestamp offset | worker id | sequence]; the fields occupy disjoint bit ranges.
        return ((currentMilliseconds - EPOCH) << TIMESTAMP_LEFT_SHIFT_BITS)
                | (getWorkerId() << WORKER_ID_LEFT_SHIFT_BITS)
                | sequence;
    }

    /**
     * Handles the clock having moved backwards relative to the last generated key.
     *
     * @param currentMilliseconds current time in epoch milliseconds
     * @return {@code false} if the clock did not move backwards; {@code true} after sleeping for the difference
     * @throws IllegalStateException if the difference is not below the tolerated maximum
     */
    @SneakyThrows
    private boolean waitTolerateTimeDifferenceIfNeed(final long currentMilliseconds) {
        // Normal case: the clock is at or after the time of the last generated key.
        if (lastMilliseconds <= currentMilliseconds) {
            return false;
        }
        // Clock moved backwards: the last key was generated at a time later than "now".
        long timeDifferenceMilliseconds = lastMilliseconds - currentMilliseconds;
        // Guava's message template only substitutes %s placeholders, so %s is used for the numeric values.
        Preconditions.checkState(timeDifferenceMilliseconds < getMaxTolerateTimeDifferenceMilliseconds(),
                "Clock is moving backwards, last time is %s milliseconds, current time is %s milliseconds", lastMilliseconds, currentMilliseconds);
        // Sleep for the difference; an InterruptedException is rethrown unchanged by @SneakyThrows.
        Thread.sleep(timeDifferenceMilliseconds);
        return true;
    }

    /**
     * Reads the worker id from the {@code worker.id} property.
     *
     * @return the configured worker id, or 0 when the property is absent
     * @throws IllegalArgumentException if the value is outside {@code [0, 1024)}
     */
    private long getWorkerId() {
        long result = Long.parseLong(properties.getProperty("worker.id", String.valueOf(WORKER_ID)));
        Preconditions.checkArgument(result >= 0L && result < WORKER_ID_MAX_VALUE,
                "Illegal worker id %s, it must be in [0, %s)", result, WORKER_ID_MAX_VALUE);
        return result;
    }

    /**
     * Reads the tolerated backwards clock difference from the
     * {@code max.tolerate.time.difference.milliseconds} property.
     *
     * @return the configured value in milliseconds, or 10 when the property is absent
     */
    private int getMaxTolerateTimeDifferenceMilliseconds() {
        return Integer.parseInt(properties.getProperty("max.tolerate.time.difference.milliseconds", String.valueOf(MAX_TOLERATE_TIME_DIFFERENCE_MILLISECONDS)));
    }

    /**
     * Busy-waits until the time service reports a time strictly after {@code lastTime}.
     *
     * @param lastTime the millisecond whose sequence range is exhausted
     * @return the first observed time greater than {@code lastTime}
     */
    private long waitUntilNextTime(final long lastTime) {
        long result = timeService.getCurrentMillis();
        while (result <= lastTime) {
            result = timeService.getCurrentMillis();
        }
        return result;
    }

    /**
     * Flips {@link #sequenceOffset} between 0 and 1.
     *
     * <p>{@code ~sequenceOffset & 1} inverts the lowest bit and clears all others: 0 becomes 1 and 1 becomes 0.
     * NOTE(review): presumably this keeps keys generated at a low rate from all ending in the same bit - confirm
     * against the upstream project documentation.
     */
    private void vibrateSequenceOffset() {
        sequenceOffset = (byte) (~sequenceOffset & 1);
    }
}

 

使用注意:

WORKER_ID和MAX_TOLERATE_TIME_DIFFERENCE_MILLISECONDS使用配置文件设置,

sharding.jdbc.config.sharding.tables.t_order.key-generator.props.worker.id=1020

sharding.jdbc.config.sharding.tables.t_order.key-generator.props.max.tolerate.time.difference.milliseconds=60000

WORKER_ID最大限制是 2^10,因此只要满足小于 1024 即可。

针对IPV4:IP最大 255.255.255.255。而(255+255+255+255) = 1020 < 1024。 因此采用IP各段数值相加得到的值一定落在WORKER_ID的合法范围内;但不同IP的各段之和可能相同(例如 10.0.0.3 与 10.0.1.2 相加都是 13),所以这种方式并不能保证唯一,使用时仍需确保各实例的WORKER_ID不重复。

启示:

  1. 根据自己业务修改每个位段存储的信息。算法是通用的,可以根据自己需求适当调整每段的大小以及存储的信息。
  2. 解密id,由于id的每段都保存了特定的信息,所以拿到一个id,应该可以尝试反推出原始的每个段的信息。反推出的信息可以帮助我们分析。比如作为订单,可以知道该订单的生成日期,负责处理的数据中心等等。

 

参考:

https://segmentfault.com/a/1190000011282426

https://www.cnblogs.com/hongdada/p/9324473.html

https://mp.weixin.qq.com/s?__biz=MzUzNTY4NTYxMA==&mid=2247483653&idx=1&sn=4ffc977dd14600d9180b79ec3dc827ef&chksm=fa80f180cdf778966b99f242dd62de7cd0ec1ff17b65982f1d387f1cccdab3709cf72af045ef&mpshare=1&scene=1&srcid=0320wPeo7xEoDcRl1MjfvAGX&pass_ticket=GSh11CDVU0rTeNni0ppS%2FHl3SdEgx8vYq8UQpe2lO005P3Oiy30WaFSlLRgjLNdS#rd

你可能感兴趣的:(源码解读)