Sharding JDBC通过SPI机制默认提供了两种分布式ID生成算法:UUID和雪花算法,UUID用作主键不太合适,因为是无序的,会影响查询性能;雪花算法是可以使用的。这两个算法定义在META-INF/services/org.apache.shardingsphere.sharding.spi.KeyGenerateAlgorithm文件中
org.apache.shardingsphere.sharding.algorithm.keygen.SnowflakeKeyGenerateAlgorithm
org.apache.shardingsphere.sharding.algorithm.keygen.UUIDKeyGenerateAlgorithm
通过阅读SnowflakeKeyGenerateAlgorithm的源码可以发现,它的workerId可以通过配置传递:
spring:
shardingsphere:
rules:
sharding:
tables: # 需要分库表的规则配置
order_t:
actual-data-nodes: ds$->{0..2}.order_t # 待选数据节点:ds0.order_t、ds1.order_t、ds2.order_t
key-generate-strategy: # 分布式ID列,一般是主键
column: order_id
key-generator-name: snowflake # 使用雪花算法
database-strategy: # 分库策略配置
standard: # 标准算法,可选:standard、complex、hint、none
sharding-column: user_id # 分片列
sharding-algorithm-name: order_inline # 分片算法
key-generators: # 分布式ID生成算法
snowflake:
type: SNOWFLAKE # 内置雪花算法,参考:org.apache.shardingsphere.sharding.algorithm.keygen.SnowflakeKeyGenerateAlgorithm
props:
worker-id: 100
但是有一种场景会导致workerId重复,那就是一个微服务部署多个实例时,每个实例的workerId都一样,会有极小的概率发生分布式ID重复,笔者的项目就出现过这种情况。为了避免这种问题,需要对雪花算法的workerId生成进行优化,保证每个服务实例的workerId都不重复。
因此我设计了一个workerId生成表,通过主键递增的方式产生workerId,算法描述为:
步骤1:服务实例启动时,向snowflake_worker_id_t表插入一条记录,得到数据库自增主键id;
步骤2:对id取模得到workerId,即 workerId = id % 2^workerIdBits,保证workerId落在合法取值范围内。
步骤2中的workerIdBits是workerId的位数
snowflake_worker_id_t表的定义如下:
-- Allocator table for snowflake workerId values: each service instance inserts one
-- row at startup and derives its workerId from the AUTO_INCREMENT primary key
-- (workerId = id % 2^workerIdBits), so concurrently deployed instances never
-- share a workerId. Rows are write-once bookkeeping; only `id` is ever read back.
CREATE TABLE `snowflake_worker_id_t` (
`id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '自增id主键',
`create_by` bigint(10) DEFAULT NULL COMMENT '创建人',
`creation_date` datetime DEFAULT NULL COMMENT '创建时间',
`last_update_by` bigint(10) DEFAULT NULL COMMENT '修改人',
`last_update_date` datetime DEFAULT NULL COMMENT '修改时间',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='雪花算法workerId自增';
雪花算法实现如下:
package com.alexon.distributed.id;
import com.alexon.distributed.id.mapper.ISnowflakeWorkerIdMapper;
import com.alexon.distributed.id.po.SnowflakeWorkerIdPo;
import com.alexon.model.utils.AppContextUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.RandomUtils;
import org.apache.commons.lang3.StringUtils;
import java.net.Inet4Address;
import java.net.UnknownHostException;
import java.util.Date;
/**
 * Twitter-Snowflake style distributed ID generator.
 * <p>
 * 64-bit ID layout (high to low):
 * <pre>
 * 0 - 41-bit timestamp delta - 1-bit dataCenterId - 9-bit workerId - 12-bit sequence
 * </pre>
 * <ul>
 * <li>1 sign bit: {@code long} is signed in Java; generated IDs are positive, so the top bit is 0.</li>
 * <li>41-bit timestamp: stores the delta (current millis - twepoch) rather than the absolute time,
 *     which lasts about 69 years: (1L &lt;&lt; 41) / (1000L * 60 * 60 * 24 * 365) = 69.</li>
 * <li>1-bit dataCenterId + 9-bit workerId: up to 2 * 512 = 1024 distinct nodes.</li>
 * <li>12-bit sequence: up to 4096 IDs per node per millisecond.</li>
 * </ul>
 * IDs are roughly time-ordered and collision-free across the cluster as long as every
 * (dataCenterId, workerId) pair is unique; the workerId is allocated from the database
 * table snowflake_worker_id_t via an AUTO_INCREMENT primary key, so multiple instances
 * of the same service receive different workerIds.
 */
@Slf4j
public class SnowflakeIdGenerator {
    /**
     * Epoch the 41-bit timestamp delta is measured from (2021-01-06).
     */
    private final long twepoch = 1609944039799L;
    /**
     * Number of bits used for the worker id.
     */
    private final long workerIdBits = 9L;
    /**
     * Number of bits used for the data center id.
     */
    private final long dataCenterIdBits = 1L;
    /**
     * Maximum worker id = 2^workerIdBits - 1 (511 with 9 bits).
     */
    private final long maxWorkerId = ~(-1L << workerIdBits);
    /**
     * Maximum data center id = 2^dataCenterIdBits - 1 (1 with 1 bit).
     */
    private final long maxDataCenterId = ~(-1L << dataCenterIdBits);
    /**
     * Number of bits used for the per-millisecond sequence.
     */
    private final long sequenceBits = 12L;
    /**
     * Worker id is shifted left past the sequence (12 bits).
     */
    private final long workerIdShift = sequenceBits;
    /**
     * Data center id is shifted left past sequence + worker id (12 + 9 = 21 bits).
     */
    private final long dataCenterIdShift = sequenceBits + workerIdBits;
    /**
     * Timestamp is shifted left past sequence + worker id + data center id (12 + 9 + 1 = 22 bits).
     */
    private final long timestampLeftShift = sequenceBits + workerIdBits + dataCenterIdBits;
    /**
     * Mask for the sequence bits: 0b111111111111 = 0xfff = 4095.
     */
    private final long sequenceMask = ~(-1L << sequenceBits);
    /**
     * Worker id of this instance (0~511).
     */
    private long workerId;
    /**
     * Data center id of this instance (0~1).
     */
    private long dataCenterId;
    /**
     * Sequence within the current millisecond (0~4095).
     */
    private long sequence = 0L;
    /**
     * Timestamp (millis) of the last generated ID; -1 means none generated yet.
     */
    private long lastTimestamp = -1L;
    /**
     * Lazily created singleton instance (double-checked locking; volatile for safe publication).
     */
    private volatile static SnowflakeIdGenerator INSTANCE;

    /**
     * Returns the shared generator instance, creating it on first use.
     *
     * @return the singleton instance
     */
    public static SnowflakeIdGenerator getInstance() {
        if (INSTANCE == null) {
            synchronized (SnowflakeIdGenerator.class) {
                if (INSTANCE == null) {
                    INSTANCE = new SnowflakeIdGenerator();
                }
            }
        }
        return INSTANCE;
    }

    /**
     * Builds a generator whose workerId is allocated from the database and whose
     * dataCenterId is derived from the local host address.
     *
     * @throws IllegalArgumentException if either id falls outside its valid range
     */
    public SnowflakeIdGenerator() {
        long workerId = getWorkerId();
        long dataCenterId = getDataCenterId();
        if (workerId > maxWorkerId || workerId < 0) {
            throw new IllegalArgumentException(String.format("workerId can't be greater than %d or less than 0", maxWorkerId));
        }
        if (dataCenterId > maxDataCenterId || dataCenterId < 0) {
            throw new IllegalArgumentException(String.format("dataCenterId can't be greater than %d or less than 0", maxDataCenterId));
        }
        this.workerId = workerId;
        this.dataCenterId = dataCenterId;
    }

    /**
     * Generates the next ID (thread-safe via synchronization).
     *
     * @return the next snowflake ID
     * @throws RuntimeException if the system clock moved backwards since the last call
     */
    public synchronized Long nextId() {
        long timestamp = timeGen();
        // The clock moved backwards since the last ID; generating now could duplicate
        // previously issued IDs, so refuse.
        if (timestamp < lastTimestamp) {
            throw new RuntimeException(
                String.format("Clock moved backwards. Refusing to generate id for %d milliseconds", lastTimestamp - timestamp));
        }
        // Same millisecond as the previous ID: advance the in-millisecond sequence.
        if (lastTimestamp == timestamp) {
            sequence = (sequence + 1) & sequenceMask;
            // Sequence overflowed within this millisecond.
            if (sequence == 0) {
                // Spin until the next millisecond to get a fresh timestamp.
                timestamp = tilNextMillis(lastTimestamp);
            }
        }
        // New millisecond: reset the sequence.
        else {
            sequence = 0L;
        }
        // Remember when the last ID was generated.
        lastTimestamp = timestamp;
        // Assemble the 64-bit ID by shifting each part into place and OR-ing them together.
        return ((timestamp - twepoch) << timestampLeftShift)
            | (dataCenterId << dataCenterIdShift)
            | (workerId << workerIdShift)
            | sequence;
    }

    /**
     * Busy-waits until the clock passes the given timestamp.
     *
     * @param lastTimestamp timestamp (millis) of the last generated ID
     * @return the first timestamp strictly after {@code lastTimestamp}
     */
    protected long tilNextMillis(long lastTimestamp) {
        long timestamp = timeGen();
        while (timestamp <= lastTimestamp) {
            timestamp = timeGen();
        }
        return timestamp;
    }

    /**
     * Returns the current time in milliseconds (extracted for testability).
     *
     * @return current time in millis
     */
    protected long timeGen() {
        return System.currentTimeMillis();
    }

    /**
     * Allocates a workerId by inserting a row into snowflake_worker_id_t and taking the
     * generated AUTO_INCREMENT key modulo the number of representable worker ids.
     * Note: ids more than 2^workerIdBits apart map to the same workerId; uniqueness
     * holds as long as fewer than 512 instances are started within one id cycle.
     *
     * @return workerId in [0, 2^workerIdBits)
     */
    private long getWorkerId() {
        ISnowflakeWorkerIdMapper workerIdMapper = AppContextUtil.getBean(ISnowflakeWorkerIdMapper.class);
        SnowflakeWorkerIdPo workerIdPo = new SnowflakeWorkerIdPo();
        // Fix: this used to call setLastUpdateBy twice (copy-paste bug), leaving create_by null.
        workerIdPo.setCreateBy(-1L);
        workerIdPo.setCreationDate(new Date());
        workerIdPo.setLastUpdateBy(-1L);
        workerIdPo.setLastUpdateDate(new Date());
        workerIdMapper.insert(workerIdPo);
        Long id = workerIdPo.getId();
        // Total number of representable worker ids = 2^workerIdBits (512).
        int workerTotalNum = 1 << this.workerIdBits;
        long workerId = id % workerTotalNum;
        log.info("workerId={}", workerId);
        return workerId;
    }

    /**
     * Derives a dataCenterId from the local IPv4 address: sums the code points of the
     * textual address and reduces modulo 2^dataCenterIdBits. Falls back to a random
     * value in range when the host address cannot be resolved.
     *
     * @return dataCenterId in [0, 2^dataCenterIdBits)
     */
    private long getDataCenterId() {
        int sums = 0;
        // Fix: this used to shift by the dataCenterId FIELD (still 0 here), yielding a
        // modulus of 1 and therefore always returning 0. The bit WIDTH is intended.
        int dataCenterTotalNum = 1 << this.dataCenterIdBits;
        try {
            String hostAddress = Inet4Address.getLocalHost().getHostAddress();
            int[] ints = StringUtils.toCodePoints(hostAddress);
            for (int b : ints) {
                sums += b;
            }
        } catch (UnknownHostException e) {
            log.error("get local Host Address failed:", e);
            // nextLong's upper bound is exclusive, so the result stays in range.
            return RandomUtils.nextLong(0, dataCenterTotalNum);
        }
        return sums % dataCenterTotalNum;
    }
}
Sharding JDBC自定义分布式ID生成算法通过SPI方式提供扩展能力,算法需要实现接口KeyGenerateAlgorithm,然后要把类路径配置在资源目录的META-INF/services下的文件org.apache.shardingsphere.sharding.spi.KeyGenerateAlgorithm中(需要自己新建),然后填入类路径
com.xlt.sharding.keygenerator.BeautySnowflakeKeyAlgorithm
BeautySnowflakeKeyAlgorithm实现如下:
package com.xlt.sharding.keygenerator;
import com.alexon.distributed.id.SnowflakeIdGenerator;
import lombok.extern.slf4j.Slf4j;
import org.apache.shardingsphere.sharding.spi.KeyGenerateAlgorithm;
import java.util.Properties;
@Slf4j
public class BeautySnowflakeKeyAlgorithm implements KeyGenerateAlgorithm {
    /**
     * Properties handed over by ShardingSphere during SPI initialization.
     */
    private Properties algorithmProps;

    /**
     * Initialize SPI.
     *
     * @param props properties to be initialized
     */
    @Override
    public void init(Properties props) {
        this.algorithmProps = props;
    }

    /**
     * Get properties.
     *
     * @return properties supplied via {@link #init(Properties)}
     */
    @Override
    public Properties getProps() {
        return algorithmProps;
    }

    /**
     * Generate key by delegating to the singleton snowflake generator.
     *
     * @return generated key
     */
    @Override
    public Comparable<?> generateKey() {
        final Long id = SnowflakeIdGenerator.getInstance().nextId();
        log.info("nextId={}", id);
        return id;
    }

    /**
     * Get type. This is the value referenced by the {@code type} attribute of the
     * key-generator configuration.
     *
     * @return type
     */
    @Override
    public String getType() {
        return "BEAUTY_SNOWFLAKE";
    }
}
使用此自定义算法可以按如下方式进行配置
spring:
shardingsphere:
rules:
sharding:
tables: # 需要分库表的规则配置
order_t:
actual-data-nodes: ds$->{0..2}.order_t # 待选数据节点:ds0.order_t、ds1.order_t、ds2.order_t
key-generate-strategy: # 分布式ID列,一般是主键
column: order_id
key-generator-name: beautySnowflake # 使用雪花算法
database-strategy: # 分库策略配置
standard: # 标准算法,可选:standard、complex、hint、none
sharding-column: user_id # 分片列
sharding-algorithm-name: order_inline # 分片算法
key-generators: # 分布式ID生成算法
beautySnowflake:
type: BEAUTY_SNOWFLAKE
sharding-algorithms: # 分片算法,配置后可以在分片表的分片策略中被引用
order_inline:
type: INLINE
props:
algorithm-expression: ds$->{user_id % 3} # 将数据按user_id对3取余,均分到ds0、ds1、ds2