ShardingJDBC 自定义ShardingStrategyConfiguration

ShardingJDBC 自定义ShardingStrategyConfiguration

昨晚平台新版本发布后,发现shardingJDBC查询时间段范围内的效率非常慢(平均20S左右),虽然分表较多(tab20191125-tab20200323,一共18张表,单表平均20W数据),但时间范围内的数据量不是非常多,理论上应该很快才对。
后面再去查了下,发现我用的StandardShardingStrategy提供PreciseShardingAlgorithm和RangeShardingAlgorithm两个分片算法。

  • PreciseShardingAlgorithm是必选的,用于处理=和IN的分片。
  • RangeShardingAlgorithm是可选的,用于处理BETWEEN AND分片,如果不配置RangeShardingAlgorithm,SQL中的BETWEEN AND将按照全库路由处理。

但我只配置了PreciseShardingAlgorithm,所以shardingJDBC遇到范围查询时,进行了全表查询,即所有18张表都进行查询,因此很慢。

自定义分片策略

1.SuperTableShardingAlgorithm 用于精准查询


package com.fpi.cloud.shardingjdbc;

import java.util.Collection;
import java.util.Date;

import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

import com.fpi.cloud.utils.DateUtils;

import io.shardingjdbc.core.api.algorithm.sharding.PreciseShardingValue;
import io.shardingjdbc.core.api.algorithm.sharding.standard.PreciseShardingAlgorithm;

/**
 * 
 * @ClassName: Super9000TableShardingAlgorithm
 * @Description 精准查询
 * @author:luchenxi
 * @date: 2019年11月13日 上午10:02:30
 *
 *             注意:本内容仅限内部传阅,禁用于其他的商业目
 */
public class SuperTableShardingAlgorithm implements PreciseShardingAlgorithm<Date>
{
	private Logger logger = LogManager.getLogger(SuperTableShardingAlgorithm.class);

	/**
	 * 
	 * <p> Title: doSharding </p> <p> Description: 精确查询 IN = </p>
	 * 
	 * @param availableTargetNames
	 * @param shardingValue
	 * @return
	 * @see io.shardingjdbc.core.api.algorithm.sharding.standard.PreciseShardingAlgorithm#doSharding(java.util.Collection,
	 *      io.shardingjdbc.core.api.algorithm.sharding.PreciseShardingValue)
	 */
	@ Override
	public String doSharding(Collection<String> availableTargetNames , PreciseShardingValue<Date> shardingValue)
	{
		logger.info("精确分表策略生效...");
		Date creatTime = shardingValue.getValue();
		if(creatTime != null)
		{
			// 查询所在周的周一日期后缀
			String mondayStr = DateUtils.getWeekMonday(creatTime);
			logger.info("目标表后缀 : " + mondayStr);
			for(String each : availableTargetNames)
			{
				if(each.endsWith(mondayStr))
				{
					logger.info("实际表 : " + each);
					return each;
				}
			}
		}
		throw new IllegalArgumentException();
	
}

2.SuperTableShardingRangeAlgorithm 用于范围查询


package com.fpi.cloud.shardingjdbc;

import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;

import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

import com.fpi.cloud.utils.DateUtils;
import com.google.common.collect.Range;

import io.shardingjdbc.core.api.algorithm.sharding.RangeShardingValue;
import io.shardingjdbc.core.api.algorithm.sharding.standard.RangeShardingAlgorithm;

/**
 * 
 * @ClassName: SuperTableShardingRangeAlgorithm
 * @Description:范围查询
 * @author: luchenxi 18163
 * @date: 2020年3月24日 上午10:40:43
 */
public class SuperTableShardingRangeAlgorithm implements RangeShardingAlgorithm<Date>
{
	private static final Logger logger = LogManager.getLogger(SuperTableShardingRangeAlgorithm.class);

	/**
	 * Range sharding for {@code BETWEEN AND} conditions: returns every physical
	 * table whose date suffix falls inside the queried time range, so only the
	 * relevant weekly tables are hit instead of all of them.
	 *
	 * NOTE(review): Range#lowerEndpoint/upperEndpoint throw IllegalStateException
	 * on an unbounded range (e.g. a bare {@code >} condition); BETWEEN always
	 * produces a bounded range — confirm if other operators must be supported.
	 *
	 * @param availableTargetNames all candidate physical table names
	 * @param shardingValue range sharding value carrying the gmt_create range
	 * @return the physical table names whose suffix lies within the range
	 * @throws IllegalArgumentException if the range is null or a suffix cannot be parsed
	 * @see io.shardingjdbc.core.api.algorithm.sharding.standard.RangeShardingAlgorithm#doSharding(java.util.Collection,
	 *      io.shardingjdbc.core.api.algorithm.sharding.RangeShardingValue)
	 */
	@Override
	public Collection<String> doSharding(Collection<String> availableTargetNames,
		RangeShardingValue<Date> shardingValue)
	{
		Collection<String> tabs = new ArrayList<>();
		logger.info("范围分表策略生效...");
		Range<Date> timeRange = shardingValue.getValueRange();
		if(timeRange != null)
		{
			Date startTime = timeRange.lowerEndpoint();
			Date endTime = timeRange.upperEndpoint();
			// Monday-of-week date suffixes for both endpoints of the range.
			String startDateMondayStr = DateUtils.getWeekMonday(startTime);
			String endDateMondayStr = DateUtils.getWeekMonday(endTime);
			logger.info("目标表后缀 : " + startDateMondayStr + "-" + endDateMondayStr);
			try
			{
				Date startDate = DateUtils.parseShortDate(startDateMondayStr);
				Date endDate = DateUtils.parseShortDate(endDateMondayStr);
				for(String each : availableTargetNames)
				{
					// e.g. lga_lga8000paramsampling_super_20191125 — the date is the
					// last '_'-separated segment. Use the last segment rather than a
					// hard-coded index so names with a different part count still work.
					String[] arr = each.split("_");
					String dateStr = arr[arr.length - 1];
					Date curDate = DateUtils.parseShortDate(dateStr);
					if(curDate.compareTo(startDate) >= 0 && curDate.compareTo(endDate) <= 0)
					{
						// Table suffix is inside the queried range.
						logger.info(each);
						tabs.add(each);
					}
				}
				return tabs;
			}
			catch (ParseException e)
			{
				logger.error("日期解析错误", e);
				// Original code swallowed the cause and fell through to a bare
				// IllegalArgumentException; preserve the cause for callers.
				throw new IllegalArgumentException("日期解析错误", e);
			}
		}
		throw new IllegalArgumentException("Range sharding value has no value range");
	}
}

3.MyShardingStrategyConfiguration


package com.fpi.cloud.shardingjdbc;

import com.google.common.base.Preconditions;
import com.google.common.base.Strings;

import io.shardingjdbc.core.api.algorithm.sharding.standard.PreciseShardingAlgorithm;
import io.shardingjdbc.core.api.algorithm.sharding.standard.RangeShardingAlgorithm;
import io.shardingjdbc.core.api.config.strategy.ShardingStrategyConfiguration;
import io.shardingjdbc.core.routing.strategy.ShardingAlgorithmFactory;
import io.shardingjdbc.core.routing.strategy.ShardingStrategy;
import io.shardingjdbc.core.routing.strategy.standard.StandardShardingStrategy;
import lombok.Getter;

/**
 * 
 * @ClassName: MyShardingStrategyConfiguration
 * @Description:ShardingStrategyConfiguration
 * @author: luchenxi
 * @date: 2020年3月24日 上午10:44:43
 */
@ Getter
@Getter
public class MyShardingStrategyConfiguration implements ShardingStrategyConfiguration
{
	// Immutable after construction; build() may be invoked repeatedly.
	private final String shardingColumn;
	private final String preciseAlgorithmClassName;
	private final String rangeAlgorithmClassName;

	/**
	 * @param shardingColumn the sharding column name (e.g. gmt_create)
	 * @param preciseAlgorithmClassName fully-qualified class name of the mandatory
	 *        {@link PreciseShardingAlgorithm} implementation (handles = and IN)
	 * @param rangeAlgorithmClassName fully-qualified class name of the optional
	 *        {@link RangeShardingAlgorithm} implementation (handles BETWEEN AND);
	 *        may be null or empty to fall back to full routing for range queries
	 */
	public MyShardingStrategyConfiguration(String shardingColumn , String preciseAlgorithmClassName ,
		String rangeAlgorithmClassName)
	{
		this.shardingColumn = shardingColumn;
		this.preciseAlgorithmClassName = preciseAlgorithmClassName;
		this.rangeAlgorithmClassName = rangeAlgorithmClassName;
	}

	/**
	 * Builds a {@link StandardShardingStrategy}, wiring in the range algorithm
	 * only when one was configured.
	 *
	 * @return the standard sharding strategy
	 * @throws NullPointerException if the column or precise algorithm class is null
	 */
	@Override
	public ShardingStrategy build()
	{
		Preconditions.checkNotNull(shardingColumn, "Sharding column cannot be null.");
		Preconditions.checkNotNull(preciseAlgorithmClassName, "Precise algorithm class cannot be null.");
		if(Strings.isNullOrEmpty(rangeAlgorithmClassName))
		{
			// No range algorithm configured: BETWEEN AND queries route to all tables.
			return new StandardShardingStrategy(shardingColumn, ShardingAlgorithmFactory
				.newInstance(preciseAlgorithmClassName, PreciseShardingAlgorithm.class));
		}
		return new StandardShardingStrategy(shardingColumn,
			ShardingAlgorithmFactory.newInstance(preciseAlgorithmClassName, PreciseShardingAlgorithm.class),
			ShardingAlgorithmFactory.newInstance(rangeAlgorithmClassName, RangeShardingAlgorithm.class));
	}
}

4.RuleConfigFactory

/**
 * Builds the table-rule configuration for the weekly-sharded logical table.
 */
public class RuleConfigFactory
{
	/**
	 * @return the table rule mapping logic table "actab" onto its physical weekly tables
	 */
	public static TableRuleConfiguration getRuleConfig()
	{
		TableRuleConfiguration orderTableRuleConfig = new TableRuleConfiguration();
		// Logical (base) table name.
		orderTableRuleConfig.setLogicTable("actab");
		// Actual tables: the current table-name suffixes are kept in redis; a new
		// table is added every week.
		orderTableRuleConfig.setActualDataNodes("deviceDataSource.actab_${[" + actualTbs + "]}");
		// Sharding column (per the original comment).
		// NOTE(review): setKeyGeneratorColumnName configures the key-generation
		// column, not the sharding column — the sharding column is passed to
		// MyShardingStrategyConfiguration instead; confirm this call is intended.
		orderTableRuleConfig.setKeyGeneratorColumnName("gmt_create");
		return orderTableRuleConfig;
	}
	
}

5.ShardingDataSource

	    // Register the physical data source(s).
		Map<String , DataSource> dataSourceMap = new HashMap<>();
		dataSourceMap.put("deviceDataSource", deviceDataSource);
		// ShardingJDBC rule configuration.
		// BUG FIX: the original statement was missing its terminating semicolon.
		ShardingRuleConfiguration shardingRuleConfig = new ShardingRuleConfiguration();
		// Table sharding rule.
		shardingRuleConfig.getTableRuleConfigs().add(RuleConfigFactory.getRuleConfig());
		// Binding table group.
		shardingRuleConfig.getBindingTableGroups().add("actab");
		// Default table strategy: precise + range algorithms keyed on gmt_create.
		shardingRuleConfig.setDefaultTableShardingStrategyConfig(
			new MyShardingStrategyConfiguration("gmt_create", SuperTableShardingAlgorithm.class.getName(),
				SuperTableShardingRangeAlgorithm.class.getName()));
		ShardingDataSource rDataSource = new ShardingDataSource(shardingRuleConfig.build(dataSourceMap));

查询

1.数据库中所有表

  • actab_20191125
  • actab_20191202
  • … …
  • actab_20200323

2.时间查询范围

  • startTime : 2020-03-21
  • endTime : 2020-03-23

通过ShardingJDBC去查询的话,应该只需要查询actab_20200316 和 actab_20200323这两张表就可以,不需要全部的表都进行查询。

	<!-- BETWEEN AND on the bare sharding column gmt_create: this is the form the
	     configured RangeShardingAlgorithm recognizes, so only the weekly tables
	     inside the range are queried. -->
	<select id="getSuperParamsValues" resultType="java.util.HashMap"> 
    	SELECT 
    		gmt_create AS refreshTime,
	        column1 AS paramValue
    	FROM 
    		actab
    	WHERE 
    		gmt_create BETWEEN #{startTime} AND #{endTime}
    	ORDER BY gmt_create DESC
    	LIMIT #{pageOffset},#{pageSize};
    </select>

注意

<!-- BUG FIX: the two CDATA conditions had no AND between them, which is invalid
     SQL independently of any sharding behavior. -->
<select id="getSuperParamsValues" resultType="java.util.HashMap"> 
    	SELECT 
    		gmt_create AS refreshTime,
	        column1 AS paramValue
    	FROM 
    		actab
    	WHERE 
     <![CDATA[ 	  DATE_FORMAT(gmt_create, '%Y%m%d') >= DATE_FORMAT(#{startTime}, '%Y%m%d')  ]]>
     AND
	 <![CDATA[ 	  DATE_FORMAT(gmt_create, '%Y%m%d') <= DATE_FORMAT(#{endTime}, '%Y%m%d') ]]>
    	ORDER BY gmt_create DESC
    	LIMIT #{pageOffset},#{pageSize};
    </select>

一开始我是这么写的,但是似乎不走我配置好的策略,还是走的全部表查询。按官方的说法,RangeShardingAlgorithm用于处理BETWEEN AND分片,那这种大于小于号的写法可能是不支持。

然后我又改成如下写法,还是不行。

<!-- NOTE(review): wrapping the sharding column in DATE_FORMAT(gmt_create, ...)
     likely prevents ShardingJDBC from recognizing gmt_create as the sharding
     column, so the query falls back to routing to all tables — confirm against
     the sharding-jdbc SQL-parsing documentation. -->
<select id="getSuperParamsValues" resultType="java.util.HashMap"> 
    	SELECT 
    		gmt_create AS refreshTime,
	        column1 AS paramValue
    	FROM 
    		actab
    	WHERE 
    		DATE_FORMAT(gmt_create, '%Y%m%d') BETWEEN DATE_FORMAT(#{startTime}, '%Y%m%d') AND DATE_FORMAT(#{endTime}, '%Y%m%d')
    	ORDER BY gmt_create DESC
    	LIMIT #{pageOffset},#{pageSize};
    </select>

不知道是因为DATE_FORMAT后ShardingJDBC就不认识我配置的这个gmt_create的key了还是别的原因,这个需要对mybatis和shardingjdbc再实验下才能知道了,有大佬知道的话可以指导一下,不胜感激!

这里必须吐槽下ShardingJDBC,官方文档是真的烂!!!!写的什么玩意!!

因为是第一次用这个,完全是自己摸索着来,面向百度的编程,还存在很多的坑得去踩,大家共勉吧。

你可能感兴趣的:(工业大数据)