昨晚平台新版本发布后,发现shardingJDBC查询时间段范围内的效率非常慢(平均20S左右),虽然分表较多(tab20191125-tab20200323,一共18张表,单表平均20W数据),但时间范围内的数据量不是非常多,理论上应该很快才对。
后面再去查了下,发现我用的StandardShardingStrategy提供PreciseShardingAlgorithm和RangeShardingAlgorithm两个分片算法。
但我只配置了PreciseShardingAlgorithm,所以shardingJDBC遇到范围查询时,进行了全表查询,即所有18张表都进行查询,因此很慢。
1.SuperTableShardingAlgorithm 用于精准查询
package com.fpi.cloud.shardingjdbc;
import java.util.Collection;
import java.util.Date;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import com.fpi.cloud.utils.DateUtils;
import io.shardingjdbc.core.api.algorithm.sharding.PreciseShardingValue;
import io.shardingjdbc.core.api.algorithm.sharding.standard.PreciseShardingAlgorithm;
/**
*
* @ClassName: Super9000TableShardingAlgorithm
* @Description 精准查询
* @author:luchenxi
* @date: 2019年11月13日 上午10:02:30
*
* 注意:本内容仅限内部传阅,禁用于其他的商业目
*/
public class SuperTableShardingAlgorithm implements PreciseShardingAlgorithm<Date>
{
private Logger logger = LogManager.getLogger(SuperTableShardingAlgorithm.class);
/**
*
* <p> Title: doSharding </p> <p> Description: 精确查询 IN = </p>
*
* @param availableTargetNames
* @param shardingValue
* @return
* @see io.shardingjdbc.core.api.algorithm.sharding.standard.PreciseShardingAlgorithm#doSharding(java.util.Collection,
* io.shardingjdbc.core.api.algorithm.sharding.PreciseShardingValue)
*/
@ Override
public String doSharding(Collection<String> availableTargetNames , PreciseShardingValue<Date> shardingValue)
{
logger.info("精确分表策略生效...");
Date creatTime = shardingValue.getValue();
if(creatTime != null)
{
// 查询所在周的周一日期后缀
String mondayStr = DateUtils.getWeekMonday(creatTime);
logger.info("目标表后缀 : " + mondayStr);
for(String each : availableTargetNames)
{
if(each.endsWith(mondayStr))
{
logger.info("实际表 : " + each);
return each;
}
}
}
throw new IllegalArgumentException();
}
2.SuperTableShardingRangeAlgorithm 用于范围查询
package com.fpi.cloud.shardingjdbc;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import com.fpi.cloud.utils.DateUtils;
import com.google.common.collect.Range;
import io.shardingjdbc.core.api.algorithm.sharding.RangeShardingValue;
import io.shardingjdbc.core.api.algorithm.sharding.standard.RangeShardingAlgorithm;
/**
 * Range sharding algorithm for weekly tables.
 *
 * <p>Handles BETWEEN ... AND queries on the sharding column: maps the lower and
 * upper endpoints to their week-Monday suffixes, then selects every physical
 * table whose date suffix lies inside [startMonday, endMonday].
 *
 * @author luchenxi
 */
public class SuperTableShardingRangeAlgorithm implements RangeShardingAlgorithm<Date>
{
    private final Logger logger = LogManager.getLogger(SuperTableShardingRangeAlgorithm.class);

    /**
     * Resolves all physical tables overlapping the queried time range.
     *
     * @param availableTargetNames all physical table names configured for the logic table
     * @param shardingValue        range of the sharding column (BETWEEN endpoints)
     * @return physical tables whose week-Monday suffix falls within the range
     * @throws IllegalArgumentException if the range is null or a table suffix cannot be parsed
     */
    @Override
    public Collection<String> doSharding(Collection<String> availableTargetNames,
            RangeShardingValue<Date> shardingValue)
    {
        Collection<String> tabs = new ArrayList<>();
        logger.info("范围分表策略生效...");
        Range<Date> timeRange = shardingValue.getValueRange();
        if (timeRange != null)
        {
            Date startTime = timeRange.lowerEndpoint();
            Date endTime = timeRange.upperEndpoint();
            // Normalize both endpoints to the Monday of their week, matching the table suffixes.
            String startDateMondayStr = DateUtils.getWeekMonday(startTime);
            String endDateMondayStr = DateUtils.getWeekMonday(endTime);
            logger.info("目标表后缀 : " + startDateMondayStr + "-" + endDateMondayStr);
            try
            {
                Date startDate = DateUtils.parseShortDate(startDateMondayStr);
                Date endDate = DateUtils.parseShortDate(endDateMondayStr);
                for (String each : availableTargetNames)
                {
                    // Table names end in a date suffix, e.g.
                    // lga_lga8000paramsampling_super_20191125 or actab_20200316.
                    // Use the LAST '_'-separated segment instead of a hard-coded index
                    // so names with any number of segments are handled.
                    String[] arr = each.split("_");
                    String dateStr = arr[arr.length - 1];
                    Date curDate = DateUtils.parseShortDate(dateStr);
                    if (curDate.compareTo(startDate) >= 0 && curDate.compareTo(endDate) <= 0)
                    {
                        // Table's week lies inside the queried range.
                        logger.info(each);
                        tabs.add(each);
                    }
                }
                return tabs;
            }
            catch (ParseException e)
            {
                logger.error("日期解析错误", e);
            }
        }
        throw new IllegalArgumentException(
                "Unable to resolve physical tables for range: " + shardingValue.getValueRange());
    }
}
3.MyShardingStrategyConfiguration
package com.fpi.cloud.shardingjdbc;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import io.shardingjdbc.core.api.algorithm.sharding.standard.PreciseShardingAlgorithm;
import io.shardingjdbc.core.api.algorithm.sharding.standard.RangeShardingAlgorithm;
import io.shardingjdbc.core.api.config.strategy.ShardingStrategyConfiguration;
import io.shardingjdbc.core.routing.strategy.ShardingAlgorithmFactory;
import io.shardingjdbc.core.routing.strategy.ShardingStrategy;
import io.shardingjdbc.core.routing.strategy.standard.StandardShardingStrategy;
import lombok.Getter;
/**
 * Standard sharding strategy configuration that wires BOTH a precise and a
 * range algorithm, so BETWEEN queries are routed instead of hitting every table.
 *
 * @author luchenxi
 */
@ Getter
public class MyShardingStrategyConfiguration implements ShardingStrategyConfiguration
{
    // Immutable once constructed: this object is pure configuration.
    private final String shardingColumn;
    private final String preciseAlgorithmClassName;
    private final String rangeAlgorithmClassName;

    /**
     * @param shardingColumn            column the routing decision is based on
     * @param preciseAlgorithmClassName FQCN of the {@link PreciseShardingAlgorithm} (required)
     * @param rangeAlgorithmClassName   FQCN of the {@link RangeShardingAlgorithm} (may be null/empty)
     */
    public MyShardingStrategyConfiguration(String shardingColumn, String preciseAlgorithmClassName,
            String rangeAlgorithmClassName)
    {
        this.shardingColumn = shardingColumn;
        this.preciseAlgorithmClassName = preciseAlgorithmClassName;
        this.rangeAlgorithmClassName = rangeAlgorithmClassName;
    }

    /**
     * Builds the runtime strategy; falls back to precise-only routing
     * when no range algorithm class is configured.
     */
    @ Override
    public ShardingStrategy build()
    {
        Preconditions.checkNotNull(shardingColumn, "Sharding column cannot be null.");
        Preconditions.checkNotNull(preciseAlgorithmClassName, "Precise algorithm class cannot be null.");
        if (Strings.isNullOrEmpty(rangeAlgorithmClassName))
        {
            return new StandardShardingStrategy(shardingColumn, ShardingAlgorithmFactory
                    .newInstance(preciseAlgorithmClassName, PreciseShardingAlgorithm.class));
        }
        return new StandardShardingStrategy(shardingColumn,
                ShardingAlgorithmFactory.newInstance(preciseAlgorithmClassName, PreciseShardingAlgorithm.class),
                ShardingAlgorithmFactory.newInstance(rangeAlgorithmClassName, RangeShardingAlgorithm.class));
    }
}
4.RuleConfigFactory
/**
 * Factory for the table rule binding the logic table {@code actab}
 * to its weekly physical tables.
 */
public class RuleConfigFactory
{
    // Utility class: no instances.
    private RuleConfigFactory()
    {
    }

    /**
     * Builds the rule mapping logic table "actab" onto the physical tables
     * currently registered in Redis (one new table per week).
     *
     * @return the table rule configuration for the sharding data source
     */
    public static TableRuleConfiguration getRuleConfig()
    {
        TableRuleConfiguration orderTableRuleConfig = new TableRuleConfiguration();
        // Logic table name used in SQL.
        orderTableRuleConfig.setLogicTable("actab");
        // Actual physical tables (stored in Redis; a new one is added every week).
        String actualTbs = ShardingJDBCDataSourceFactory
                .getActualTables(RedisKey.SHARDINGDBNAME_8000_SPAM_SUPER);
        orderTableRuleConfig
                .setActualDataNodes("deviceDataSource.actab_${[" + actualTbs + "]}");
        // NOTE(review): setKeyGeneratorColumnName configures the GENERATED-KEY column,
        // not the sharding column (that is set via MyShardingStrategyConfiguration).
        // Verify this call is actually intended for gmt_create.
        orderTableRuleConfig.setKeyGeneratorColumnName("gmt_create");
        return orderTableRuleConfig;
    }
}
5.ShardingDataSource
// Register the real (physical) data source under the name used in actualDataNodes.
Map<String, DataSource> dataSourceMap = new HashMap<>();
dataSourceMap.put("deviceDataSource", deviceDataSource);
// Assemble the sharding rule. (Fixed: the original snippet was missing the
// semicolon after this constructor call.)
ShardingRuleConfiguration shardingRuleConfig = new ShardingRuleConfiguration();
// Table rule: logic table -> physical weekly tables.
shardingRuleConfig.getTableRuleConfigs().add(RuleConfigFactory.getRuleConfig());
// Binding table group for the logic table.
shardingRuleConfig.getBindingTableGroups().add("actab");
// Default strategy supplies BOTH precise and range algorithms, so BETWEEN
// queries route to the matching weekly tables instead of scanning all 18.
shardingRuleConfig.setDefaultTableShardingStrategyConfig(
        new MyShardingStrategyConfiguration("gmt_create", SuperTableShardingAlgorithm.class.getName(),
                SuperTableShardingRangeAlgorithm.class.getName()));
ShardingDataSource rDataSource = new ShardingDataSource(shardingRuleConfig.build(dataSourceMap));
1.数据库中所有表
2.时间查询范围
通过ShardingJDBC去查询的话,应该只需要查询actab_20200316 和 actab_20200323这两张表就可以,不需要全部的表都进行查询。
<select id="getSuperParamsValues" resultType="java.util.HashMap">
SELECT
gmt_create AS refreshTime,
column1 AS paramValue
FROM
actab
WHERE
gmt_create BETWEEN #{startTime} AND #{endTime}
ORDER BY gmt_create DESC
LIMIT #{pageOffset},#{pageSize};
</select>
<select id="getSuperParamsValues" resultType="java.util.HashMap">
SELECT
gmt_create AS refreshTime,
column1 AS paramValue
FROM
actab
WHERE
<![CDATA[ DATE_FORMAT(gmt_create, '%Y%m%d') >= DATE_FORMAT(#{startTime}, '%Y%m%d') ]]>
AND
<![CDATA[ DATE_FORMAT(gmt_create, '%Y%m%d') <= DATE_FORMAT(#{endTime}, '%Y%m%d') ]]>
ORDER BY gmt_create DESC
LIMIT #{pageOffset},#{pageSize};
</select>
一开始我是这么写的,但是似乎不走我配置好的策略,还是走的全部表查询。按官方的说法,RangeShardingAlgorithm用于处理BETWEEN AND分片,那这种大于小于号的写法可能是不支持。
然后我又改成如下写法,还是不行。
<select id="getSuperParamsValues" resultType="java.util.HashMap">
SELECT
gmt_create AS refreshTime,
column1 AS paramValue
FROM
actab
WHERE
DATE_FORMAT(gmt_create, '%Y%m%d') BETWEEN DATE_FORMAT(#{startTime}, '%Y%m%d') AND DATE_FORMAT(#{endTime}, '%Y%m%d')
ORDER BY gmt_create DESC
LIMIT #{pageOffset},#{pageSize};
</select>
不知道是因为DATE_FORMAT后ShardingJDBC就不认识我配置的这个gmt_create的key了还是别的原因,这个需要对mybatis和shardingjdbc再实验下才能知道了,有大佬知道的话可以指导一下,不胜感激!
这里必须吐槽下ShardingJDBC,官方文档是真的烂!!!!写的什么玩意!!
因为是第一次用这个,完全是自己摸索着来,面向百度的编程,还存在很多的坑得去踩,大家共勉吧。