Dimension association essentially means querying, from within the stream, tables stored in HBase. But even when querying by primary key, HBase lookups cannot match the speed of stream-to-stream joins. Queries against external data sources are often the performance bottleneck of a streaming job, so they need to be optimized.
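For contrast, here is a minimal sketch of the unoptimized path (the class name SyncDimMapFunction is illustrative; OrderWide and FlinkConfig are the project's own types): every record issues a blocking JDBC query, so a subtask's throughput is capped by Phoenix latency.
package com.yyds.app.function;
import com.yyds.common.FlinkConfig;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;
import java.sql.Connection;
import java.sql.DriverManager;
// import for OrderWide omitted; it is the project's wide-order bean
public class SyncDimMapFunction extends RichMapFunction<OrderWide, OrderWide> {
    private transient Connection connection;
    @Override
    public void open(Configuration parameters) throws Exception {
        Class.forName(FlinkConfig.PHOENIX_DRIVER);
        connection = DriverManager.getConnection(FlinkConfig.PHOENIX_SERVER);
    }
    @Override
    public OrderWide map(OrderWide orderWide) throws Exception {
        // Blocks this subtask until Phoenix answers; at e.g. 10 ms per lookup,
        // one subtask cannot exceed roughly 100 records/s
        String sql = "select * from " + FlinkConfig.HBASE_SCHEMA
                + ".DIM_USER_INFO where id = '" + orderWide.getUser_id() + "'";
        // ... execute the query and copy the fields onto orderWide ...
        return orderWide;
    }
}
The utilities below build toward the optimized version, starting with a generic JDBC query helper: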
package com.yyds.utils;
import com.google.common.base.CaseFormat;
import org.apache.commons.beanutils.BeanUtils;
import java.sql.*;
import java.util.ArrayList;
import java.util.List;
public class JdbcUtil {
    public static <T> List<T> queryList(Connection connection,
                                        String querySql,
                                        Class<T> clz,
                                        boolean underScoreToCamel) throws Exception {
        List<T> list = new ArrayList<>();
        PreparedStatement preparedStatement = connection.prepareStatement(querySql);
        ResultSet resultSet = preparedStatement.executeQuery();
        // Get the result set metadata
        ResultSetMetaData metaData = resultSet.getMetaData();
        // Get the number of columns
        int columnCount = metaData.getColumnCount();
        while (resultSet.next()) {
            // Create an instance of the target type
            T t = clz.newInstance();
            // Copy each column onto the instance
            for (int i = 1; i < columnCount + 1; i++) {
                // Get the column name
                String columnName = metaData.getColumnName(i);
                if (underScoreToCamel) {
                    // Convert under_score column names to camelCase property names
                    columnName = CaseFormat.LOWER_UNDERSCORE.to(CaseFormat.LOWER_CAMEL, columnName.toLowerCase());
                }
                // Get the column value
                Object value = resultSet.getObject(i);
                // Set the property on T
                BeanUtils.setProperty(t, columnName, value);
            }
            // Add the populated object to the result list
            list.add(t);
        }
        resultSet.close();
        preparedStatement.close();
        return list;
    }
}
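A quick usage sketch of JdbcUtil (the schema and table name assume the project's DIM tables; adjust to your environment):
Class.forName(FlinkConfig.PHOENIX_DRIVER);
Connection connection = DriverManager.getConnection(FlinkConfig.PHOENIX_SERVER);
// Query a dimension table into fastjson objects; pass true instead of false
// to convert under_score column names to camelCase bean properties
List<JSONObject> rows = JdbcUtil.queryList(
        connection,
        "select * from " + FlinkConfig.HBASE_SCHEMA + ".DIM_USER_INFO",
        JSONObject.class,
        false);
System.out.println(rows);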
Side cache (cache-aside pattern): check Redis first; on a miss, query Phoenix and write the result back to Redis with a TTL.
package com.yyds.utils;
import com.alibaba.fastjson.JSONObject;
import com.yyds.common.FlinkConfig;
import redis.clients.jedis.Jedis;
import java.sql.*;
import java.util.List;
public class DimUtil {
    public static JSONObject queryDimInfo(Connection connection,
                                          String tableName,
                                          String id) throws Exception {
        // Before querying Phoenix, check Redis first.
        // The JSON string is stored as a Redis string rather than a hash, because
        // (1) user dimension data is large, and (2) each key needs its own TTL.
        Jedis jedis = RedisUtil.getJedis();
        String redisKey = "DIM:" + tableName + ":" + id;
        String dimInfoJsonStr = jedis.get(redisKey);
        if (dimInfoJsonStr != null) {
            // Cache hit: refresh the TTL and return
            jedis.expire(redisKey, 24 * 3600);
            jedis.close();
            return JSONObject.parseObject(dimInfoJsonStr);
        }
        // Cache miss: query Phoenix
        String querySql = "select * from " + FlinkConfig.HBASE_SCHEMA + "." + tableName + " where id = '" + id + "'";
        List<JSONObject> jsonObjects = JdbcUtil.queryList(connection, querySql, JSONObject.class, false);
        // Measured around 200 records/s at parallelism 1
        // (assumes the dimension row exists; get(0) throws if the list is empty)
        JSONObject dimInfoJsonObject = jsonObjects.get(0);
        // Write the result back to Redis
        jedis.set(redisKey, dimInfoJsonObject.toJSONString());
        // Set a 24-hour TTL
        jedis.expire(redisKey, 24 * 3600);
        jedis.close();
        // Return the dimension data
        return dimInfoJsonObject;
    }
    public static void delRedisData(String tableName, String id) {
        Jedis jedis = RedisUtil.getJedis();
        String redisKey = "DIM:" + tableName + ":" + id;
        jedis.del(redisKey);
        jedis.close();
    }
}
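Usage sketch of the cache-aside read path (the table name and id are illustrative): the first call misses Redis and falls through to Phoenix; a second call within 24 hours is served from Redis and refreshes the TTL.
Class.forName(FlinkConfig.PHOENIX_DRIVER);
Connection connection = DriverManager.getConnection(FlinkConfig.PHOENIX_SERVER);
// First call: Redis miss -> Phoenix query -> result cached for 24 h
JSONObject first = DimUtil.queryDimInfo(connection, "DIM_USER_INFO", "143");
// Second call: Redis hit, TTL refreshed, no Phoenix round trip
JSONObject second = DimUtil.queryDimInfo(connection, "DIM_USER_INFO", "143");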
package com.yyds.utils;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;
public class RedisUtil {
    private static volatile JedisPool jedisPool = null;
    public static Jedis getJedis() {
        // Double-checked locking: getJedis() is called concurrently from the
        // async thread pool, so pool creation must be thread-safe
        if (jedisPool == null) {
            synchronized (RedisUtil.class) {
                if (jedisPool == null) {
                    JedisPoolConfig jedisPoolConfig = new JedisPoolConfig();
                    jedisPoolConfig.setMaxTotal(100);            // max connections
                    jedisPoolConfig.setBlockWhenExhausted(true); // wait when the pool is exhausted
                    jedisPoolConfig.setMaxWaitMillis(2000);      // max wait time in ms
                    jedisPoolConfig.setMaxIdle(5);               // max idle connections
                    jedisPoolConfig.setMinIdle(5);               // min idle connections
                    jedisPoolConfig.setTestOnBorrow(true);       // ping the connection on borrow
                    jedisPool = new JedisPool(jedisPoolConfig, "centos01", 6379, 1000);
                    System.out.println("Jedis pool created");
                }
            }
        }
        return jedisPool.getResource();
    }
}
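Note that getJedis() hands out a pooled connection; calling close() on it returns the connection to the pool rather than closing the socket. A minimal usage sketch (key and value are illustrative):
Jedis jedis = RedisUtil.getJedis();
try {
    jedis.setex("DIM:TEST:1", 24 * 3600, "{}");
    System.out.println(jedis.get("DIM:TEST:1"));
} finally {
    jedis.close(); // returns the connection to the pool
}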
Modify DimSinkFunction so that an update to a dimension row also invalidates the corresponding Redis entry:
package com.yyds.app.function;
import com.alibaba.fastjson.JSONObject;
import com.yyds.common.FlinkConfig;
import com.yyds.utils.DimUtil;
import org.apache.commons.lang.StringUtils;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Collection;
import java.util.Set;
/**
 * Custom sink that writes the dimension stream into Phoenix (HBase) tables.
 */
public class DimSinkFunction extends RichSinkFunction<JSONObject> {
    private Connection connection;
    // Initialize the Phoenix connection
    @Override
    public void open(Configuration parameters) throws Exception {
        Class.forName(FlinkConfig.PHOENIX_DRIVER);
        connection = DriverManager.getConnection(
                FlinkConfig.PHOENIX_SERVER
        );
    }
    @Override
    public void invoke(JSONObject value, Context context) throws Exception {
        // value = {"sinkTable":"", "database":"", "before":{}, "after":{}, "type":"insert", "tableName":""}
        // Target Phoenix statement:
        // upsert into db.tn (id,name) values('...','...')
        PreparedStatement preparedStatement = null;
        try {
            // 1. Build the SQL
            String sinkTable = value.getString("sinkTable");
            JSONObject after = value.getJSONObject("after");
            String upsertSql = getUpsertSql(
                    sinkTable,
                    after
            );
            System.out.println("phoenix sql ===> " + upsertSql);
            // 2. Prepare the statement
            preparedStatement = connection.prepareStatement(upsertSql);
            // TODO If this is an update, delete the stale entry from Redis
            if ("update".equals(value.get("type"))) {
                DimUtil.delRedisData(sinkTable.toUpperCase(), after.getString("id"));
            }
            // 3. Execute the upsert
            preparedStatement.executeUpdate();
            connection.commit();
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            if (preparedStatement != null) {
                try {
                    preparedStatement.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
        }
    }
    // upsert into db.tn (id,name) values('...','...')
    private String getUpsertSql(String sinkTable, JSONObject after) {
        Set<String> keySet = after.keySet();
        Collection<Object> values = after.values();
        // Append the column list and values to the prefix
        return "upsert into " + FlinkConfig.HBASE_SCHEMA + "." + sinkTable + " ( "
                + StringUtils.join(keySet, ",") + " ) values ('"
                + StringUtils.join(values, "','") + "')";
    }
}
package com.yyds.utils;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
/**
 * Thread pool utility class (lazy singleton).
 */
public class ThreadPoolUtil {
    private static volatile ThreadPoolExecutor threadPoolExecutor = null;
    private ThreadPoolUtil() {
    }
    public static ThreadPoolExecutor getThreadPool() {
        // Double-checked locking singleton
        if (threadPoolExecutor == null) {
            synchronized (ThreadPoolUtil.class) {
                if (threadPoolExecutor == null) {
                    threadPoolExecutor = new ThreadPoolExecutor(
                            8,              // core pool size
                            20,             // maximum pool size
                            1L,             // keep-alive time for idle non-core threads
                            TimeUnit.MINUTES,
                            new LinkedBlockingDeque<>());
                }
            }
        }
        return threadPoolExecutor;
    }
}
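All DimAsyncFunction instances in the same TaskManager JVM share this singleton pool. A minimal usage sketch:
ThreadPoolExecutor pool = ThreadPoolUtil.getThreadPool();
pool.submit(() -> {
    // simulate a dimension lookup running on a pool thread
    System.out.println(Thread.currentThread().getName() + " querying dim info...");
});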
Wrap the asynchronous dimension query in a function class, DimAsyncFunction. The join logic that varies per dimension table is extracted into an interface:
package com.yyds.app.function;
import com.alibaba.fastjson.JSONObject;
public interface DimAsyncJoinFunction<T> {
    // Extract the dimension table's primary key from the input record
    String getKey(T input);
    // Copy the queried dimension fields onto the input record
    void connect(T input, JSONObject dimInfo);
}
package com.yyds.app.function;
import com.alibaba.fastjson.JSONObject;
import com.yyds.common.FlinkConfig;
import com.yyds.utils.DimUtil;
import com.yyds.utils.ThreadPoolUtil;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.async.ResultFuture;
import org.apache.flink.streaming.api.functions.async.RichAsyncFunction;
import java.sql.Connection;
import java.sql.DriverManager;
import java.util.Collections;
import java.util.concurrent.ThreadPoolExecutor;
/**
 * Async I/O: the JDBC client is not asynchronous itself, so a thread pool
 * is used to run each lookup on its own thread.
 */
public abstract class DimAsyncFunction<T> extends RichAsyncFunction<T, T> implements DimAsyncJoinFunction<T> {
    private Connection connection;
    private ThreadPoolExecutor threadPoolExecutor;
    private String tableName;
    public DimAsyncFunction(String tableName) {
        this.tableName = tableName;
    }
    @Override
    public void open(Configuration parameters) throws Exception {
        Class.forName(FlinkConfig.PHOENIX_DRIVER);
        connection = DriverManager.getConnection(FlinkConfig.PHOENIX_SERVER);
        threadPoolExecutor = ThreadPoolUtil.getThreadPool();
    }
    @Override
    public void asyncInvoke(T input, ResultFuture<T> resultFuture) throws Exception {
        threadPoolExecutor.submit(new Runnable() {
            @Override
            public void run() {
                try {
                    // Get the primary key
                    String id = getKey(input);
                    // Query the dimension info (Redis first, then Phoenix)
                    JSONObject dimInfo = DimUtil.queryDimInfo(connection, tableName, id);
                    // Enrich the input record
                    if (dimInfo != null) {
                        connect(input, dimInfo);
                    }
                    // Emit the result
                    resultFuture.complete(Collections.singleton(input));
                } catch (Exception e) {
                    // Always complete the future, otherwise the async
                    // operator waits until the timeout fires
                    resultFuture.completeExceptionally(e);
                }
            }
        });
    }
    @Override
    public void timeout(T input, ResultFuture<T> resultFuture) throws Exception {
        System.out.println("TimeOut: " + input);
    }
}
Associate the dimensions (in OrderWideApp):
// TODO 4. Associate dimension info (async I/O optimization)
// (1) Associate the user dimension
SingleOutputStreamOperator<OrderWide> orderWideWithUserDS = AsyncDataStream.unorderedWait(
        joinedOrderWideDS,
        new DimAsyncFunction<OrderWide>("DIM_USER_INFO") {
            @Override
            public String getKey(OrderWide orderWide) {
                return orderWide.getUser_id().toString();
            }
            @Override
            public void connect(OrderWide orderWide, JSONObject dimInfo) {
                String birthday = dimInfo.getString("BIRTHDAY");
                String gender = dimInfo.getString("GENDER");
                orderWide.setUser_gender(gender);
                orderWide.setUser_age(CommonUtil.getAgeByBirthday(birthday));
            }
        },
        60,
        TimeUnit.SECONDS
);
orderWideWithUserDS.print("orderWideWithUserDS---------------------");
// (2) Associate the province dimension
SingleOutputStreamOperator<OrderWide> orderWideWithProvinceDS = AsyncDataStream.unorderedWait(
        orderWideWithUserDS,
        new DimAsyncFunction<OrderWide>("DIM_BASE_PROVINCE") {
            @Override
            public String getKey(OrderWide orderWide) {
                return orderWide.getProvince_id().toString();
            }
            @Override
            public void connect(OrderWide input, JSONObject dimInfo) {
                input.setProvince_name(dimInfo.getString("NAME"));
                input.setProvince_area_code(dimInfo.getString("AREA_CODE"));
                input.setProvince_iso_code(dimInfo.getString("ISO_CODE"));
                input.setProvince_3166_2_code(dimInfo.getString("ISO_3166_2"));
            }
        },
        60,
        TimeUnit.SECONDS
);
// Associate the remaining dimensions (SKU, SPU, trademark, category) the same way
// TODO 5. Write the result to Kafka
orderWideWithProvinceDS
        .map(JSONObject::toJSONString)
        .addSink(MyKafkaUtils.getKafkaProducer(orderWideSinkTopic));
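MyKafkaUtils is not shown in this section; the following is a minimal sketch of the producer helper it implies, assuming the flink-connector-kafka dependency and a broker address (the KAFKA_SERVER value is an assumption; replace it with your cluster address):
package com.yyds.utils;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
public class MyKafkaUtils {
    // Broker list is an assumption; use your own Kafka address
    private static final String KAFKA_SERVER = "centos01:9092";
    public static FlinkKafkaProducer<String> getKafkaProducer(String topic) {
        return new FlinkKafkaProducer<>(KAFKA_SERVER, topic, new SimpleStringSchema());
    }
}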