实时数仓(七)DWM层事实表关联维度表(订单宽表、旁路缓存和异步IO优化)

DWM层事实表关联维度表(订单宽表)

维度关联实际上就是在流中查询存储在 HBase 中的数据表。但是即使通过主键的方式查询, HBase 的查询速度也不及流之间的 join。外部数据源的查询常常是流式计算的性能瓶颈, 所以需要进行一定的优化。

(1)旁路缓存(先查redis)

实时数仓(七)DWM层事实表关联维度表(订单宽表、旁路缓存和异步IO优化)_第1张图片

package com.yyds.utils;

import com.google.common.base.CaseFormat;
import net.minidev.json.JSONObject;
import org.apache.commons.beanutils.BeanUtils;

import java.sql.*;
import java.util.ArrayList;
import java.util.List;

/**
 * Generic JDBC query helper that maps every row of a result set onto a bean
 * (or map-like object) of the requested type.
 */
public class JdbcUtil {

    /**
     * Runs {@code querySql} and converts each returned row into an instance of {@code clz}.
     *
     * <p>Each column is copied onto the target object with
     * {@code BeanUtils.setProperty}, using the column name as the property name.
     *
     * @param connection        open JDBC connection; it is NOT closed by this method
     * @param querySql          complete SQL text to execute
     * @param clz               target type; must expose a no-argument constructor
     * @param underScoreToCamel when {@code true}, column names are lower-cased and converted
     *                          from {@code lower_underscore} to {@code lowerCamel} before
     *                          being used as property names
     * @param <T>               element type of the returned list
     * @return one object per row, in result-set order; empty when no row matched
     * @throws Exception on any SQL, reflection or property-setting failure
     */
    public static <T> List<T> queryList(Connection connection,
                                        String querySql,
                                        Class<T> clz,
                                        boolean underScoreToCamel) throws Exception {

        List<T> list = new ArrayList<>();

        // try-with-resources guarantees the statement and result set are released
        // even when row mapping throws half-way through.
        try (PreparedStatement preparedStatement = connection.prepareStatement(querySql);
             ResultSet resultSet = preparedStatement.executeQuery()) {

            // Result-set metadata gives us the column count and names.
            ResultSetMetaData metaData = resultSet.getMetaData();
            int columnCount = metaData.getColumnCount();

            // Property names are the same for every row, so resolve them once.
            String[] propertyNames = new String[columnCount];
            for (int i = 1; i <= columnCount; i++) {
                String columnName = metaData.getColumnName(i);
                if (underScoreToCamel) {
                    columnName = CaseFormat.LOWER_UNDERSCORE.to(CaseFormat.LOWER_CAMEL, columnName.toLowerCase());
                }
                propertyNames[i - 1] = columnName;
            }

            while (resultSet.next()) {
                // Class.newInstance() is deprecated; go through the no-arg constructor instead.
                T t = clz.getDeclaredConstructor().newInstance();

                // JDBC columns are 1-based.
                for (int i = 1; i <= columnCount; i++) {
                    BeanUtils.setProperty(t, propertyNames[i - 1], resultSet.getObject(i));
                }
                list.add(t);
            }
        }
        return list;
    }


}

旁路缓存:

package com.yyds.utils;

import com.alibaba.fastjson.JSONObject;
import com.yyds.common.FlinkConfig;
import redis.clients.jedis.Jedis;

import java.sql.*;
import java.util.List;

/**
 * Dimension lookup helper implementing a cache-aside read: Redis first, Phoenix on a miss.
 */
public class DimUtil {

    /** Time-to-live, in seconds, applied to every cached dimension row (24 hours). */
    private static final int CACHE_TTL_SECONDS = 24 * 3600;

    /**
     * Looks up one dimension row by primary key.
     *
     * <p>The row is cached in Redis as a JSON string under {@code DIM:<tableName>:<id>}.
     * A plain string (rather than a hash) is used because the user dimension is large
     * and every entry needs its own expiry.
     *
     * @param connection open Phoenix connection used on a cache miss; not closed here
     * @param tableName  dimension table name (appended to {@code FlinkConfig.HBASE_SCHEMA})
     * @param id         primary-key value of the row
     * @return the dimension row, or {@code null} when no row with that id exists
     * @throws Exception on any Redis, SQL or mapping failure
     */
    public static JSONObject queryDimInfo(  Connection connection,
                                            String tableName,
                                            String id ) throws Exception {
        String redisKey = "DIM:" + tableName + ":" + id;

        // try-with-resources returns the connection to the pool on every exit path,
        // including when the Phoenix query below throws.
        try (Jedis jedis = RedisUtil.getJedis()) {
            // Check Redis before going to Phoenix.
            String dimInfoJsonStr = jedis.get(redisKey);
            if (dimInfoJsonStr != null) {
                // Cache hit: refresh the expiry so hot keys stay cached.
                jedis.expire(redisKey, CACHE_TTL_SECONDS);
                return JSONObject.parseObject(dimInfoJsonStr);
            }

            // NOTE(review): tableName and id are concatenated into the SQL text; if either can
            // carry untrusted content this is injectable. A bound parameter would need a
            // JdbcUtil overload that accepts arguments.
            String querySql = "select * from " + FlinkConfig.HBASE_SCHEMA  + "." + tableName + " where id ='" + id +"'";

            List<JSONObject> jsonObjects = JdbcUtil.queryList(connection, querySql, JSONObject.class, false);

            // No matching row: report "not found" instead of failing with
            // IndexOutOfBoundsException on get(0).
            if (jsonObjects.isEmpty()) {
                return null;
            }

            // Original author's note: roughly 200 records per second at parallelism 1.
            JSONObject dimInfoJsonObject = jsonObjects.get(0);

            // Populate the cache and give the entry an expiry.
            jedis.set(redisKey, dimInfoJsonObject.toJSONString());
            jedis.expire(redisKey, CACHE_TTL_SECONDS);

            return dimInfoJsonObject;
        }
    }



    /**
     * Removes the cached copy of one dimension row so the next lookup re-reads Phoenix.
     *
     * @param tableName dimension table name used in the cache key
     * @param id        primary-key value of the row
     */
    public static void delRedisData(String tableName,String id){
        String redisKey = "DIM:" + tableName + ":" + id;
        try (Jedis jedis = RedisUtil.getJedis()) {
            jedis.del(redisKey);
        }
    }
}

package com.yyds.utils;

import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;

/**
 * Holder for a lazily created, process-wide Jedis connection pool.
 */
public class RedisUtil {

    /** Shared pool; volatile so a pool built by one thread is safely visible to the others. */
    public static volatile JedisPool jedisPool = null;

    /**
     * Borrows a connection from the shared pool, creating the pool on first use.
     *
     * <p>Pool creation is guarded by a lock because this method is called from several
     * worker threads at once; without the guard each racing thread would build its own
     * pool and all but the last would leak.
     *
     * @return a pooled connection; the caller must {@code close()} it to return it to the pool
     */
    public static Jedis getJedis() {
        if (jedisPool == null) {
            synchronized (RedisUtil.class) {
                // Re-check under the lock: another thread may have won the race.
                if (jedisPool == null) {
                    JedisPoolConfig jedisPoolConfig = new JedisPoolConfig();
                    jedisPoolConfig.setMaxTotal(100); // maximum number of connections
                    jedisPoolConfig.setBlockWhenExhausted(true); // wait when the pool is exhausted
                    jedisPoolConfig.setMaxWaitMillis(2000); // how long to wait for a connection
                    jedisPoolConfig.setMaxIdle(5); // maximum idle connections
                    jedisPoolConfig.setMinIdle(5); // minimum idle connections
                    jedisPoolConfig.setTestOnBorrow(true); // ping the connection when borrowing it
                    jedisPool = new JedisPool(jedisPoolConfig, "centos01", 6379, 1000);
                    System.out.println("开辟连接池");
                }
            }
        }
        return jedisPool.getResource();
    }



}


修改DimSinkFunction

package com.yyds.app.function;

import com.alibaba.fastjson.JSONObject;
import com.yyds.common.FlinkConfig;
import com.yyds.utils.DimUtil;
import org.apache.commons.lang.StringUtils;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Collection;
import java.util.Set;


/**
 * Custom sink that writes the dimension stream into Phoenix tables and invalidates
 * the matching Redis cache entry when a row is updated.
 */
public class DimSinkFunction extends RichSinkFunction<JSONObject> {


    private Connection connection;

    /** Loads the Phoenix driver and opens the connection used for all writes. */
    @Override
    public void open(Configuration parameters) throws Exception {
        Class.forName(FlinkConfig.PHOENIX_DRIVER);
        connection = DriverManager.getConnection(
                FlinkConfig.PHOENIX_SERVER
        );
    }

    /** Releases the Phoenix connection opened in {@link #open(Configuration)}. */
    @Override
    public void close() throws Exception {
        if (connection != null) {
            connection.close();
        }
    }

    /**
     * Upserts one change record into its Phoenix table.
     *
     * @param value   record shaped like
     *                {@code {"sinkTable":"","database":"","before":{},"after":{},"type":"insert","tableName":""}}
     * @param context sink context (unused)
     */
    @Override
    public void invoke(JSONObject value, Context context) throws Exception {
        // 1. Build the statement: upsert into db.tn (id,name) values('...','...')
        String sinkTable = value.getString("sinkTable");
        JSONObject after = value.getJSONObject("after");
        String upsertSql = getUpsertSql(
                sinkTable,
                after
        );
        System.out.println("phoenix sql ===> " + upsertSql);

        // 2. Prepare, 3. execute and commit. try-with-resources closes the statement.
        try (PreparedStatement preparedStatement = connection.prepareStatement(upsertSql)) {
            preparedStatement.executeUpdate();
            connection.commit();

            // Invalidate the cached row only after the new value is committed; deleting
            // first would let a concurrent reader re-cache the old row before the write lands.
            if ("update".equals(value.getString("type"))) {
                DimUtil.delRedisData(sinkTable.toUpperCase(), after.getString("id"));
            }
        } catch (SQLException e) {
            // Best effort, as in the original: report the failed write and keep the sink running.
            e.printStackTrace();
        }
    }

    /**
     * Builds {@code upsert into <schema>.<table> (c1,c2) values ('v1','v2')} from the
     * {@code after} image of a change record.
     *
     * <p>NOTE(review): values are inlined as quoted literals, so a value containing a single
     * quote breaks the statement; bound parameters would be safer.
     *
     * @param sinkTable target Phoenix table name
     * @param after     column-name to value map of the row after the change
     * @return the complete upsert statement
     */
    private String getUpsertSql(String sinkTable, JSONObject after) {
        Set<String> keySet = after.keySet();
        Collection<Object> values = after.values();
        // The original reassigned the variable here and lost the "upsert into ..." prefix.
        return "upsert into " + FlinkConfig.HBASE_SCHEMA + "." + sinkTable + " ("
                + StringUtils.join(keySet, ",") + ") values ('"
                + StringUtils.join(values, "','") + "')";
    }
}

(2)异步IO

实时数仓(七)DWM层事实表关联维度表(订单宽表、旁路缓存和异步IO优化)_第2张图片
线程池工具类

package com.yyds.utils;

import org.apache.hadoop.util.ThreadUtil;

import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;


/**
 * Provides a single, lazily created {@link ThreadPoolExecutor} shared by the whole JVM.
 */
public class ThreadPoolUtil {

    /** The shared pool; volatile so the double-checked initialisation below is safe. */
    private static volatile ThreadPoolExecutor threadPoolExecutor = null;

    private ThreadPoolUtil() {

    }

    /**
     * Returns the shared thread pool, creating it on first use.
     *
     * <p>The pool has 8 core threads, a maximum of 20 threads and a one-minute keep-alive.
     * Because the work queue is unbounded, tasks queue up rather than triggering growth
     * beyond the core size.
     *
     * @return the singleton executor; never {@code null}
     */
    public static ThreadPoolExecutor getThreadPool(){
        if(threadPoolExecutor == null){
            // Lock on this class; the original locked on an unrelated Hadoop class.
            synchronized (ThreadPoolUtil.class){
                // Re-check under the lock: another thread may have created the pool already.
                if(threadPoolExecutor == null){
                    threadPoolExecutor = new ThreadPoolExecutor(
                            8,
                            20,
                            1L,
                            TimeUnit.MINUTES,
                            new LinkedBlockingDeque<>());
                }
            }
        }
        return threadPoolExecutor;
    }


}

封装维度异步查询的函数类 DimAsyncFunction

package com.yyds.app.function;

import com.alibaba.fastjson.JSONObject;

/**
 * Callbacks a dimension join needs from its caller: how to obtain the lookup key
 * from a record, and how to merge the looked-up dimension row into that record.
 *
 * @param <T> type of the record being enriched
 */
public interface DimAsyncJoinFunction<T> {

    /**
     * Extracts the dimension-table primary key from the record.
     *
     * @param input record to be enriched
     * @return key used to look up the dimension row
     */
    String getKey(T input);

    /**
     * Copies the wanted dimension fields onto the record.
     *
     * @param input   record to be enriched
     * @param dimInfo dimension row found for {@link #getKey(Object)}
     */
    void connect(T input, JSONObject dimInfo);
}

package com.yyds.app.function;

import com.alibaba.fastjson.JSONObject;
import com.yyds.common.FlinkConfig;
import com.yyds.utils.DimUtil;
import com.yyds.utils.ThreadPoolUtil;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.async.ResultFuture;
import org.apache.flink.streaming.api.functions.async.RichAsyncFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.util.Collections;
import java.util.concurrent.ThreadPoolExecutor;



/**
 * Async I/O function that enriches each record with one dimension row.
 *
 * <p>Subclasses supply the lookup key and the merge logic through
 * {@link DimAsyncJoinFunction}; this class runs the lookup on a shared thread pool
 * and reports the outcome to Flink through the {@link ResultFuture}.
 *
 * <p>NOTE(review): one JDBC connection is shared by all pool threads of this instance;
 * confirm the Phoenix driver tolerates concurrent use, or use one connection per task.
 */
public abstract class DimAsyncFunction<T>  extends RichAsyncFunction<T, T> implements DimAsyncJoinFunction<T> {


    // Created in open(); transient because they are runtime resources, not function state.
    private transient Connection connection;

    private transient ThreadPoolExecutor threadPoolExecutor;

    private final String tableName ;

    /**
     * @param tableName dimension table to query
     */
    public DimAsyncFunction(String tableName) {
        this.tableName = tableName;
    }

    /** Opens the Phoenix connection and obtains the shared worker pool. */
    @Override
    public void open(Configuration parameters) throws Exception {
        Class.forName(FlinkConfig.PHOENIX_DRIVER);
        connection = DriverManager.getConnection(FlinkConfig.PHOENIX_SERVER);
        threadPoolExecutor = ThreadPoolUtil.getThreadPool();
    }

    /**
     * Closes the Phoenix connection. The thread pool is a JVM-wide singleton shared
     * with other function instances, so it is deliberately left running.
     */
    @Override
    public void close() throws Exception {
        if (connection != null) {
            connection.close();
        }
    }



    /**
     * Schedules the dimension lookup for one record on the worker pool.
     *
     * @param input        record to enrich
     * @param resultFuture completed with the enriched record, or exceptionally on failure
     */
    @Override
    public void asyncInvoke(T input, ResultFuture<T> resultFuture) throws Exception {
        threadPoolExecutor.submit(new Runnable() {
            @Override
            public void run() {
                try {
                    // Primary key of the dimension row.
                    String id = getKey(input);

                    // Look up the dimension row (Redis first, then Phoenix).
                    JSONObject dimInfo = DimUtil.queryDimInfo(connection, tableName, id);

                    // Merge the dimension fields when a row was found.
                    if(dimInfo != null){
                        connect(input,dimInfo);
                    }

                    // Emit the (possibly enriched) record.
                    resultFuture.complete(Collections.singleton(input));
                } catch (Exception e) {
                    e.printStackTrace();
                    // Always complete the future: otherwise the record stays pending in the
                    // async operator until the timeout fires.
                    resultFuture.completeExceptionally(e);
                }
            }
        });

    }



    /**
     * Called by Flink when the lookup did not finish within the configured timeout.
     * An overriding implementation must complete the future itself, so after logging
     * we fail the record the same way the default implementation does.
     */
    @Override
    public void timeout(T input, ResultFuture<T> resultFuture) throws Exception {
        System.out.println("TimeOut: " + input);
        resultFuture.completeExceptionally(
                new java.util.concurrent.TimeoutException("Async dimension lookup timed out: " + input));
    }
}

关联维度(在 OrderWideApp 中)

// TODO 4: join dimension data onto the order-wide stream

 // Async I/O optimisation: each dimension is joined through a DimAsyncFunction
        // (1) Join the user dimension, keyed by the order's user id
        SingleOutputStreamOperator<OrderWide> orderWideWithUserDS = AsyncDataStream.unorderedWait(
                joinedOrderWideDS,
                new DimAsyncFunction<OrderWide>("DIM_USER_INFO"){
                    @Override
                    public String getKey(OrderWide orderWide) {
                        return orderWide.getUser_id().toString();
                    }

                    @Override
                    public void connect(OrderWide orderWide, JSONObject dimInfo) {
                        // Column names are read in upper case, as returned for the dimension row
                        String birthday = dimInfo.getString("BIRTHDAY");
                        String gender = dimInfo.getString("GENDER");

                        orderWide.setUser_gender(gender);
                        // Age is derived from the birthday by CommonUtil.getAgeByBirthday
                        orderWide.setUser_age(CommonUtil.getAgeByBirthday(birthday));
                    }
                },
                // Per-record timeout for the async lookup: 60 seconds
                60,
                TimeUnit.SECONDS
        );

        orderWideWithUserDS.print("orderWideWithUserDS---------------------");

        // (2) Join the province dimension, keyed by the order's province id
        SingleOutputStreamOperator<OrderWide> orderWideWithProvinceDS= AsyncDataStream.unorderedWait(
                orderWideWithUserDS,
                new DimAsyncFunction<OrderWide>("DIM_BASE_PROVINCE") {

                    @Override
                    public String getKey(OrderWide orderWide) {
                        return orderWide.getProvince_id().toString();
                    }

                    @Override
                    public void connect(OrderWide input, JSONObject dimInfo) {

                        String name = dimInfo.getString("NAME");
                        String area_code = dimInfo.getString("AREA_CODE");
                        String iso_code = dimInfo.getString("ISO_CODE");
                        String iso_3166_2 = dimInfo.getString("ISO_3166_2");

                        input.setProvince_name(name);
                        input.setProvince_area_code(area_code);
                        input.setProvince_iso_code(iso_code);
                        input.setProvince_3166_2_code(iso_3166_2);
                    }
                },
                // Per-record timeout for the async lookup: 60 seconds
                60,
                TimeUnit.SECONDS
        );

        // Placeholder: the remaining dimensions (SKU, SPU, trademark, category) are joined the same way


        // TODO 5: serialise each wide record to JSON and write it to Kafka
        orderWideWithProvinceDS
                .map(JSONObject::toJSONString)
                .addSink(MyKafkaUtils.getKafkaProducer(orderWideSinkTopic));

你可能感兴趣的:(#,数仓,big,data)