2021-01-29-Flink-34 (Flink Real-Time Business Requirements Case Study)

1. Collecting Business Data

Changes to the business tables are captured with Canal, Alibaba's tool for subscribing to the MySQL binlog as an incremental change stream.

2. Using Canal

1. Create a regular MySQL user

CREATE USER canal IDENTIFIED BY 'canal123';                                 -- create the user
GRANT SELECT, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'canal'@'%';  -- grant privileges
FLUSH PRIVILEGES;                                                           -- reload the grant tables

If creating the user fails with a password-policy error, relax the password validation rules first:

set global validate_password_policy=LOW;
set global validate_password_length=6;

2. Configure MySQL

Canal works by reading the MySQL binlog, so binlog writing must be enabled, and ROW format is recommended:
vi /etc/my.cnf

[mysqld]
log-bin=mysql-bin    # enable binlog
binlog-format=ROW    # use ROW format
server_id=1          # required for MySQL replication; must not clash with Canal's slaveId

service mysqld restart    # restart MySQL
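After the restart, a quick sanity check confirms binlog is on (standard MySQL statement):

SHOW VARIABLES LIKE 'log_bin';   -- expect: ON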

3. Edit the Canal configuration files

cd /bigdata/canal/
vi conf/canal.properties

# integrate Canal with Kafka: send change data to Kafka
canal.serverMode = kafka
# Kafka broker addresses
canal.mq.servers = linux03:9092,linux04:9092,linux05:9092
# number of retries when sending to Kafka fails
canal.mq.retries = 10

Then edit the Canal instance configuration file:

vi conf/example/instance.properties

# MySQL address
canal.instance.master.address=127.0.0.1:3306
# MySQL username
canal.instance.dbUsername=canal
# MySQL password
canal.instance.dbPassword=canal123
# comment this out so data is not all written to the default topic
#canal.mq.topic=example
# dynamic topic route by schema or table regex

# dynamic topics: route table db1.tb1 to Kafka topic topic1, and table hhh.user to topic user1
canal.mq.dynamicTopic=topic1:db1\\.tb1,user1:hhh\\.user

4. Start Canal

bin/startup.sh
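Once Canal is running, each MySQL change lands in Kafka as a JSON document. A simplified sketch of what an INSERT on the ordermain table might look like (the exact field set varies by Canal version; the parsing code below relies only on "type" and "data"):

{
  "database": "bigdata",
  "table": "ordermain",
  "type": "INSERT",
  "ts": 1611622801000,
  "data": [
    {"oid": "1001", "create_time": "2021-01-26 09:00:01", "total_money": "9.9", "status": "1"}
  ]
}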

5. Troubleshooting

If the service fails to start, one drastic fix is to re-initialize the MySQL data directory (warning: this deletes all existing MySQL data):

rm -rf /var/lib/mysql
mysqld --initialize --user=mysql

3. Join Case Study

Sample group-buying events. Fields for create/join events: time, user id, group id, category, product id, price, event type; for success/fail events: time, group id, (member count,) category, product id, price, status:
2021-01-26 09:00:01,user1,tuan001,手机,product005,9.9,create
2021-01-26 09:00:08,user2,tuan001,手机,product005,9.9,join
2021-01-26 09:00:08,user2,tuan001,手机,product005,9.9,join
2021-01-26 09:00:08,tuan001,3,手机,product005,9.9,success
2021-01-26 09:00:10,user5,tuan002,电脑,product009,9.9,create
2021-01-26 09:00:08,user3,tuan002,电脑,product009,9.9,join
2021-01-26 09:00:08,user4,tuan002,电脑,product009,9.9,join
2021-01-26 09:00:08,tuan002,3,电脑,product009,9.9,success
2021-01-26 09:00:01,user1,tuan003,服装,product007,9.9,create
2021-01-26 09:00:08,user2,tuan003,电脑,product009,9.9,join
2021-01-26 09:00:01,user2,tuan004,日用品,product010,9.9,create
2021-01-26 10:00:01,tuan004,日用品,product010,9.9,fail

The requirements:

1. How many groups have been opened since midnight today, and the status of each (see the counting sketch after this list)
2. How many users joined groups, and how many join events occurred
3. Group-buying revenue per product category
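A minimal counting sketch for requirement 1, assuming the sample CSV lines above arrive as Strings (here from a hypothetical socket source on localhost:8888; in the real pipeline they would come from Kafka). It keeps a running count per event type; de-duplicating status transitions per group (create -> success) would additionally need keyed state per group id:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class GroupStatusCount {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // hypothetical source carrying lines in the sample format above
        DataStream<String> lines = env.socketTextStream("localhost", 8888);

        lines.map(new MapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> map(String line) {
                        String[] fields = line.split(",");
                        // the last field is the event type: create / join / success / fail
                        return Tuple2.of(fields[fields.length - 1], 1);
                    }
                })
                .keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Integer> t) {
                        return t.f0; // key by event type
                    }
                })
                .sum(1) // running count per event type
                .print();

        env.execute("group-status-count");
    }
}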
The jobs below share a utility class, FlinkUtilsV2, which centralizes checkpointing, the restart strategy, the state backend, and FlinkKafkaConsumer creation:

import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

import java.util.Arrays;
import java.util.List;
import java.util.Properties;

public class FlinkUtilsV2 {

    private static StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    public static <T> DataStream<T> createKafkaDataStream(ParameterTool parameters, Class<? extends DeserializationSchema<T>> clazz) throws Exception {
        String topics = parameters.getRequired("kafka.topics");
        String groupId = parameters.getRequired("group.id");
        return createKafkaDataStream(parameters, topics, groupId, clazz);
    }



    public static <T> DataStream<T> createKafkaDataStream(ParameterTool parameters, String topics, Class<? extends DeserializationSchema<T>> clazz) throws Exception {

        String groupId = parameters.getRequired("group.id");
        return createKafkaDataStream(parameters, topics, groupId, clazz);
    }


    public static <T> DataStream<T> createKafkaDataStream(ParameterTool parameters, String topics, String groupId, Class<? extends DeserializationSchema<T>> clazz) throws Exception {

        // make the ParameterTool available to all operators as global job parameters
        env.getConfig().setGlobalJobParameters(parameters);

        // enable exactly-once checkpointing (default interval: 10 s)
        env.enableCheckpointing(parameters.getLong("checkpoint.interval", 10000L), CheckpointingMode.EXACTLY_ONCE);

        // restart strategy: up to N fixed-delay restarts, 5 s apart
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(parameters.getInt("restart.times", 10), Time.seconds(5)));

        // state backend (ideally configured globally in flink-conf.yaml instead)
        String path = parameters.get("state.backend.path");
        if (path != null) {
            env.setStateBackend(new FsStateBackend(path));
        }

        // retain externalized checkpoints when the job is cancelled
        env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        env.getCheckpointConfig().setMaxConcurrentCheckpoints(3);

        List<String> topicList = Arrays.asList(topics.split(","));

        Properties properties = parameters.getProperties();
        properties.setProperty("group.id", groupId);

        // create the FlinkKafkaConsumer with the given deserialization schema
        FlinkKafkaConsumer<T> kafkaConsumer = new FlinkKafkaConsumer<>(
                topicList,
                clazz.newInstance(),
                properties
        );

        return env.addSource(kafkaConsumer);
    }

    public static StreamExecutionEnvironment getEnv() {
        return env;
    }
}
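For reference, a plausible config.properties passed as args[0] (host names and paths are placeholders; FlinkUtilsV2 reads kafka.topics, group.id, checkpoint.interval, restart.times, and state.backend.path, and forwards everything else, such as bootstrap.servers, to the Kafka consumer):

# Kafka
bootstrap.servers=linux03:9092,linux04:9092,linux05:9092
kafka.topics=ordermain,orderdetail
group.id=g1
auto.offset.reset=earliest
# checkpointing / restarts
checkpoint.interval=10000
restart.times=10
state.backend.path=hdfs://linux01:8020/flink/checkpoints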

Two POJOs mirror the MySQL order tables; Canal's JSON "data" rows are deserialized into them:

import java.util.Date;

public class OrderMain {

    private Long oid;
    private Date create_time;
    private Double total_money;
    private int status;
    private Date update_time;
    private String province;
    private String city;
    // database operation type: INSERT or UPDATE
    private String type;

    public Long getOid() {
        return oid;
    }

    public void setOid(Long oid) {
        this.oid = oid;
    }

    public Date getCreate_time() {
        return create_time;
    }

    public void setCreate_time(Date create_time) {
        this.create_time = create_time;
    }

    public Double getTotal_money() {
        return total_money;
    }

    public void setTotal_money(Double total_money) {
        this.total_money = total_money;
    }

    public int getStatus() {
        return status;
    }

    public void setStatus(int status) {
        this.status = status;
    }

    public Date getUpdate_time() {
        return update_time;
    }

    public void setUpdate_time(Date update_time) {
        this.update_time = update_time;
    }

    public String getProvince() {
        return province;
    }

    public void setProvince(String province) {
        this.province = province;
    }

    public String getCity() {
        return city;
    }

    public void setCity(String city) {
        this.city = city;
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    @Override
    public String toString() {
        return "OrderMain{" +
                "oid=" + oid +
                ", create_time=" + create_time +
                ", total_money=" + total_money +
                ", status=" + status +
                ", update_time=" + update_time +
                ", province='" + province + '\'' +
                ", city='" + city + '\'' +
                ", type='" + type + '\'' +
                '}';
    }
}

import java.util.Date;

public class OrderDetail {

    private Long id;
    private Long order_id;
    private int category_id;
    private String categoryName;
    private Long sku;
    private Double money;
    private int amount;
    private Date create_time;
    private Date update_time;

    // database operation type: INSERT or UPDATE
    private String type;

    public Long getId() {
        return id;
    }

    public void setId(Long id) {
        this.id = id;
    }

    public Long getOrder_id() {
        return order_id;
    }

    public void setOrder_id(Long order_id) {
        this.order_id = order_id;
    }

    public int getCategory_id() {
        return category_id;
    }

    public void setCategory_id(int category_id) {
        this.category_id = category_id;
    }

    public Long getSku() {
        return sku;
    }

    public void setSku(Long sku) {
        this.sku = sku;
    }

    public Double getMoney() {
        return money;
    }

    public void setMoney(Double money) {
        this.money = money;
    }

    public int getAmount() {
        return amount;
    }

    public void setAmount(int amount) {
        this.amount = amount;
    }

    public Date getCreate_time() {
        return create_time;
    }

    public void setCreate_time(Date create_time) {
        this.create_time = create_time;
    }

    public Date getUpdate_time() {
        return update_time;
    }

    public void setUpdate_time(Date update_time) {
        this.update_time = update_time;
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    public String getCategoryName() {
        return categoryName;
    }

    public void setCategoryName(String categoryName) {
        this.categoryName = categoryName;
    }

    @Override
    public String toString() {
        return "OrderDetail{" +
                "id=" + id +
                ", order_id=" + order_id +
                ", category_id=" + category_id +
                ", categoryName='" + categoryName + '\'' +
                ", sku=" + sku +
                ", money=" + money +
                ", amount=" + amount +
                ", create_time=" + create_time +
                ", update_time=" + update_time +
                ", type='" + type + '\'' +
                '}';
    }
}

OrderJoin reads the two Canal topics, parses the JSON change events into POJOs, assigns event-time watermarks, and left-joins order details to order mains with coGroup over tumbling event-time windows:

public class OrderJoin {
    public static void main(String[] args) throws Exception {

        ParameterTool parameters = ParameterTool.fromPropertiesFile(args[0]);

        // use event time as the time characteristic
        FlinkUtilsV2.getEnv().setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        DataStream<String> orderMainLinesDataStream = FlinkUtilsV2.createKafkaDataStream(parameters, "ordermain", "g1", SimpleStringSchema.class);

        DataStream<String> orderDetailLinesDataStream = FlinkUtilsV2.createKafkaDataStream(parameters, "orderdetail", "g1", SimpleStringSchema.class);

        // parse the Canal JSON into OrderMain objects (flatMap + filter in one ProcessFunction)
        SingleOutputStreamOperator<OrderMain> orderMainDataStream = orderMainLinesDataStream.process(new ProcessFunction<String, OrderMain>() {

            @Override
            public void processElement(String line, Context ctx, Collector<OrderMain> out) throws Exception {
                try {
                    JSONObject jsonObject = JSON.parseObject(line);
                    String type = jsonObject.getString("type");
                    if (type.equals("INSERT") || type.equals("UPDATE")) {
                        JSONArray jsonArray = jsonObject.getJSONArray("data");
                        for (int i = 0; i < jsonArray.size(); i++) {
                            OrderMain orderMain = jsonArray.getObject(i, OrderMain.class);
                            orderMain.setType(type); // keep the operation type
                            out.collect(orderMain);
                        }
                    }
                } catch (Exception e) {
                    // malformed records are dropped here; log or side-output them in production
                }
            }
        });

        // parse the Canal JSON into OrderDetail objects
        SingleOutputStreamOperator<OrderDetail> orderDetailDataStream = orderDetailLinesDataStream.process(new ProcessFunction<String, OrderDetail>() {

            @Override
            public void processElement(String line, Context ctx, Collector<OrderDetail> out) throws Exception {
                try {
                    JSONObject jsonObject = JSON.parseObject(line);
                    String type = jsonObject.getString("type");
                    if (type.equals("INSERT") || type.equals("UPDATE")) {
                        JSONArray jsonArray = jsonObject.getJSONArray("data");
                        for (int i = 0; i < jsonArray.size(); i++) {
                            OrderDetail orderDetail = jsonArray.getObject(i, OrderDetail.class);
                            orderDetail.setType(type); // keep the operation type
                            out.collect(orderDetail);
                        }
                    }
                } catch (Exception e) {
                    // malformed records are dropped here; log or side-output them in production
                }
            }
        });

        int delaySeconds = 2;

        // extract event time and generate watermarks, tolerating 2 seconds of out-of-orderness
        SingleOutputStreamOperator<OrderMain> orderMainStreamWithWaterMark = orderMainDataStream.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<OrderMain>(Time.seconds(delaySeconds)) {
            @Override
            public long extractTimestamp(OrderMain element) {
                return element.getCreate_time().getTime();
            }
        });

        SingleOutputStreamOperator<OrderDetail> orderDetailStreamWithWaterMark = orderDetailDataStream.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<OrderDetail>(Time.seconds(delaySeconds)) {
            @Override
            public long extractTimestamp(OrderDetail element) {
                return element.getCreate_time().getTime();
            }
        });

        // left outer join, with the order-detail stream as the left side
        DataStream<Tuple2<OrderDetail, OrderMain>> joined = orderDetailStreamWithWaterMark.coGroup(orderMainStreamWithWaterMark)
                .where(new KeySelector<OrderDetail, Long>() {
                    @Override
                    public Long getKey(OrderDetail value) throws Exception {
                        return value.getOrder_id();
                    }
                })
                .equalTo(new KeySelector<OrderMain, Long>() {
                    @Override
                    public Long getKey(OrderMain value) throws Exception {
                        return value.getOid();
                    }
                })
                .window(TumblingEventTimeWindows.of(Time.seconds(5)))
                .apply(new CoGroupFunction<OrderDetail, OrderMain, Tuple2<OrderDetail, OrderMain>>() {

                    @Override
                    public void coGroup(Iterable<OrderDetail> first, Iterable<OrderMain> second, Collector<Tuple2<OrderDetail, OrderMain>> out) throws Exception {

                        // emit every matching (detail, main) pair; a detail with no match is emitted with null
                        for (OrderDetail orderDetail : first) {
                            boolean isJoined = false;
                            for (OrderMain orderMain : second) {
                                out.collect(Tuple2.of(orderDetail, orderMain));
                                isJoined = true;
                            }
                            if (!isJoined) {
                                out.collect(Tuple2.of(orderDetail, null));
                            }
                        }
                    }
                });

        joined.print();

        FlinkUtilsV2.getEnv().execute();
    }
}


OrderJoin has a gap: an OrderDetail record that arrives after its window fires is silently dropped, so its order disappears from the join result. OrderJoinAdv closes that gap by windowing the detail stream separately, routing late records to a side output, and back-filling the missing OrderMain with a MySQL lookup:

public class OrderJoinAdv {


    public static void main(String[] args) throws Exception {

        ParameterTool parameters = ParameterTool.fromPropertiesFile(args[0]);

        FlinkUtilsV2.getEnv().setParallelism(1);

        // use event time as the time characteristic
        FlinkUtilsV2.getEnv().setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        DataStream<String> orderMainLinesDataStream = FlinkUtilsV2.createKafkaDataStream(parameters, "ordermain", "g1", SimpleStringSchema.class);

        DataStream<String> orderDetailLinesDataStream = FlinkUtilsV2.createKafkaDataStream(parameters, "orderdetail", "g1", SimpleStringSchema.class);

        // parse the Canal JSON into OrderMain objects
        SingleOutputStreamOperator<OrderMain> orderMainDataStream = orderMainLinesDataStream.process(new ProcessFunction<String, OrderMain>() {

            @Override
            public void processElement(String line, Context ctx, Collector<OrderMain> out) throws Exception {
                try {
                    JSONObject jsonObject = JSON.parseObject(line);
                    String type = jsonObject.getString("type");
                    if (type.equals("INSERT") || type.equals("UPDATE")) {
                        JSONArray jsonArray = jsonObject.getJSONArray("data");
                        for (int i = 0; i < jsonArray.size(); i++) {
                            OrderMain orderMain = jsonArray.getObject(i, OrderMain.class);
                            orderMain.setType(type); // keep the operation type
                            out.collect(orderMain);
                        }
                    }
                } catch (Exception e) {
                    // malformed records are dropped here; log or side-output them in production
                }
            }
        });

        // parse the Canal JSON into OrderDetail objects
        SingleOutputStreamOperator<OrderDetail> orderDetailDataStream = orderDetailLinesDataStream.process(new ProcessFunction<String, OrderDetail>() {

            @Override
            public void processElement(String line, Context ctx, Collector<OrderDetail> out) throws Exception {
                try {
                    JSONObject jsonObject = JSON.parseObject(line);
                    String type = jsonObject.getString("type");
                    if (type.equals("INSERT") || type.equals("UPDATE")) {
                        JSONArray jsonArray = jsonObject.getJSONArray("data");
                        for (int i = 0; i < jsonArray.size(); i++) {
                            OrderDetail orderDetail = jsonArray.getObject(i, OrderDetail.class);
                            orderDetail.setType(type); // keep the operation type
                            out.collect(orderDetail);
                        }
                    }
                } catch (Exception e) {
                    // malformed records are dropped here; log or side-output them in production
                }
            }
        });

        int delaySeconds = 2;
        int windowSize = 5;

        // extract event time and generate watermarks, tolerating 2 seconds of out-of-orderness
        SingleOutputStreamOperator<OrderMain> orderMainStreamWithWaterMark = orderMainDataStream.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<OrderMain>(Time.seconds(delaySeconds)) {
            @Override
            public long extractTimestamp(OrderMain element) {
                return element.getCreate_time().getTime();
            }
        });

        SingleOutputStreamOperator<OrderDetail> orderDetailStreamWithWaterMark = orderDetailDataStream.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<OrderDetail>(Time.seconds(delaySeconds)) {
            @Override
            public long extractTimestamp(OrderDetail element) {
                return element.getCreate_time().getTime();
            }
        });

        // side-output tag for late order-detail records
        OutputTag<OrderDetail> lateTag = new OutputTag<OrderDetail>("late-data") {};

        // window the detail stream on its own, with the same window size as the coGroup below,
        // so that late records can be captured via the side output
        SingleOutputStreamOperator<OrderDetail> orderDetailWithWindow = orderDetailStreamWithWaterMark.windowAll(TumblingEventTimeWindows.of(Time.seconds(windowSize)))
                .sideOutputLateData(lateTag) // tag late records
                .apply(new AllWindowFunction<OrderDetail, OrderDetail, TimeWindow>() {
                    @Override
                    public void apply(TimeWindow window, Iterable<OrderDetail> values, Collector<OrderDetail> out) throws Exception {
                        for (OrderDetail value : values) {
                            out.collect(value);
                        }
                    }
                });

        // the late order-detail records
        DataStream<OrderDetail> lateOrderDetailStream = orderDetailWithWindow.getSideOutput(lateTag);

        // late detail records are rare, so async I/O is unnecessary here; pair each late
        // detail with a null OrderMain and back-fill from MySQL after the union below
        SingleOutputStreamOperator<Tuple2<OrderDetail, OrderMain>> lateOrderDetailAndOrderMain = lateOrderDetailStream.map(new RichMapFunction<OrderDetail, Tuple2<OrderDetail, OrderMain>>() {
            @Override
            public Tuple2<OrderDetail, OrderMain> map(OrderDetail detail) throws Exception {
                return Tuple2.of(detail, null);
            }
        });


        // left outer join, with the (already windowed) order-detail stream as the left side
        DataStream<Tuple2<OrderDetail, OrderMain>> joined = orderDetailWithWindow.coGroup(orderMainStreamWithWaterMark)
                .where(new KeySelector<OrderDetail, Long>() {
                    @Override
                    public Long getKey(OrderDetail value) throws Exception {
                        return value.getOrder_id();
                    }
                })
                .equalTo(new KeySelector<OrderMain, Long>() {
                    @Override
                    public Long getKey(OrderMain value) throws Exception {
                        return value.getOid();
                    }
                })
                .window(TumblingEventTimeWindows.of(Time.seconds(windowSize)))
                .apply(new CoGroupFunction<OrderDetail, OrderMain, Tuple2<OrderDetail, OrderMain>>() {
                    @Override
                    public void coGroup(Iterable<OrderDetail> first, Iterable<OrderMain> second, Collector<Tuple2<OrderDetail, OrderMain>> out) throws Exception {
                        // emit every matching (detail, main) pair; a detail with no match is emitted with null
                        for (OrderDetail orderDetail : first) {
                            boolean isJoined = false;
                            for (OrderMain orderMain : second) {
                                out.collect(Tuple2.of(orderDetail, orderMain));
                                isJoined = true;
                            }
                            if (!isJoined) {
                                out.collect(Tuple2.of(orderDetail, null));
                            }
                        }
                    }
                });


        // union the windowed join result with the late records, then back-fill any
        // missing OrderMain by querying MySQL
        joined.union(lateOrderDetailAndOrderMain).map(new RichMapFunction<Tuple2<OrderDetail, OrderMain>, Tuple2<OrderDetail, OrderMain>>() {

            private transient Connection connection;

            @Override
            public void open(Configuration parameters) throws Exception {
                // open the database connection once per subtask
                connection = DriverManager.getConnection("jdbc:mysql://172.16.100.100:3306/bigdata?characterEncoding=UTF-8", "root", "123456");
            }

            @Override
            public Tuple2<OrderDetail, OrderMain> map(Tuple2<OrderDetail, OrderMain> tp) throws Exception {
                // any detail that failed to join an OrderMain is looked up in MySQL
                if (tp.f1 == null) {
                    tp.f1 = queryOrderMainFromMySQL(tp.f0.getOrder_id(), connection);
                }
                return tp;
            }

            @Override
            public void close() throws Exception {
                // close the database connection
                connection.close();
            }
        }).print();


        FlinkUtilsV2.getEnv().execute();
    }

    private static OrderMain queryOrderMainFromMySQL(Long order_id, Connection connection) throws Exception {

        PreparedStatement preparedStatement = connection.prepareStatement("SELECT * FROM ordermain WHERE oid = ?");
        // bind the parameter
        preparedStatement.setLong(1, order_id);
        // run the query
        ResultSet resultSet = preparedStatement.executeQuery();

        // read the result, if any
        OrderMain orderMain = null;
        if (resultSet.next()) {
            orderMain = new OrderMain();
            orderMain.setOid(resultSet.getLong("oid"));
            orderMain.setCreate_time(resultSet.getDate("create_time"));
            orderMain.setTotal_money(resultSet.getDouble("total_money"));
            orderMain.setStatus(resultSet.getInt("status"));
        }
        resultSet.close();
        preparedStatement.close();
        return orderMain;
    }
}
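The comment in OrderJoinAdv notes that async I/O was skipped because late detail records are rare. If the late stream were heavy, Flink's AsyncDataStream could overlap the MySQL lookups instead of blocking the map operator. A minimal sketch under that assumption (same package as OrderJoinAdv, assuming queryOrderMainFromMySQL is made non-private; a production version would use a connection pool rather than the single shared connection here):

import java.sql.Connection;
import java.sql.DriverManager;
import java.util.Collections;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.AsyncDataStream;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.functions.async.ResultFuture;
import org.apache.flink.streaming.api.functions.async.RichAsyncFunction;

// drop-in alternative to the lateOrderDetailAndOrderMain map in OrderJoinAdv
public class AsyncLateJoin {

    public static DataStream<Tuple2<OrderDetail, OrderMain>> backfill(DataStream<OrderDetail> lateStream) {
        return AsyncDataStream.unorderedWait(
                lateStream,
                new RichAsyncFunction<OrderDetail, Tuple2<OrderDetail, OrderMain>>() {

                    private transient ExecutorService executor;
                    private transient Connection connection;

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        // single-threaded executor so the one JDBC connection is never shared across threads
                        executor = Executors.newSingleThreadExecutor();
                        connection = DriverManager.getConnection(
                                "jdbc:mysql://172.16.100.100:3306/bigdata?characterEncoding=UTF-8",
                                "root", "123456");
                    }

                    @Override
                    public void asyncInvoke(OrderDetail detail,
                                            ResultFuture<Tuple2<OrderDetail, OrderMain>> resultFuture) {
                        // run the blocking JDBC lookup off the operator thread
                        CompletableFuture
                                .supplyAsync(() -> {
                                    try {
                                        return OrderJoinAdv.queryOrderMainFromMySQL(detail.getOrder_id(), connection);
                                    } catch (Exception e) {
                                        return null; // treat lookup failures as "no match"
                                    }
                                }, executor)
                                .thenAccept(orderMain ->
                                        resultFuture.complete(Collections.singleton(Tuple2.of(detail, orderMain))));
                    }

                    @Override
                    public void close() throws Exception {
                        executor.shutdown();
                        connection.close();
                    }
                },
                3, TimeUnit.SECONDS, // per-request timeout
                100);                // max in-flight requests
    }
}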
