Order data shows up in practically every business system, usually split across two tables: an order main table and an order detail table.
Order main table:
order id, order status, order total amount, order time, user id
Order detail table:
order main table id, product id, product category id, product unit price, product quantity
The requirement: compute the total transaction amount for a given product category. The two tables are streamed out of MySQL as Canal binlog messages (JSON); one sample message per table:
{"data":[{"oid":"29001","create_time":"2020-03-16 00:38:01","total_money":"2000.0","status":"1",
"update_time":"2020-03-16 00:38:01","uid":"9999","province":"北京市"}],"database":"doit12",
"es":1584333481000,"id":4,"isDdl":false,"mysqlType":{"oid":"bigint(20)","create_time":"timestamp",
"total_money":"double","status":"int(11)","update_time":"timestamp","uid":"varchar(20)",
"province":"varchar(50)"},"old":null,"pkNames":["oid"],"sql":"","sqlType":{"oid":-5,"create_time":93,
"total_money":8,"status":4,"update_time":93,"uid":12,"province":12},"table":"ordermain",
"ts":1584333481540,"type":"INSERT"}
{"data":[{"id":"10","order_id":"29001","category_id":"2","sku":"20001","money":"1000.0","amount":"1",
"create_time":"2020-03-16 00:38:01","update_time":"2020-03-16 00:38:01"}],"database":"doit12",
"es":1584333481000,"id":4,"isDdl":false,"mysqlType":{"id":"bigint(20)","order_id":"bigint(20)",
"category_id":"int(11)","sku":"varchar(50)","money":"double","amount":"int(11)",
"create_time":"timestamp","update_time":"timestamp"},"old":null,"pkNames":["id"],"sql":"","sqlType":{"id
":-5,"order_id":-5,"category_id":4,"sku":12,"money":8,"amount":4,"create_time":93,"update_time":93}
,"table":"orderdetail","ts":1584333481600,"type":"INSERT"}
Records that fail to join fall into two cases:
1. The order-detail record lands in the window, but the matching order-main record never shows up there; the window join then emits (detail, null).
2. The order-detail record itself arrives after the watermark has passed its window, so it never enters the join at all and must be captured as late data via a side output.
Both cases are repaired afterwards by looking the order-main row up in MySQL. First, a reusable utility class for creating Kafka source streams:
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
public class FlinkUtilsV2 {
private static StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// Overload 1: topics and group id both come from the config file (kafka.topics, group.id)
public static <T> DataStream<T> createKafkaDataStream(ParameterTool parameters,
Class<? extends DeserializationSchema<T>> clazz) throws Exception {
String topics = parameters.getRequired("kafka.topics");
String groupId = parameters.getRequired("group.id");
return createKafkaDataStream(parameters, topics, groupId, clazz);
}
// Overload 2: topics passed in; group id comes from the config file
public static <T> DataStream<T> createKafkaDataStream(ParameterTool parameters, String topics,
Class<? extends DeserializationSchema<T>> clazz) throws Exception {
String groupId = parameters.getRequired("group.id");
return createKafkaDataStream(parameters, topics, groupId, clazz);
}
// Overload 3: topics and group id both passed in explicitly
public static <T> DataStream<T> createKafkaDataStream(ParameterTool parameters, String topics, String groupId,
Class<? extends DeserializationSchema<T>> clazz) throws Exception {
// Register the config as global job parameters so every operator can read it
env.getConfig().setGlobalJobParameters(parameters);
// A Kafka producer/sink would additionally need this:
//parameters.getProperties().setProperty("transaction.timeout.ms", 1000 * 60 * 5 + "");
// Enable exactly-once checkpointing (interval from checkpoint.interval, default 10s)
env.enableCheckpointing(parameters.getLong("checkpoint.interval", 10000L),
CheckpointingMode.EXACTLY_ONCE);
// Fixed-delay restart strategy: restart.times attempts, 3 seconds apart
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(
parameters.getInt("restart.times", 10), Time.seconds(3)));
// Configure the state backend
String path = parameters.get("state.backend.path");
if (path != null) {
// Better practice: configure the state backend globally in flink-conf.yaml
env.setStateBackend(new FsStateBackend(path));
}
// Retain externalized checkpoints when the job is cancelled
env.getCheckpointConfig().enableExternalizedCheckpoints(
CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
// Allow up to 4 checkpoints in flight at once (concurrent checkpoints, not operator parallelism)
env.getCheckpointConfig().setMaxConcurrentCheckpoints(4);
// Multiple topics may be passed as a comma-separated list
List<String> topicList = Arrays.asList(topics.split(","));
Properties properties = parameters.getProperties();
properties.setProperty("group.id", groupId);
// Create the FlinkKafkaConsumer
FlinkKafkaConsumer<T> kafkaConsumer = new FlinkKafkaConsumer<T>(
topicList,
clazz.newInstance(),
properties
);
// By default (true) Flink also commits offsets back to Kafka when a checkpoint completes,
// which only matters for external monitoring; recovery always uses the offsets stored in
// the checkpoint itself. The commit is disabled here.
kafkaConsumer.setCommitOffsetsOnCheckpoints(false);
// Return the DataStream of raw records
return env.addSource(kafkaConsumer);
}
public static StreamExecutionEnvironment getEnv() {
return env;
}
}
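A minimal sketch of how the utility is meant to be wired up (the driver class name and the print sink are illustrative, not part of the original project; the topic and group id mirror the join job below):
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStream;
public class FlinkUtilsV2Demo {
    public static void main(String[] args) throws Exception {
        // args[0] is the path of a .properties file like the one listed below
        ParameterTool parameters = ParameterTool.fromPropertiesFile(args[0]);
        // Overload 3: topic and group id passed in explicitly
        DataStream<String> lines = FlinkUtilsV2.createKafkaDataStream(
                parameters, "ordermain", "g011", SimpleStringSchema.class);
        lines.print();
        FlinkUtilsV2.getEnv().execute("FlinkUtilsV2Demo");
    }
}
The two POJOs the Canal messages are mapped onto: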
import java.util.Date;
public class OrderMainBean {
private Long oid;
private Date create_time;
private Double total_money;
private int status;
private Date update_time;
private String province;
private String uid;
// Database operation type: INSERT or UPDATE
private String type;
public OrderMainBean() {
}
public OrderMainBean(Long oid, Date create_time, Double total_money, int status, Date update_time,
String province, String uid, String type) {
this.oid = oid;
this.create_time = create_time;
this.total_money = total_money;
this.status = status;
this.update_time = update_time;
this.province = province;
this.uid = uid;
this.type = type;
}
@Override
public String toString() {
return "OrderMainBean{" +
"oid=" + oid +
", create_time=" + create_time +
", total_money=" + total_money +
", status=" + status +
", update_time=" + update_time +
", province='" + province + '\'' +
", uid='" + uid + '\'' +
", type='" + type + '\'' +
'}';
}
public Long getOid() {
return oid;
}
public void setOid(Long oid) {
this.oid = oid;
}
public Date getCreate_time() {
return create_time;
}
public void setCreate_time(Date create_time) {
this.create_time = create_time;
}
public Double getTotal_money() {
return total_money;
}
public void setTotal_money(Double total_money) {
this.total_money = total_money;
}
public int getStatus() {
return status;
}
public void setStatus(int status) {
this.status = status;
}
public Date getUpdate_time() {
return update_time;
}
public void setUpdate_time(Date update_time) {
this.update_time = update_time;
}
public String getProvince() {
return province;
}
public void setProvince(String province) {
this.province = province;
}
public String getUid() {
return uid;
}
public void setUid(String uid) {
this.uid = uid;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
}
import java.util.Date;
public class OrderDetailBean {
private Long id;
private Long order_id;
private int category_id;
private Long sku;
private Double money;
private int amount;
private Date create_time;
private Date update_time;
// Database operation type: INSERT or UPDATE
private String type;
public OrderDetailBean() {
}
public OrderDetailBean(Long id, Long order_id, int category_id, Long sku, Double money, int amount,
Date create_time, Date update_time, String type) {
this.id = id;
this.order_id = order_id;
this.category_id = category_id;
this.sku = sku;
this.money = money;
this.amount = amount;
this.create_time = create_time;
this.update_time = update_time;
this.type = type;
}
@Override
public String toString() {
return "OrderDetailBean{" +
"id=" + id +
", order_id=" + order_id +
", category_id=" + category_id +
", sku=" + sku +
", money=" + money +
", amount=" + amount +
", create_time=" + create_time +
", update_time=" + update_time +
", type='" + type + '\'' +
'}';
}
public Long getId() {
return id;
}
public void setId(Long id) {
this.id = id;
}
public Long getOrder_id() {
return order_id;
}
public void setOrder_id(Long order_id) {
this.order_id = order_id;
}
public int getCategory_id() {
return category_id;
}
public void setCategory_id(int category_id) {
this.category_id = category_id;
}
public Long getSku() {
return sku;
}
public void setSku(Long sku) {
this.sku = sku;
}
public Double getMoney() {
return money;
}
public void setMoney(Double money) {
this.money = money;
}
public int getAmount() {
return amount;
}
public void setAmount(int amount) {
this.amount = amount;
}
public Date getCreate_time() {
return create_time;
}
public void setCreate_time(Date create_time) {
this.create_time = create_time;
}
public Date getUpdate_time() {
return update_time;
}
public void setUpdate_time(Date update_time) {
this.update_time = update_time;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
}
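To make the parsing step in the join job concrete, here is a minimal, self-contained sketch of how fastjson maps the data array of a Canal message onto one of these beans (the class name CanalParseDemo is illustrative; the JSON is the orderdetail sample from above, abbreviated):
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
public class CanalParseDemo {
    public static void main(String[] args) {
        String line = "{\"data\":[{\"id\":\"10\",\"order_id\":\"29001\",\"category_id\":\"2\","
                + "\"sku\":\"20001\",\"money\":\"1000.0\",\"amount\":\"1\"}],\"type\":\"INSERT\"}";
        JSONObject jsonObject = JSON.parseObject(line);
        // Each element of "data" is one changed row; fastjson coerces the string
        // values into the bean's Long/int/Double fields by name
        JSONArray data = jsonObject.getJSONArray("data");
        OrderDetailBean bean = data.getObject(0, OrderDetailBean.class);
        bean.setType(jsonObject.getString("type"));
        System.out.println(bean);
    }
}
Next, the job's configuration file (passed to the program as args[0]):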
checkpoint.interval=30000
restart.times=3
state.backend.path=file:///D:/doit12logs
bootstrap.servers=linux01:9092,linux02:9092,linux03:9092
group.id=g22
auto.offset.reset=earliest
The main join job:
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.functions.CoGroupFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.functions.windowing.AllWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import java.sql.*;
/**
* @date: 2020/3/16 21:22
* @site: www.ianlou.cn
* @author: lekko 六水
* @qq: 496208110
* @description:
*/
public class OrderJoinAdv {
public static void main(String[] args) throws Exception {
// Load the config file passed as the first program argument
ParameterTool propertiesFile = ParameterTool.fromPropertiesFile(args[0]);
StreamExecutionEnvironment env = FlinkUtilsV2.getEnv();
// Use event time as the time characteristic
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
// Step 1: create the two Kafka source streams
DataStream<String> orderMainLines = FlinkUtilsV2.createKafkaDataStream(propertiesFile, "ordermain",
"g011", SimpleStringSchema.class);
DataStream<String> orderDetailLines = FlinkUtilsV2.createKafkaDataStream(propertiesFile, "orderdetail",
"g011", SimpleStringSchema.class);
// Step 2: parse the Canal JSON pulled from Kafka
// Parse the ordermain stream
SingleOutputStreamOperator<OrderMainBean> orderMainBeanDS = orderMainLines.process(new ProcessFunction<String,
OrderMainBean>() {
@Override
public void processElement(String input, Context ctx, Collector<OrderMainBean> out) throws Exception {
try {
JSONObject jsonObject = JSON.parseObject(input);
String type = jsonObject.getString("type");
if (type.equals("INSERT") || type.equals("UPDATE")) {
// Pull the row objects out of the data[] array
JSONArray jsonArray = jsonObject.getJSONArray("data");
for (int i = 0; i < jsonArray.size(); i++) {
OrderMainBean orderMain = jsonArray.getObject(i, OrderMainBean.class);
orderMain.setType(type); // record the operation type
out.collect(orderMain);
}
}
} catch (Exception e) {
//e.printStackTrace();
//TODO route malformed records to an error log or side output
}
}
});
//Parse the orderdetail stream
SingleOutputStreamOperator<OrderDetailBean> orderDetailBeanDS =
orderDetailLines.process(new ProcessFunction<String,
OrderDetailBean>() {
@Override
public void processElement(String input, Context ctx, Collector<OrderDetailBean> out) throws Exception {
try {
JSONObject jsonObject = JSON.parseObject(input);
String type = jsonObject.getString("type");
if (type.equals("INSERT") || type.equals("UPDATE")) {
JSONArray jsonArray = jsonObject.getJSONArray("data");
for (int i = 0; i < jsonArray.size(); i++) {
OrderDetailBean orderDetail = jsonArray.getObject(i, OrderDetailBean.class);
orderDetail.setType(type); // record the operation type
out.collect(orderDetail);
}
}
} catch (Exception e) {
//e.printStackTrace();
//TODO route malformed records to an error log or side output
}
}
});
int delaySeconds = 2; // max out-of-orderness (seconds) tolerated by the watermark
int windowSize = 5; // tumbling event-time window length (seconds)
SingleOutputStreamOperator<OrderMainBean> orderMainWithWaterMark =
orderMainBeanDS.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor
<OrderMainBean>(Time.seconds(delaySeconds)) {
@Override
public long extractTimestamp(OrderMainBean element) {
return element.getCreate_time().getTime();
}
});
SingleOutputStreamOperator<OrderDetailBean> orderDetailWithWaterMark =
orderDetailBeanDS.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor
<OrderDetailBean>(Time.seconds(delaySeconds)) {
@Override
public long extractTimestamp(OrderDetailBean element) {
return element.getCreate_time().getTime();
}
});
DataStream<Tuple2<OrderDetailBean, OrderMainBean>> joined =
orderDetailWithWaterMark.coGroup(orderMainWithWaterMark)
.where(new KeySelector<OrderDetailBean, Long>() {
@Override
public Long getKey(OrderDetailBean value) throws Exception {
return value.getOrder_id();
}
})
.equalTo(new KeySelector<OrderMainBean, Long>() {
@Override
public Long getKey(OrderMainBean value) throws Exception {
return value.getOid();
}
})
.window(TumblingEventTimeWindows.of(Time.seconds(windowSize)))
.apply(new CoGroupFunction<OrderDetailBean, OrderMainBean, Tuple2<OrderDetailBean,
OrderMainBean>>() {
// For each key in the window, coGroup hands us all details (first) and all mains (second);
// emitting (detail, null) when the main side is empty emulates a left outer join
@Override
public void coGroup(Iterable<OrderDetailBean> first, Iterable<OrderMainBean> second,
Collector<Tuple2<OrderDetailBean, OrderMainBean>> out) throws Exception {
for (OrderDetailBean orderDetailBean : first) {
boolean isJoined = false;
for (OrderMainBean orderMainBean : second) {
out.collect(Tuple2.of(orderDetailBean, orderMainBean));
isJoined = true;
}
if (!isJoined) {
out.collect(Tuple2.of(orderDetailBean, null));
}
}
}
});
// Records that fail to join fall into two cases:
// 1) the detail reached the window but its order-main did not -> emitted above as (detail, null);
// 2) the detail itself arrived after the watermark -> captured below as late data via a side output.
// Side-output tag for detail records that arrive after the watermark
OutputTag<OrderDetailBean> outputTag = new OutputTag<OrderDetailBean>("left-late-data") {
};
// Same window type and size as the join above, so late records line up with the join's windows
SingleOutputStreamOperator<OrderDetailBean> orderDetailLateData =
orderDetailWithWaterMark.windowAll(TumblingEventTimeWindows.of(Time.seconds(windowSize)))
.sideOutputLateData(outputTag)
.apply(new AllWindowFunction<OrderDetailBean, OrderDetailBean, TimeWindow>() {
@Override
public void apply(TimeWindow window, Iterable<OrderDetailBean> values,
Collector<OrderDetailBean> out) throws Exception {
//Deliberately emit nothing: this window exists only so sideOutputLateData can capture late records
//apply() is used because it has access to the window's full contents
}
});
// Pull the left table's (orderdetail) late records out of the side output
DataStream<OrderDetailBean> leftLateDate = orderDetailLateData.getSideOutput(outputTag);
// Case 2: look up the matching order-main row in MySQL for each late detail record
SingleOutputStreamOperator<Tuple2<OrderDetailBean, OrderMainBean>> lateOrderDetailAndOrderMain =
leftLateDate.map(new RichMapFunction<OrderDetailBean, Tuple2<OrderDetailBean, OrderMainBean>>() {
private transient Connection conn = null;
// Open the JDBC connection once per parallel subtask
@Override
public void open(Configuration parameters) throws Exception {
conn = DriverManager.getConnection(
"jdbc:mysql://linux04:3306/doit12?characterEncoding=utf8",
"root",
"123456"
);
}
//Query MySQL for the order-main row
@Override
public Tuple2<OrderDetailBean, OrderMainBean> map(OrderDetailBean value) throws Exception {
Long order_id = value.getOrder_id();
String type = value.getType();
OrderMainBean orderMainB = queryOrderMainFromMySQL(order_id, type, conn);
return Tuple2.of(value, orderMainB);
}
// Close the JDBC connection
@Override
public void close() throws Exception {
conn.close();
}
});
// Case 1: the detail joined the window but the order-main side was empty -> fill it from MySQL
SingleOutputStreamOperator<Tuple2<OrderDetailBean, OrderMainBean>> lateOrderMainAndOrderMain =
joined.map(new RichMapFunction<Tuple2<OrderDetailBean, OrderMainBean>, Tuple2<OrderDetailBean,
OrderMainBean>>() {
private transient Connection conn = null;
@Override
public void open(Configuration parameters) throws Exception {
conn = DriverManager.getConnection(
"jdbc:mysql://linux04:3306/doit12?characterEncoding=utf8",
"root",
"123456"
);
}
//Only query MySQL when the right side (orderMain) is missing; joined pairs pass through untouched
@Override
public Tuple2<OrderDetailBean, OrderMainBean> map(Tuple2<OrderDetailBean, OrderMainBean> value) throws Exception {
OrderMainBean orderMainB = null;
if (value.f1 == null) {
Long order_id = value.f0.getOrder_id();
String type = value.f0.getType();
orderMainB = queryOrderMainFromMySQL(order_id, type, conn);
}
return Tuple2.of(value.f0, orderMainB);
}
@Override
public void close() throws Exception {
conn.close();
}
});
// Union the repaired streams: every order-detail record now carries its order-main row
DataStream<Tuple2<OrderDetailBean, OrderMainBean>> allOrderStream =
lateOrderMainAndOrderMain.union(lateOrderDetailAndOrderMain);
allOrderStream.print();
FlinkUtilsV2.getEnv().execute("OrderJoinAdv");
}
private static OrderMainBean queryOrderMainFromMySQL(Long order_id, String type, Connection conn) throws Exception {
String sql = "select oid, create_time, total_money, status, update_time, province, uid from ordermain" +
" where oid = ?";
try (PreparedStatement preparedStatement = conn.prepareStatement(sql)) {
preparedStatement.setLong(1, order_id);
try (ResultSet resultSet = preparedStatement.executeQuery()) {
// The cursor starts before the first row; without next() every getXxx() call throws
if (!resultSet.next()) {
return null; // the order-main row has not been written to MySQL yet
}
long oid = resultSet.getLong("oid");
// getTimestamp keeps the time-of-day part that getDate would truncate
Timestamp create_time = resultSet.getTimestamp("create_time");
double total_money = resultSet.getDouble("total_money");
int status = resultSet.getInt("status");
Timestamp update_time = resultSet.getTimestamp("update_time");
String province = resultSet.getString("province");
String uid = resultSet.getString("uid");
return new OrderMainBean(oid, create_time, total_money, status, update_time,
province, uid, type);
}
}
}
}
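The pipeline above stops at print(), but the section set out to total the transaction amount per product category. A possible continuation (a sketch, not the original author's code): place it on allOrderStream before env.execute(), assuming money is the unit price, amount the quantity, and that status == 1 marks a settled order; Types is org.apache.flink.api.common.typeinfo.Types.
// Keep only details whose order-main was found and (by assumption) settled
SingleOutputStreamOperator<Tuple2<Integer, Double>> categoryAmount = allOrderStream
        .filter(t -> t.f1 != null && t.f1.getStatus() == 1)
        // category id -> unit price * quantity for this detail row
        .map(t -> Tuple2.of(t.f0.getCategory_id(), t.f0.getMoney() * t.f0.getAmount()))
        .returns(Types.TUPLE(Types.INT, Types.DOUBLE))
        .keyBy(0)
        .sum(1);
// Prints a running total per category; a production job would window this or write it to a sink
categoryAmount.print();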