1. Collecting business data
2. Using Canal
1. Create a regular MySQL user
CREATE USER canal IDENTIFIED BY 'canal123'; -- create the user
GRANT SELECT, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'canal'@'%'; -- grant the required privileges
FLUSH PRIVILEGES; -- reload the grant tables
If this fails, relax the password validation policy first:
set global validate_password_policy=LOW;
set global validate_password_length=6;
2. Modify the MySQL configuration
Canal works by reading the MySQL binlog, so binlog writing must be enabled on MySQL; the ROW binlog format is recommended.
vi /etc/my.cnf
[mysqld]
log-bin=mysql-bin #enable binlog
binlog-format=ROW #use the ROW format
server_id=1 #required for MySQL replication; must not clash with Canal's slaveId
service mysqld restart  #restart MySQL
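After restarting, it is worth confirming that binlog is actually on before starting Canal. These are standard MySQL statements (the values they report depend on your environment):
SHOW VARIABLES LIKE 'log_bin';       -- should report ON
SHOW VARIABLES LIKE 'binlog_format'; -- should report ROW
SHOW MASTER STATUS;                  -- shows the current binlog file and position Canal will read from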
3. Modify the Canal configuration files
cd /bigdata/canal/
vi canal.properties
#integrate Canal with Kafka and send the change data to Kafka
canal.serverMode = kafka
#Kafka broker addresses
canal.mq.servers = linux03:9092,linux04:9092,linux05:9092
#number of retries when sending to Kafka fails
canal.mq.retries = 10
Modify Canal's instance configuration file:
vi conf/example/instance.properties
#address of the source MySQL database
canal.instance.master.address=127.0.0.1:3306
#MySQL username
canal.instance.dbUsername=canal
#MySQL password
canal.instance.dbPassword=canal123
#the default topic is commented out (if enabled, all data would be written to this single topic)
#canal.mq.topic=example
# dynamic topic route by schema or table regex
#use dynamic topics: the rule below sends table db1.tb1 to the Kafka topic topic1
#and table hhh.user to the Kafka topic user1
canal.mq.dynamicTopic=topic1:db1\\.tb1,user1:hhh\\.user
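With canal.serverMode = kafka, Canal publishes each binlog change to Kafka as a flat JSON message whose type field carries the operation (INSERT/UPDATE/DELETE) and whose data field is an array of changed rows; the Flink jobs later in these notes parse exactly that shape with fastjson. A minimal standalone sketch of that parsing (the sample message, its field values and the class name are made up for illustration):

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;

public class CanalMessageParseDemo {
    public static void main(String[] args) {
        // assumed example of a Canal flat message; the field values are made up for illustration
        String line = "{\"database\":\"db1\",\"table\":\"tb1\",\"type\":\"INSERT\"," +
                "\"data\":[{\"oid\":\"1001\",\"total_money\":\"99.9\",\"status\":\"1\"}]}";
        JSONObject message = JSON.parseObject(line);
        String type = message.getString("type"); // INSERT / UPDATE / DELETE
        if ("INSERT".equals(type) || "UPDATE".equals(type)) {
            JSONArray rows = message.getJSONArray("data"); // one element per changed row
            for (int i = 0; i < rows.size(); i++) {
                JSONObject row = rows.getJSONObject(i);
                System.out.println(type + " -> oid=" + row.getString("oid"));
            }
        }
    }
}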
4. Start Canal
bin/startup.sh
5. Troubleshooting
If it fails to start:
1. Re-initialize the MySQL data directory (warning: this removes all existing MySQL data):
rm -rf /var/lib/mysql
mysqld --initialize --user=mysql
3. Join example
2021-01-26 09:00:01,user1,tuan001,手机,product005,9.9,create
2021-01-26 09:00:08,user2,tuan001,手机,product005,9.9,join
2021-01-26 09:00:08,user2,tuan001,手机,product005,9.9,join
2021-01-26 09:00:08,tuan001,3,手机,product005,9.9,success
2021-01-26 09:00:10,user5,tuan002,电脑,product009,9.9,create
2021-01-26 09:00:08,user3,tuan002,电脑,product009,9.9,join
2021-01-26 09:00:08,user4,tuan002,电脑,product009,9.9,join
2021-01-26 09:00:08,tuan002,3,电脑,product009,9.9,success
2021-01-26 09:00:01,user1,tuan003,服装,product007,9.9,create
2021-01-26 09:00:08,user2,tuan003,电脑,product009,9.9,join
2021-01-26 09:00:01,user2,tuan004,日用品,product010,9.9,create
2021-01-26 10:00:01,tuan004,日用品,product010,9.9,fail
1. From midnight today until now, how many group-buy deals have been created, and what is the status of each
2. How many users joined a group buy, and how many join events there were
3. The completed group-buy amount per product category (a minimal sketch follows below)
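The sample events are comma-separated; for the success records the fields read as time, deal id, participant count, category, product, price, status (an interpretation of the sample data above). As a hedged illustration of requirement 3 only, the sketch below treats a completed deal's amount as participant count times price (an assumption, not something stated in the notes) and keeps a running sum per category; the class name is made up:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

public class GroupBuyCategoryAmount {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // in the real job these lines would come from Kafka; hard-coded here only for illustration
        env.fromElements(
                "2021-01-26 09:00:08,tuan001,3,手机,product005,9.9,success",
                "2021-01-26 09:00:08,tuan002,3,电脑,product009,9.9,success")
            .flatMap(new FlatMapFunction<String, Tuple2<String, Double>>() {
                @Override
                public void flatMap(String line, Collector<Tuple2<String, Double>> out) {
                    String[] f = line.split(",");
                    // keep only completed deals: (category, participants * price)
                    if (f.length == 7 && "success".equals(f[6])) {
                        out.collect(Tuple2.of(f[3], Integer.parseInt(f[2]) * Double.parseDouble(f[5])));
                    }
                }
            })
            .keyBy(0) // key by category
            .sum(1)   // running total of the completed amount per category
            .print();
        env.execute("group-buy amount per category");
    }
}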
public class FlinkUtilsV2 {

    private static StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    public static <T> DataStream<T> createKafkaDataStream(ParameterTool parameters, Class<? extends DeserializationSchema<T>> clazz) throws Exception {
        String topics = parameters.getRequired("kafka.topics");
        String groupId = parameters.getRequired("group.id");
        return createKafkaDataStream(parameters, topics, groupId, clazz);
    }

    public static <T> DataStream<T> createKafkaDataStream(ParameterTool parameters, String topics, Class<? extends DeserializationSchema<T>> clazz) throws Exception {
        String groupId = parameters.getRequired("group.id");
        return createKafkaDataStream(parameters, topics, groupId, clazz);
    }

    public static <T> DataStream<T> createKafkaDataStream(ParameterTool parameters, String topics, String groupId, Class<? extends DeserializationSchema<T>> clazz) throws Exception {
        //make the ParameterTool parameters available as global job parameters
        env.getConfig().setGlobalJobParameters(parameters);
        //enable checkpointing
        env.enableCheckpointing(parameters.getLong("checkpoint.interval", 10000L), CheckpointingMode.EXACTLY_ONCE);
        env.getCheckpointConfig().setCheckpointingMode(CheckpointConfig.DEFAULT_MODE);
        //restart strategy
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(parameters.getInt("restart.times", 10), Time.seconds(5)));
        //configure the state backend
        String path = parameters.get("state.backend.path");
        if (path != null) {
            //ideally setStateBackend is configured globally in flink-conf.yaml
            env.setStateBackend(new FsStateBackend(path));
        }
        //keep externalized checkpoints when the job is cancelled
        env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        env.getCheckpointConfig().setMaxConcurrentCheckpoints(3);
        //String topics = parameters.getRequired("kafka.topics");
        List<String> topicList = Arrays.asList(topics.split(","));
        Properties properties = parameters.getProperties();
        properties.setProperty("group.id", groupId);
        //create the FlinkKafkaConsumer
        FlinkKafkaConsumer<T> kafkaConsumer = new FlinkKafkaConsumer<>(
                topicList,
                clazz.newInstance(),
                properties
        );
        return env.addSource(kafkaConsumer);
    }

    public static StreamExecutionEnvironment getEnv() {
        return env;
    }
}
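The jobs below load their settings with ParameterTool.fromPropertiesFile(args[0]). For reference, a hedged sketch of what such a properties file could contain, matching the keys FlinkUtilsV2 reads (the file name, broker list and paths are illustrative assumptions, not values from the original notes):

# config.properties (illustrative values)
bootstrap.servers=linux03:9092,linux04:9092,linux05:9092
group.id=g1
kafka.topics=ordermain,orderdetail
auto.offset.reset=earliest
checkpoint.interval=10000
restart.times=10
state.backend.path=hdfs://linux03:8020/flink/checkpoints

Note that parameters.getProperties() hands every entry of this file to the Kafka consumer as well, which is why bootstrap.servers can live in the same file.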
public class OrderMain {
private Long oid;
private Date create_time;
private Double total_money;
private int status;
private Date update_time;
private String province;
private String city;
//database operation type: INSERT or UPDATE
private String type;
public Long getOid() {
return oid;
}
public void setOid(Long oid) {
this.oid = oid;
}
public Date getCreate_time() {
return create_time;
}
public void setCreate_time(Date create_time) {
this.create_time = create_time;
}
public Double getTotal_money() {
return total_money;
}
public void setTotal_money(Double total_money) {
this.total_money = total_money;
}
public int getStatus() {
return status;
}
public void setStatus(int status) {
this.status = status;
}
public Date getUpdate_time() {
return update_time;
}
public void setUpdate_time(Date update_time) {
this.update_time = update_time;
}
public String getProvince() {
return province;
}
public void setProvince(String province) {
this.province = province;
}
public String getCity() {
return city;
}
public void setCity(String city) {
this.city = city;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
@Override
public String toString() {
return "OrderMain{" +
"oid=" + oid +
", create_time=" + create_time +
", total_money=" + total_money +
", status=" + status +
", update_time=" + update_time +
", province='" + province + '\'' +
", city='" + city + '\'' +
", type='" + type + '\'' +
'}';
}
}
public class OrderDetail {
private Long id;
private Long order_id;
private int category_id;
private String categoryName;
private Long sku;
private Double money;
private int amount;
private Date create_time;
private Date update_time;
//database operation type: INSERT or UPDATE
private String type;
public Long getId() {
return id;
}
public void setId(Long id) {
this.id = id;
}
public Long getOrder_id() {
return order_id;
}
public void setOrder_id(Long order_id) {
this.order_id = order_id;
}
public int getCategory_id() {
return category_id;
}
public void setCategory_id(int category_id) {
this.category_id = category_id;
}
public Long getSku() {
return sku;
}
public void setSku(Long sku) {
this.sku = sku;
}
public Double getMoney() {
return money;
}
public void setMoney(Double money) {
this.money = money;
}
public int getAmount() {
return amount;
}
public void setAmount(int amount) {
this.amount = amount;
}
public Date getCreate_time() {
return create_time;
}
public void setCreate_time(Date create_time) {
this.create_time = create_time;
}
public Date getUpdate_time() {
return update_time;
}
public void setUpdate_time(Date update_time) {
this.update_time = update_time;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public String getCategoryName() {
return categoryName;
}
public void setCategoryName(String categoryName) {
this.categoryName = categoryName;
}
@Override
public String toString() {
return "OrderDetail{" +
"id=" + id +
", order_id=" + order_id +
", category_id=" + category_id +
", categoryName='" + categoryName + '\'' +
", sku=" + sku +
", money=" + money +
", amount=" + amount +
", create_time=" + create_time +
", update_time=" + update_time +
", type='" + type + '\'' +
'}';
}
}
public class OrderJoin {
public static void main(String[] args) throws Exception {
ParameterTool parameters = ParameterTool.fromPropertiesFile(args[0]);
//use EventTime as the time characteristic
FlinkUtilsV2.getEnv().setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
DataStream<String> orderMainLinesDataStream = FlinkUtilsV2.createKafkaDataStream(parameters, "ordermain", "g1", SimpleStringSchema.class);
DataStream<String> orderDetailLinesDataStream = FlinkUtilsV2.createKafkaDataStream(parameters, "orderdetail", "g1", SimpleStringSchema.class);
//parse the order main lines
SingleOutputStreamOperator<OrderMain> orderMainDataStream = orderMainLinesDataStream.process(new ProcessFunction<String, OrderMain>() {
@Override
public void processElement(String line, Context ctx, Collector<OrderMain> out) throws Exception {
//flatMap+filter
try {
JSONObject jsonObject = JSON.parseObject(line);
String type = jsonObject.getString("type");
if (type.equals("INSERT") || type.equals("UPDATE")) {
JSONArray jsonArray = jsonObject.getJSONArray("data");
for (int i = 0; i < jsonArray.size(); i++) {
OrderMain orderMain = jsonArray.getObject(i, OrderMain.class);
orderMain.setType(type); //set the operation type
out.collect(orderMain);
}
}
} catch (Exception e) {
//e.printStackTrace();
//record the bad data
}
}
});
//parse the order detail lines
SingleOutputStreamOperator<OrderDetail> orderDetailDataStream = orderDetailLinesDataStream.process(new ProcessFunction<String, OrderDetail>() {
@Override
public void processElement(String line, Context ctx, Collector<OrderDetail> out) throws Exception {
//flatMap+filter
try {
JSONObject jsonObject = JSON.parseObject(line);
String type = jsonObject.getString("type");
if (type.equals("INSERT") || type.equals("UPDATE")) {
JSONArray jsonArray = jsonObject.getJSONArray("data");
for (int i = 0; i < jsonArray.size(); i++) {
OrderDetail orderDetail = jsonArray.getObject(i, OrderDetail.class);
orderDetail.setType(type); //set the operation type
out.collect(orderDetail);
}
}
} catch (Exception e) {
//e.printStackTrace();
//record the bad data
}
}
});
int delaySeconds = 2;
//extract the event time and generate watermarks
SingleOutputStreamOperator<OrderMain> orderMainStreamWithWaterMark = orderMainDataStream.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<OrderMain>(Time.seconds(delaySeconds)) {
@Override
public long extractTimestamp(OrderMain element) {
return element.getCreate_time().getTime();
}
});
SingleOutputStreamOperator<OrderDetail> orderDetailStreamWithWaterMark = orderDetailDataStream.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<OrderDetail>(Time.seconds(delaySeconds)) {
@Override
public long extractTimestamp(OrderDetail element) {
return element.getCreate_time().getTime();
}
});
//left outer join, with the order detail table as the left side
DataStream<Tuple2<OrderDetail, OrderMain>> joined = orderDetailStreamWithWaterMark.coGroup(orderMainStreamWithWaterMark)
.where(new KeySelector<OrderDetail, Long>() {
@Override
public Long getKey(OrderDetail value) throws Exception {
return value.getOrder_id();
}
})
.equalTo(new KeySelector<OrderMain, Long>() {
@Override
public Long getKey(OrderMain value) throws Exception {
return value.getOid();
}
})
.window(TumblingEventTimeWindows.of(Time.seconds(5)))
.apply(new CoGroupFunction<OrderDetail, OrderMain, Tuple2<OrderDetail, OrderMain>>() {
@Override
public void coGroup(Iterable<OrderDetail> first, Iterable<OrderMain> second, Collector<Tuple2<OrderDetail, OrderMain>> out) throws Exception {
for (OrderDetail orderDetail : first) {
boolean isJoined = false;
for (OrderMain orderMain : second) {
out.collect(Tuple2.of(orderDetail, orderMain));
isJoined = true;
}
if (!isJoined) {
out.collect(Tuple2.of(orderDetail, null));
}
}
}
});
joined.print();
FlinkUtilsV2.getEnv().execute();
}
}
public class OrderJoinAdv {
public static void main(String[] args) throws Exception {
ParameterTool parameters = ParameterTool.fromPropertiesFile(args[0]);
FlinkUtilsV2.getEnv().setParallelism(1);
//use EventTime as the time characteristic
FlinkUtilsV2.getEnv().setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
DataStream<String> orderMainLinesDataStream = FlinkUtilsV2.createKafkaDataStream(parameters, "ordermain", "g1", SimpleStringSchema.class);
DataStream<String> orderDetailLinesDataStream = FlinkUtilsV2.createKafkaDataStream(parameters, "orderdetail", "g1", SimpleStringSchema.class);
//parse the order main lines
SingleOutputStreamOperator<OrderMain> orderMainDataStream = orderMainLinesDataStream.process(new ProcessFunction<String, OrderMain>() {
@Override
public void processElement(String line, Context ctx, Collector<OrderMain> out) throws Exception {
//flatMap+filter
try {
JSONObject jsonObject = JSON.parseObject(line);
String type = jsonObject.getString("type");
if (type.equals("INSERT") || type.equals("UPDATE")) {
JSONArray jsonArray = jsonObject.getJSONArray("data");
for (int i = 0; i < jsonArray.size(); i++) {
OrderMain orderMain = jsonArray.getObject(i, OrderMain.class);
orderMain.setType(type); //set the operation type
out.collect(orderMain);
}
}
} catch (Exception e) {
//e.printStackTrace();
//record the bad data
}
}
});
//parse the order detail lines
SingleOutputStreamOperator<OrderDetail> orderDetailDataStream = orderDetailLinesDataStream.process(new ProcessFunction<String, OrderDetail>() {
@Override
public void processElement(String line, Context ctx, Collector<OrderDetail> out) throws Exception {
//flatMap+filter
try {
JSONObject jsonObject = JSON.parseObject(line);
String type = jsonObject.getString("type");
if (type.equals("INSERT") || type.equals("UPDATE")) {
JSONArray jsonArray = jsonObject.getJSONArray("data");
for (int i = 0; i < jsonArray.size(); i++) {
OrderDetail orderDetail = jsonArray.getObject(i, OrderDetail.class);
orderDetail.setType(type); //set the operation type
out.collect(orderDetail);
}
}
} catch (Exception e) {
//e.printStackTrace();
//record the bad data
}
}
});
int delaySeconds = 2;
int windowSize = 5;
//extract the event time and generate watermarks
SingleOutputStreamOperator<OrderMain> orderMainStreamWithWaterMark = orderMainDataStream.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<OrderMain>(Time.seconds(delaySeconds)) {
@Override
public long extractTimestamp(OrderMain element) {
return element.getCreate_time().getTime();
}
});
SingleOutputStreamOperator<OrderDetail> orderDetailStreamWithWaterMark = orderDetailDataStream.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<OrderDetail>(Time.seconds(delaySeconds)) {
@Override
public long extractTimestamp(OrderDetail element) {
return element.getCreate_time().getTime();
}
});
//define the OutputTag for the late-data side output
OutputTag<OrderDetail> lateTag = new OutputTag<OrderDetail>("late-date") {};
//window the left (detail) stream separately, using the same window size as the coGroup window
SingleOutputStreamOperator<OrderDetail> orderDetailWithWindow = orderDetailStreamWithWaterMark.windowAll(TumblingEventTimeWindows.of(Time.seconds(windowSize)))
.sideOutputLateData(lateTag) //send late records to the side output
.apply(new AllWindowFunction<OrderDetail, OrderDetail, TimeWindow>() {
@Override
public void apply(TimeWindow window, Iterable<OrderDetail> values, Collector<OrderDetail> out) throws Exception {
for (OrderDetail value : values) {
out.collect(value);
}
}
});
//get the late data from the side output
DataStream<OrderDetail> lateOrderDetailStream = orderDetailWithWindow.getSideOutput(lateTag);
//because there is not much late orderDetail data, async I/O is unnecessary; a RichMapFunction is enough
SingleOutputStreamOperator<Tuple2<OrderDetail, OrderMain>> lateOrderDetailAndOrderMain = lateOrderDetailStream.map(new RichMapFunction<OrderDetail, Tuple2<OrderDetail, OrderMain>>() {
@Override
public Tuple2<OrderDetail, OrderMain> map(OrderDetail detail) throws Exception {
return Tuple2.of(detail, null);
}
});
//left outer join, with the order detail table as the left side
DataStream<Tuple2<OrderDetail, OrderMain>> joined = orderDetailWithWindow.coGroup(orderMainStreamWithWaterMark)
.where(new KeySelector<OrderDetail, Long>() {
@Override
public Long getKey(OrderDetail value) throws Exception {
return value.getOrder_id();
}
})
.equalTo(new KeySelector<OrderMain, Long>() {
@Override
public Long getKey(OrderMain value) throws Exception {
return value.getOid();
}
})
.window(TumblingEventTimeWindows.of(Time.seconds(windowSize)))
.apply(new CoGroupFunction<OrderDetail, OrderMain, Tuple2<OrderDetail, OrderMain>>() {
@Override
public void coGroup(Iterable<OrderDetail> first, Iterable<OrderMain> second, Collector<Tuple2<OrderDetail, OrderMain>> out) throws Exception {
for (OrderDetail orderDetail : first) {
boolean isJoined = false;
for (OrderMain orderMain : second) {
out.collect(Tuple2.of(orderDetail, orderMain));
isJoined = true;
}
if (!isJoined) {
out.collect(Tuple2.of(orderDetail, null));
}
}
}
});
joined.union(lateOrderDetailAndOrderMain).map(new RichMapFunction<Tuple2<OrderDetail, OrderMain>, Tuple2<OrderDetail, OrderMain>>() {
private transient Connection connection;
@Override
public void open(Configuration parameters) throws Exception {
//create the database connection here
connection = DriverManager.getConnection("jdbc:mysql://172.16.100.100:3306/bigdata?characterEncoding=UTF-8", "root", "123456");
}
@Override
public Tuple2<OrderDetail, OrderMain> map(Tuple2<OrderDetail, OrderMain> tp) throws Exception {
//for records that did not join an OrderMain in the window, look it up in the database
if (tp.f1 == null) {
tp.f1 = queryOrderMainFromMySQL(tp.f0.getOrder_id(), connection);
}
return tp;
}
@Override
public void close() throws Exception {
//close the database connection
if (connection != null) {
connection.close();
}
}
}).print();
FlinkUtilsV2.getEnv().execute();
}
private static OrderMain queryOrderMainFromMySQL(Long order_id, Connection connection) throws Exception {
PreparedStatement preparedStatement = connection.prepareStatement("SELECT * FROM ordermain WHERE oid = ?");
//set the parameter
preparedStatement.setLong(1, order_id);
//run the query
ResultSet resultSet = preparedStatement.executeQuery();
//read the result
OrderMain orderMain = null;
if (resultSet.next()) {
long oid = resultSet.getLong("oid");
Date createTime = resultSet.getDate("create_time");
double totalMoney = resultSet.getDouble("total_money");
int status = resultSet.getInt("status");
orderMain = new OrderMain();
orderMain.setOid(oid);
orderMain.setCreate_time(createTime);
orderMain.setTotal_money(totalMoney);
orderMain.setStatus(status);
}
resultSet.close();
preparedStatement.close();
return orderMain;
}
}
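As noted above, the late orderDetail records are looked up with a plain RichMapFunction because their volume is small. If the late volume were larger, the same MySQL lookup could be wrapped in Flink async I/O so that the JDBC round-trips do not block the stream. This is only a hedged sketch under that assumption: the class name, pool size, timeout and capacity are illustrative, and the query logic is duplicated locally so the sketch stays self-contained.

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.async.ResultFuture;
import org.apache.flink.streaming.api.functions.async.RichAsyncFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.Collections;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class AsyncOrderMainLookup extends RichAsyncFunction<OrderDetail, Tuple2<OrderDetail, OrderMain>> {

    private transient Connection connection;
    private transient ExecutorService executor;

    @Override
    public void open(Configuration parameters) throws Exception {
        // one JDBC connection and a small thread pool per subtask (illustrative sizing)
        connection = DriverManager.getConnection(
                "jdbc:mysql://172.16.100.100:3306/bigdata?characterEncoding=UTF-8", "root", "123456");
        executor = Executors.newFixedThreadPool(4);
    }

    @Override
    public void asyncInvoke(OrderDetail detail, ResultFuture<Tuple2<OrderDetail, OrderMain>> resultFuture) {
        // run the blocking JDBC lookup on the pool and complete the future when it finishes
        executor.submit(() -> {
            try {
                OrderMain orderMain = queryOrderMain(detail.getOrder_id());
                resultFuture.complete(Collections.singleton(Tuple2.of(detail, orderMain)));
            } catch (Exception e) {
                resultFuture.completeExceptionally(e);
            }
        });
    }

    // same lookup as queryOrderMainFromMySQL above, kept local so the sketch is self-contained
    private OrderMain queryOrderMain(Long orderId) throws Exception {
        try (PreparedStatement ps = connection.prepareStatement("SELECT * FROM ordermain WHERE oid = ?")) {
            ps.setLong(1, orderId);
            ResultSet rs = ps.executeQuery();
            if (!rs.next()) {
                return null;
            }
            OrderMain orderMain = new OrderMain();
            orderMain.setOid(rs.getLong("oid"));
            orderMain.setStatus(rs.getInt("status"));
            return orderMain;
        }
    }

    @Override
    public void close() throws Exception {
        if (executor != null) executor.shutdown();
        if (connection != null) connection.close();
    }
}

// usage inside OrderJoinAdv, instead of the RichMapFunction on the late stream:
// DataStream<Tuple2<OrderDetail, OrderMain>> lateJoined = AsyncDataStream.unorderedWait(
//         lateOrderDetailStream, new AsyncOrderMainLookup(), 3, TimeUnit.SECONDS, 100);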