Canal parses a MySQL database's binlog into structured records and publishes them to Kafka, which can serve as the backbone of a real-time ETL pipeline.
yml:
spring:
  application:
    name: canal
  canal:
    topic-prefix: etl_timely.
    destination:
      example: 0
    username:
    password:
    delay-limit: 2000
  kafka:
    bootstrap-servers: 127.0.0.1:9092
    producer:
      acks: 1
      batch-size: 100
      client-id: canal
      retries: 3
      key-serializer: org.apache.kafka.common.serialization.StringSerializer
      value-serializer: org.apache.kafka.common.serialization.StringSerializer
    consumer:
      enable-auto-commit: true
      group-id: etl
    template:
      default-topic: etl_canal
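The consumer settings above are not exercised by the code in this post, which only produces messages. For completeness, here is a minimal sketch of a downstream ETL consumer; the class name and topic pattern are assumptions based on the configured topic-prefix:

package com.kexin.canal.consumer;

import org.springframework.kafka.annotation.KafkaListener;
import org.springframework.stereotype.Component;

@Component
public class EtlConsumer {

    // matches every per-table topic produced by KafkaService below, e.g. etl_timely.user
    @KafkaListener(topicPattern = "etl_timely\\..*", groupId = "etl")
    public void onMessage(String payload) {
        // each record is the JSON document built by KafkaService
        System.out.println(payload);
    }
}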
pom:
<dependencies>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>com.alibaba.otter</groupId>
        <artifactId>canal.client</artifactId>
        <version>1.0.25</version>
    </dependency>
    <dependency>
        <groupId>org.bouncycastle</groupId>
        <artifactId>bcprov-jdk15on</artifactId>
        <version>RELEASE</version>
        <scope>compile</scope>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-configuration-processor</artifactId>
        <optional>true</optional>
    </dependency>
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka_2.11</artifactId>
    </dependency>
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-clients</artifactId>
    </dependency>
    <dependency>
        <groupId>org.springframework.kafka</groupId>
        <artifactId>spring-kafka</artifactId>
    </dependency>
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <version>1.18.0</version>
        <scope>provided</scope>
    </dependency>
    <!-- used by the code below but missing from the original list; versions are suggestions -->
    <dependency>
        <groupId>com.lmax</groupId>
        <artifactId>disruptor</artifactId>
        <version>3.4.2</version>
    </dependency>
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.47</version>
    </dependency>
</dependencies>
CanalClient:
package com.kexin.canal.client;
import com.alibaba.otter.canal.client.CanalConnector;
import com.alibaba.otter.canal.client.CanalConnectors;
import com.alibaba.otter.canal.common.utils.AddressUtils;
import com.alibaba.otter.canal.protocol.Message;
import com.kexin.canal.config.DisruptorConfig;
import com.kexin.canal.service.KafkaService;
import com.lmax.disruptor.BlockingWaitStrategy;
import com.lmax.disruptor.EventFactory;
import com.lmax.disruptor.RingBuffer;
import com.lmax.disruptor.dsl.Disruptor;
import com.lmax.disruptor.dsl.ProducerType;
import lombok.Getter;
import lombok.Setter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.CommandLineRunner;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
import java.net.InetSocketAddress;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
/**
 * @Author KeXin
 * @Date 2018/7/20 5:16 PM
 **/
@Component
@ConfigurationProperties(prefix = "spring.canal")
public class CanalClient implements CommandLineRunner{
@Getter
@Setter
private String username;
@Getter
@Setter
private String password;
@Getter
@Setter
private Map<String, Integer> destination;
@Autowired
KafkaService kafkaService;
@Override
public void run(String... args) {
int port = 11111;
startClient(port,destination,username,password);
}
    /**
     * Watch for database changes; messages are handed off through a Disruptor ring buffer.
     */
    public void startClient(int port, Map<String, Integer> canalDestination, String canalUsername, String canalPassword) {
        canalDestination.forEach((database, partition) -> {
            // start the lock-free queue (ring buffer); this excerpt never publishes to it
            RingBuffer<Message> buffer = getDisruptor().start();
            new Thread(() -> {
                // open a connection to the canal server
CanalConnector connector = CanalConnectors.newSingleConnector(new InetSocketAddress(AddressUtils.getHostIp(),
port), database, canalUsername, canalPassword);
int batchSize = 100;
try {
connector.connect();
connector.subscribe();
connector.rollback();
while (true) {
Message message = connector.getWithoutAck(batchSize); // fetch up to batchSize entries without acknowledging
long batchId = message.getId();
int size = message.getEntries().size();
if (batchId == -1 || size == 0) {
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
e.printStackTrace();
}
} else {
// forward to Kafka
kafkaService.sendMessage(message.getEntries());
}
                        connector.ack(batchId); // acknowledge the batch as consumed
                        // connector.rollback(batchId); // on failure, roll back for redelivery
}
} finally {
connector.disconnect();
}
}).start();
});
    }

    /**
     * Builds the Disruptor whose ring buffer is started above. The original
     * DisruptorConfig class is imported but never shown in the post, so this
     * is a minimal stand-in: a single-producer ring buffer of canal Messages
     * with a blocking wait strategy. Event-handler wiring (normally registered
     * before start()) is likewise not part of the excerpt.
     */
    private Disruptor<Message> getDisruptor() {
        EventFactory<Message> factory = () -> new Message(-1L);
        ThreadFactory threadFactory = Executors.defaultThreadFactory();
        return new Disruptor<>(factory, 1024, threadFactory,
                ProducerType.SINGLE, new BlockingWaitStrategy());
    }
}
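The client above connects to a canal server on port 11111, so a canal deployer instance must be running with a destination matching each key of spring.canal.destination (here, example). For context, a typical conf/example/instance.properties on that server looks like the following; all values are placeholders for your own environment:

# MySQL master whose binlog is parsed
canal.instance.master.address=127.0.0.1:3306
# replication account (needs REPLICATION SLAVE and REPLICATION CLIENT)
canal.instance.dbUsername=canal
canal.instance.dbPassword=canal
canal.instance.connectionCharset=UTF-8
# subscribe to all schemas/tables; narrow the regex in production
canal.instance.filter.regex=.*\\..*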
KafkaService, which parses the binlog entries into the format we need:
package com.kexin.canal.service;
import com.alibaba.fastjson.JSON;
import com.alibaba.otter.canal.protocol.CanalEntry;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.stereotype.Component;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* @Author KeXin
* @Date 2018/7/20 下午4:56
**/
@Component
public class KafkaService {
@Autowired
KafkaTemplate<String, String> kafkaTemplate;
@Value("${spring.canal.topic-prefix}")
private String canalTopicPrefix;
    /**
     * Format each binlog entry and send it to Kafka.
     * @param entrys entries from one canal batch
     */
    public void sendMessage(List<CanalEntry.Entry> entrys) {
for (CanalEntry.Entry entry : entrys) {
if (entry.getEntryType() == CanalEntry.EntryType.TRANSACTIONBEGIN || entry.getEntryType() == CanalEntry.EntryType.TRANSACTIONEND) {
continue;
}
CanalEntry.RowChange rowChange = null;
try {
rowChange = CanalEntry.RowChange.parseFrom(entry.getStoreValue());
} catch (Exception e) {
                throw new RuntimeException("ERROR ## failed to parse binlog event, data: " + entry.toString(), e);
}
CanalEntry.EventType eventType = rowChange.getEventType();
String tableName = entry.getHeader().getTableName();
String schemaName = entry.getHeader().getSchemaName();
long executeTime = entry.getHeader().getExecuteTime();
// locate the event by binlog file name and offset
System.out.println(String.format("================> binlog[%s:%s] , name[%s,%s] , eventType : %s",
entry.getHeader().getLogfileName(), entry.getHeader().getLogfileOffset(),
entry.getHeader().getSchemaName(), entry.getHeader().getTableName(),
eventType));
for (CanalEntry.RowData rowData : rowChange.getRowDatasList()) {
Map<String, Object> map = new HashMap<>();
map.put("event_timestamp", executeTime);
map.put("table_name", tableName);
map.put("database_name", schemaName);
Map<String, String> map_info = new HashMap<>();
                    if (eventType == CanalEntry.EventType.DELETE) {
                        map.put("event_op_type", "delete");
                        // deletes only carry the before-image of the row
                        for (CanalEntry.Column column : rowData.getBeforeColumnsList()) {
                            if (column.getValue() != null && !column.getValue().isEmpty()) {
                                map_info.put(column.getName(), column.getValue());
                            }
                        }
                    } else if (eventType == CanalEntry.EventType.INSERT) {
                        map.put("event_op_type", "insert");
                        for (CanalEntry.Column column : rowData.getAfterColumnsList()) {
                            map_info.put(column.getName(), column.getValue());
                        }
                    } else {
                        map.put("event_op_type", "update");
                        // updates carry both images: after-image in map_info, before-image kept separately
                        for (CanalEntry.Column column : rowData.getAfterColumnsList()) {
                            map_info.put(column.getName(), column.getValue());
                        }
                        Map<String, String> beforeMap = new HashMap<>();
                        for (CanalEntry.Column column : rowData.getBeforeColumnsList()) {
                            if (column.getValue() != null && !column.getValue().isEmpty()) {
                                beforeMap.put(column.getName(), column.getValue());
                            }
                        }
                        map.put("beforeColumns", beforeMap);
                    }
map.put("map_info",map_info);
System.out.println(map);
kafkaTemplate.send( canalTopicPrefix + tableName, JSON.toJSONString(map));
}
}
}
}
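For an UPDATE to a hypothetical user table, the record published to etl_timely.user would look roughly like this (values illustrative):

{
  "event_timestamp": 1532077913000,
  "database_name": "test",
  "table_name": "user",
  "event_op_type": "update",
  "beforeColumns": {"id": "1", "name": "old_name"},
  "map_info": {"id": "1", "name": "new_name"}
}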