How to Synchronize Data Between Elasticsearch and MySQL

1. For every table that needs to be queried in ES, add an UPDATE_TIME column; the sync picks up inserted and updated rows based on this column. MySQL automatically sets UPDATE_TIME to the current time whenever a row is inserted or updated, so no application code has to maintain it.

ALTER TABLE `table_name` ADD COLUMN `UPDATE_TIME` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time';

Note that if the table already contains rows, the column has to be added in two steps (MySQL 5.5): first as a nullable column, then altered to NOT NULL. Executing the single statement above against a populated table would leave update_time as 0000-00-00 for the existing rows.

alter table table_name add column update_time TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time';
alter table table_name modify column update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time';

2. For every table that needs to be queried in ES, add an AFTER DELETE trigger that writes a row into a separate tracking table, recording the table name and the ID of the deleted row.

CREATE TABLE T_ELASTIC_SEARCH_DELETE (
  ID BIGINT NOT NULL AUTO_INCREMENT,
  TABLE_NAME VARCHAR(256),
  OLD_ID BIGINT,
  PRIMARY KEY (ID)
);


DROP TRIGGER IF EXISTS table_name_AFTER_DELETE;
DELIMITER ||
CREATE TRIGGER `table_name_AFTER_DELETE` AFTER DELETE ON `table_name` FOR EACH ROW
BEGIN
	INSERT INTO T_ELASTIC_SEARCH_DELETE(TABLE_NAME, OLD_ID) VALUES ('table_name', OLD.ID);
END||

DELIMITER ;

3. Add a scheduled task in the application code that reads the tracking table and deletes the corresponding documents from ES. The task body is shown below; sketches of the ES client helper and of the scheduling itself follow the code.

String sql = "SELECT ID, TABLE_NAME, OLD_ID FROM T_ELASTIC_SEARCH_DELETE";

Set<Long> idList = new HashSet<>();
int limit = 100;
Map<String, Set<Long>> indexDelete = new HashMap<>();
Map<String, Set<Long>> dbDelete = new HashMap<>();
for (int i = 0;; i += limit) {
    QueryResult qr = entityMgr.readDataList(sql, i, limit);
    if (qr.getLength() == 0) {
        break;
    }
    for (Object[] data : qr.getData()) {
        // Name of the target ES index
        String index = "dbName" + "_" + data[1];
        // Collect the document IDs that have to be deleted from ES
        Set<Long> oldIds = indexDelete.get(index);
        if (oldIds == null) {
            oldIds = new HashSet<>();
            indexDelete.put(index, oldIds);
        }
        oldIds.add((Long)Utils.parseLong(data[2]));
        // Collect the tracking-table row IDs so they can be removed afterwards
        Set<Long> ids = dbDelete.get(index);
        if (ids == null) {
            ids = new HashSet<>();
            dbDelete.put(index, ids);
        }
        ids.add((Long)Utils.parseLong(data[0]));
    }
}
for (String index : indexDelete.keySet()) {
    try {
        Set<Long> ids = indexDelete.get(index);
        if (ids.size() == 0) {
            continue;
        }
        DeleteByQueryRequest request = new DeleteByQueryRequest(index);
        request.setQuery(QueryBuilders.termsQuery("id", ids.toArray(new Object[0])));
        // Obtain the RestHighLevelClient and run the delete-by-query
        EsUtils.getClient().deleteByQuery(request, RequestOptions.DEFAULT);
        idList.addAll(dbDelete.get(index));
    } catch (IOException e) {
        e.printStackTrace();
    }
}
if (idList.size() > 0) {
    String deleteSql = "DELETE FROM T_ELASTIC_SEARCH_DELETE WHERE ID IN (" + SysUtils.collectionToString(idList, "," , null) + ")";
    entityMgr.update(deleteSql);
}
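
The loop above calls EsUtils.getClient(), which is not shown in the original code. A minimal sketch of such a helper, assuming a single shared RestHighLevelClient and a hard-coded host and port (both assumptions; in practice they should come from configuration), could look like this:

import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;

public class EsUtils {
    // One shared client for the whole application; host and port are assumptions
    private static final RestHighLevelClient CLIENT =
            new RestHighLevelClient(RestClient.builder(new HttpHost("localhost", 9200, "http")));

    public static RestHighLevelClient getClient() {
        return CLIENT;
    }
}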

Note: in the Logstash configuration the index name is deliberately set to dbName_tableName (database name plus table name), which makes the processing above straightforward.
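
How the task is actually scheduled is framework-specific and not part of the original code. A minimal sketch using a plain ScheduledExecutorService (the class name, the one-minute interval, and the syncDeletes wrapper are all assumptions for illustration):

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

public class EsDeleteSyncJob {

    public static void start() {
        ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
        // Run the cleanup once a minute; the interval is an assumption
        scheduler.scheduleAtFixedRate(EsDeleteSyncJob::syncDeletes, 1, 1, TimeUnit.MINUTES);
    }

    private static void syncDeletes() {
        // Body from step 3: read T_ELASTIC_SEARCH_DELETE, delete the matching ES documents,
        // then remove the processed rows from the tracking table
    }
}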

4. Configure Logstash (Logstash handles synchronizing inserts and updates from the database). The corresponding configuration file:

input { 
  stdin { }
  jdbc {
    jdbc_driver_library => "mysql-connector-java-5.1.43.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://192.168.6.27:3306/dbName"
    jdbc_user => "root"
    jdbc_password => "123456"
    jdbc_paging_enabled => "true"
    jdbc_page_size => "50000"
    schedule => "* * * * *"
    use_column_value => true
    tracking_column => "update_time"
    tracking_column_type => "timestamp"
    statement => "select * from table_name where update_time >= :sql_last_value"
    type => "dbName_table_name"
    last_run_metadata_path => "../path/dbName_table_name.log"
  }
}
output {
  elasticsearch {
	index => "%{type}"
	document_id => "%{id}"
	document_type => "%{type}"   # <- use the type from each input
	hosts => "localhost:9200"
  }
  stdout { codec => rubydebug }
}
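
Once Logstash is running, it can be worth checking from the application side that documents are actually arriving in the expected index. A quick sketch of such a check (the SyncCheck class and the size of 1 are illustrative only; the index name follows the dbName_table_name convention above):

import java.io.IOException;

import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.builder.SearchSourceBuilder;

public class SyncCheck {
    public static void main(String[] args) throws IOException {
        // Index name follows the dbName_table_name convention used in the Logstash config
        SearchRequest request = new SearchRequest("dbName_table_name");
        request.source(new SearchSourceBuilder().query(QueryBuilders.matchAllQuery()).size(1));
        SearchResponse response = EsUtils.getClient().search(request, RequestOptions.DEFAULT);
        // The total hit count shows whether any rows have been synced yet
        System.out.println("Documents in index: " + response.getHits().getTotalHits());
    }
}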

 
