Design a GCC C++ program that runs on Linux and connects to two different PostgreSQL instances at the same time. Each instance holds a database whose schemas have identical table structures; the program copies all table data from the schemas of one instance's database into the corresponding schemas of the other instance's database. Requirements:

- Use a fast, efficient transfer method.
- Include exception handling, with support for retrying a configurable number of times at a fixed interval.
- Log, for every table, the copy status, record count, start/end timestamps, and elapsed time; and for every batch, the status, start/end timestamps, elapsed time, and total record count.
- Put all log files under a logs directory, one .log text file per day with the date in the file name.
- Keep all runtime parameters in a JSON configuration file.
Steps and code examples for implementing the program:

Project structure:
.
├── CMakeLists.txt
├── config.json
├── include
│   ├── config.h
│   ├── database.h
│   ├── logger.h
│   └── worker.h
├── src
│   ├── main.cpp
│   ├── config.cpp
│   ├── database.cpp
│   ├── logger.cpp
│   └── worker.cpp
└── third_party
    └── json
config.json:
{
    "source": {
        "host": "source-db.example.com",
        "port": 5432,
        "dbname": "source_db",
        "user": "user",
        "password": "pass"
    },
    "target": {
        "host": "target-db.example.com",
        "port": 5432,
        "dbname": "target_db",
        "user": "user",
        "password": "pass"
    },
    "schemas": ["public", "sales"],
    "retry": {
        "max_attempts": 3,
        "interval_seconds": 60
    },
    "batch_size": 1000,
    "disable_indexes": true
}
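config.h and config.cpp appear in the project tree but not in the listing. A header-only sketch of what they might contain, assuming third_party/json is nlohmann/json (the Config/RetryConfig names and the load helper are illustrative, not part of the original code):

// config.h (sketch) -- loads config.json with nlohmann/json.
#pragma once
#include <fstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>
#include "database.h"   // DBConfig

struct RetryConfig {
    int max_attempts = 3;
    int interval_seconds = 60;
};

struct Config {
    DBConfig source;
    DBConfig target;
    std::vector<std::string> schemas;
    RetryConfig retry;
    size_t batch_size = 1000;
    bool disable_indexes = false;

    static Config load(const std::string& path) {
        std::ifstream in(path);
        if (!in)
            throw std::runtime_error("cannot open config file: " + path);
        nlohmann::json j = nlohmann::json::parse(in);
        auto read_db = [](const nlohmann::json& d) {
            DBConfig c;
            c.host = d.at("host").get<std::string>();
            c.port = d.at("port").get<int>();
            c.dbname = d.at("dbname").get<std::string>();
            c.user = d.at("user").get<std::string>();
            c.password = d.at("password").get<std::string>();
            return c;
        };
        Config c;
        c.source = read_db(j.at("source"));
        c.target = read_db(j.at("target"));
        c.schemas = j.at("schemas").get<std::vector<std::string>>();
        c.retry.max_attempts = j.at("retry").at("max_attempts").get<int>();
        c.retry.interval_seconds = j.at("retry").at("interval_seconds").get<int>();
        c.batch_size = j.at("batch_size").get<size_t>();
        c.disable_indexes = j.at("disable_indexes").get<bool>();
        return c;
    }
};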
CMakeLists.txt:
cmake_minimum_required(VERSION 3.16)
project(pg_replicator)

# logger.cpp uses std::format and std::chrono::days, so build as C++20.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

find_package(libpqxx REQUIRED)
find_package(Threads REQUIRED)

include_directories(
    include
    third_party/json/include
)

add_executable(pg_replicator
    src/main.cpp
    src/config.cpp
    src/database.cpp
    src/logger.cpp
    src/worker.cpp
)

# libpqxx's CMake package exports the libpqxx::pqxx target.
target_link_libraries(pg_replicator
    PRIVATE
        libpqxx::pqxx
        Threads::Threads
)
database.h:
#pragma once
#include <memory>
#include <string>
#include <pqxx/pqxx>

struct DBConfig {
    std::string host;
    int port;
    std::string dbname;
    std::string user;
    std::string password;
};

class DatabaseConnector {
public:
    explicit DatabaseConnector(const DBConfig& config);
    pqxx::connection& get_connection();
private:
    std::unique_ptr<pqxx::connection> conn_;
};
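database.cpp is not shown either; a minimal sketch of the connector, which assembles a libpq keyword/value connection string from DBConfig (error handling kept deliberately simple):

// database.cpp (sketch) -- opens a libpqxx connection from DBConfig.
#include "database.h"

DatabaseConnector::DatabaseConnector(const DBConfig& config) {
    // libpq keyword/value syntax; values containing spaces or quotes
    // would need escaping in a production version.
    std::string conninfo =
        "host=" + config.host +
        " port=" + std::to_string(config.port) +
        " dbname=" + config.dbname +
        " user=" + config.user +
        " password=" + config.password;
    conn_ = std::make_unique<pqxx::connection>(conninfo);
}

pqxx::connection& DatabaseConnector::get_connection() {
    return *conn_;
}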
worker.h:
#pragma once
#include <chrono>
#include <map>
#include <string>
#include <vector>
#include <pqxx/pqxx>
#include "config.h"
#include "database.h"
#include "logger.h"

struct TableStats {
    size_t total_records = 0;
    std::chrono::milliseconds duration{0};
};

class DataCopier {
public:
    DataCopier(DatabaseConnector& src, DatabaseConnector& tgt,
               const Config& config, Logger& logger);
    TableStats copy_table(const std::string& schema, const std::string& table);
    void copy_schema(const std::string& schema);

private:
    void disable_indexes(pqxx::transaction_base& txn, const std::string& schema, const std::string& table);
    void enable_indexes(pqxx::transaction_base& txn, const std::string& schema, const std::string& table);

    DatabaseConnector& src_;
    DatabaseConnector& tgt_;
    const Config& config_;   // copy_table reads config_.disable_indexes
    Logger& logger_;
    // CREATE INDEX statements saved per table while its indexes are dropped.
    std::map<std::string, std::vector<std::string>> saved_indexes_;
};
logger.cpp:
#include "logger.h"
#include
#include
#include
#include
namespace fs = std::filesystem;
class Logger::Impl {
public:
Impl() {
fs::create_directories("logs");
update_file();
}
void log(const LogEntry& entry) {
std::lock_guard<std::mutex> lock(mutex_);
check_rollover();
file_ << entry.to_string() << std::endl;
}
private:
void update_file() {
auto now = std::chrono::system_clock::now();
auto today = std::chrono::floor<std::chrono::days>(now);
filename_ = "logs/log_" +
std::format("{:%Y%m%d}", today) + ".log";
file_.close();
file_.open(filename_, std::ios::app);
}
void check_rollover() {
auto now = std::chrono::system_clock::now();
if (now >= next_check_) {
update_file();
next_check_ = now + std::chrono::hours(1);
}
}
std::ofstream file_;
std::string filename_;
std::mutex mutex_;
std::chrono::system_clock::time_point next_check_;
};
Logger::Logger() : impl_(std::make_unique<Impl>()) {}
Logger::~Logger() = default;
void Logger::log(const LogEntry& entry) {
impl_->log(entry);
}
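logger.h itself never appears in the listing; a sketch of what it might declare so that logger.cpp and worker.cpp compile. The LogEntry field order matches the designated initializers in worker.cpp; the exact to_string line format is an assumption modeled on the sample log output further down:

// logger.h (sketch) -- LogEntry fields mirror what worker.cpp populates.
#pragma once
#include <chrono>
#include <format>
#include <memory>
#include <string>

struct LogEntry {
    std::chrono::system_clock::time_point timestamp;
    std::string schema;
    std::string table;
    std::string status;          // e.g. "SUCCESS" / "FAILED"
    size_t records_copied = 0;
    long long duration_ms = 0;
    std::string error_message;

    std::string to_string() const {
        // Illustrative single-line format; adjust to taste.
        return std::format("{:%FT%TZ} [{}.{}] {} records={} duration_ms={} {}",
                           std::chrono::floor<std::chrono::seconds>(timestamp),
                           schema, table, status, records_copied, duration_ms,
                           error_message);
    }
};

class Logger {
public:
    Logger();
    ~Logger();
    void log(const LogEntry& entry);
private:
    class Impl;
    std::unique_ptr<Impl> impl_;
};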
worker.cpp (core copy logic):
TableStats DataCopier::copy_table(const std::string& schema, const std::string& table) {
    TableStats stats;
    auto start = std::chrono::steady_clock::now();
    pqxx::work src_txn(src_.get_connection());
    pqxx::work tgt_txn(tgt_.get_connection());
    try {
        // Disable indexes if configured, so the bulk load is not slowed
        // down by index maintenance.
        if (config_.disable_indexes) {
            disable_indexes(tgt_txn, schema, table);
        }
        const std::string qualified =
            src_txn.quote_name(schema) + "." + src_txn.quote_name(table);
        // Replace the target table's contents, so the COUNT(*) below equals
        // the number of rows copied.
        tgt_txn.exec("TRUNCATE " + qualified);
        // Use COPY for high-performance transfer: libpqxx 7 can pump the raw
        // COPY data of one stream straight into another without parsing rows
        // (stream_to::operator<<(stream_from&)).
        auto reader = pqxx::stream_from::raw_table(src_txn, qualified);
        auto writer = pqxx::stream_to::raw_table(tgt_txn, qualified);
        writer << reader;
        writer.complete();
        reader.complete();
        // Get record count.
        auto count = tgt_txn.query_value<size_t>(
            "SELECT COUNT(*) FROM " + qualified);
        if (config_.disable_indexes) {
            enable_indexes(tgt_txn, schema, table);
        }
        tgt_txn.commit();
        src_txn.commit();
        auto end = std::chrono::steady_clock::now();
        stats.duration =
            std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
        stats.total_records = count;
        logger_.log({
            .timestamp = std::chrono::system_clock::now(),
            .schema = schema,
            .table = table,
            .status = "SUCCESS",
            .records_copied = count,
            .duration_ms = stats.duration.count()
        });
    } catch (const std::exception& e) {
        // Roll back both sides; pqxx::work would also abort on destruction.
        tgt_txn.abort();
        src_txn.abort();
        logger_.log({
            .timestamp = std::chrono::system_clock::now(),
            .schema = schema,
            .table = table,
            .status = "FAILED",
            .error_message = e.what()
        });
        throw;   // let the caller's retry logic decide what to do next
    }
    return stats;
}
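disable_indexes and enable_indexes are declared but never defined in the excerpts. PostgreSQL has no switch to turn an index off, so one common approach is to save each index's definition from pg_indexes, drop it before the bulk load, and replay the definition afterwards. A sketch under that approach, using the saved_indexes_ map declared in worker.h above (primary-key indexes back constraints and cannot simply be dropped; a production version would handle them separately):

void DataCopier::disable_indexes(pqxx::transaction_base& txn,
                                 const std::string& schema,
                                 const std::string& table) {
    // Remember each index's CREATE INDEX statement, then drop the index.
    auto rows = txn.exec_params(
        "SELECT indexname, indexdef FROM pg_indexes "
        "WHERE schemaname = $1 AND tablename = $2",
        schema, table);
    auto& defs = saved_indexes_[schema + "." + table];
    for (const auto& row : rows) {
        defs.push_back(row["indexdef"].as<std::string>());
        txn.exec("DROP INDEX IF EXISTS " + txn.quote_name(schema) + "." +
                 txn.quote_name(row["indexname"].as<std::string>()));
    }
}

void DataCopier::enable_indexes(pqxx::transaction_base& txn,
                                const std::string& schema,
                                const std::string& table) {
    auto it = saved_indexes_.find(schema + "." + table);
    if (it == saved_indexes_.end()) return;
    for (const auto& def : it->second) {
        txn.exec(def);   // replay the saved CREATE INDEX statement
    }
    saved_indexes_.erase(it);
}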
Build and run:
mkdir build
cd build
cmake ..
make -j4
./pg_replicator --config ../config.json
Key optimizations:
- COPY-based streaming instead of row-by-row INSERTs, so data moves between the two instances in PostgreSQL's bulk-transfer format.
- Optional dropping of the target table's indexes during the load (the disable_indexes flag), with the definitions replayed afterwards.
- A configurable batch_size for batch-level progress logging (the batching loop itself is not shown in the excerpts above).

Exception handling:
- Each table copy runs in its own pair of transactions; on any error both sides roll back, a FAILED entry is written to the log, and the exception propagates upward.
- A retry loop re-runs a failed table up to retry.max_attempts times, sleeping retry.interval_seconds between attempts; a minimal sketch follows.
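The retry loop itself never appears in the listing; a minimal sketch of how the caller (for example, main.cpp) might implement it on top of the config.json settings (copy_with_retry is an illustrative name, not part of the original code):

// Retry wrapper (sketch): re-run a failed table copy at a fixed interval.
#include <chrono>
#include <thread>
#include "worker.h"

TableStats copy_with_retry(DataCopier& copier, const Config& config,
                           const std::string& schema, const std::string& table) {
    for (int attempt = 1; ; ++attempt) {
        try {
            return copier.copy_table(schema, table);
        } catch (const std::exception&) {
            if (attempt >= config.retry.max_attempts)
                throw;   // attempts exhausted: surface the final failure
            // Wait the configured interval before trying again.
            std::this_thread::sleep_for(
                std::chrono::seconds(config.retry.interval_seconds));
        }
    }
}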
Sample log output:
2023-10-20T14:23:45Z INFO [public.users] STARTED
2023-10-20T14:23:47Z INFO [public.users] COPIED 15382 records (2014ms)
2023-10-20T14:23:48Z ERROR [sales.orders] Connection lost, retrying (1/3)
2023-10-20T14:24:48Z INFO [sales.orders] RETRY SUCCESS
2023-10-20T14:24:50Z INFO [sales.orders] COPIED 238952 records (2156ms)
This implementation provides:
- High-throughput table copying over PostgreSQL's COPY protocol.
- Automatic retries at a fixed interval, with a configurable attempt limit.
- Per-table and per-batch status, record counts, start/end timestamps, and durations in the run log.
- One dated .log text file per day under the logs directory.
- All runtime parameters in a single JSON configuration file.

In an actual deployment, tune the batch size, thread count, retry policy, and other parameters to the workload, and monitor the log files to observe replication status and performance.