转载请署名:印风
------------------------------------------------------------------------------------
之前已经介绍过实现思路:binlog预分配早在两年前就由Yoshinori Matsunobu在5.1里实现过,但该实现存在一个问题,即在非xfs文件系统上,预分配文件时会一直持有大锁,可能导致tps长时间为0。这里转换思路,使用一个daemon plugin在后台做文件分配,当binlog切换时,直接将预分配好的文件(命名为mysql-binlog.PA)rename成新的binlog文件。
一组简单的测试数据,之前我在percona-discuss上发过,直接拿过来了...
以下Patch基于Percona Server5.5.24,目前处于测试中,比较简陋...
cmake时需要增加选项 -DWITH_BINLOG_PREALLOC=ON(同时要求平台提供posix_fallocate,即定义了HAVE_POSIX_FALLOCATE,否则预分配代码不会被编译进去)
Index: a/CMakeLists.txt
===================================================================
--- a.orig/CMakeLists.txt
+++ a/CMakeLists.txt
@@ -161,6 +161,11 @@ INCLUDE(install_layout)
INCLUDE(mysql_add_executable)
# Handle options
+# Build-time switch for binlog file preallocation (OFF by default).
+# When enabled, -DWITH_BINLOG_PREALLOC is added to the compile definitions.
+OPTION(WITH_BINLOG_PREALLOC "if allow binlog file prealloced" OFF)
+IF(WITH_BINLOG_PREALLOC)
+ADD_DEFINITIONS(-DWITH_BINLOG_PREALLOC)
+ENDIF()
+
OPTION(DISABLE_SHARED
"Don't build shared libraries, compile code as position-dependent" OFF)
IF(DISABLE_SHARED)
Index: a/include/my_global.h
===================================================================
--- a.orig/include/my_global.h
+++ a/include/my_global.h
@@ -1501,4 +1501,8 @@ static inline double rint(double x)
#endif /* EMBEDDED_LIBRARY */
+/*
+  BINLOG_PREALLOC is the macro the rest of the patch tests with #ifdef.
+  It is defined only when the build asked for the feature
+  (WITH_BINLOG_PREALLOC) and HAVE_POSIX_FALLOCATE is defined.
+*/
+#if defined (HAVE_POSIX_FALLOCATE) && defined(WITH_BINLOG_PREALLOC)
+#define BINLOG_PREALLOC
+#endif
+
#endif /* my_global_h */
Index: a/plugin/daemon_example/CMakeLists.txt
===================================================================
--- a.orig/plugin/daemon_example/CMakeLists.txt
+++ a/plugin/daemon_example/CMakeLists.txt
@@ -17,3 +17,6 @@ MYSQL_ADD_PLUGIN(daemon_example daemon_e
MODULE_ONLY MODULE_OUTPUT_NAME "libdaemon_example")
INSTALL(FILES daemon_example.ini DESTINATION ${INSTALL_PLUGINDIR})
+
+# Build binlog_prealloc.cc as a loadable plugin module (MODULE_ONLY) whose
+# output name is libbinlog_prealloc.
+MYSQL_ADD_PLUGIN(binlog_prealloc binlog_prealloc.cc
+ MODULE_ONLY MODULE_OUTPUT_NAME "libbinlog_prealloc")
Index: a/plugin/daemon_example/binlog_prealloc.cc
===================================================================
--- /dev/null
+++ a/plugin/daemon_example/binlog_prealloc.cc
@@ -0,0 +1,111 @@
+#ifndef MYSQL_SERVER
+#define MYSQL_SERVER
+#endif
+
+#include
+#include
+#include
+#include "my_global.h"
+#include
+#include
+#include "log.h"
+
+/*
+  Define __attribute__(A) away to nothing unless it is already a macro, when
+  compiling as C++, with a non-GNU compiler, or with GCC older than 2.8.
+*/
+#if !defined(__attribute__) && (defined(__cplusplus) || !defined(__GNUC__) || __GNUC__ == 2 && __GNUC_MINOR__ < 8)
+#define __attribute__(A)
+#endif
+
+/* Handle of the worker thread started by bin_prealloc_init() (plugin-local). */
+static pthread_t bin_prealloc_thread;
+/*
+  Everything below is defined outside this plugin. The binlog_prealloc* /
+  *_prealloc* objects and create_prealloc_file() are defined by this patch in
+  sql/log.cc; max_binlog_size is not defined anywhere in this patch
+  (presumably an existing server global -- confirm).
+*/
+extern unsigned long max_binlog_size;
+extern my_bool binlog_prealloc_inited;
+extern ulong binlog_prealloc ;
+extern my_bool use_plugin_prealloc;
+extern my_bool has_prealloc_next;
+extern pthread_mutex_t binlog_prealloc_mutex;
+extern pthread_cond_t binlog_prealloc_cond;
+extern char prealloc_file[FN_REFLEN];
+extern int create_prealloc_file(char *filename);
+
+/*
+  Cancellation cleanup handler: releases the mutex passed in arg.
+  Needed because the worker can be cancelled while it owns
+  binlog_prealloc_mutex (pthread_cond_wait() re-acquires the mutex before
+  acting on a cancellation request, and file creation contains cancellation
+  points as well).
+*/
+static void bin_prealloc_unlock(void *arg)
+{
+  pthread_mutex_unlock((pthread_mutex_t *) arg);
+}
+
+/**
+  Worker thread of the binlog_prealloc plugin.
+
+  Loops forever:
+    - while the feature is switched off (binlog_prealloc == 0) or the server
+      side has not been initialised yet (!binlog_prealloc_inited), sleeps
+      two seconds and re-checks;
+    - otherwise takes binlog_prealloc_mutex, waits on binlog_prealloc_cond
+      until the previously prepared file has been consumed
+      (has_prealloc_next == FALSE), creates the next spare file with
+      create_prealloc_file(prealloc_file), and marks it as available.
+
+  The mutex is held while the file is created; the binlog rotation code only
+  uses a trylock on it.
+
+  @param p  unused
+  @return   never returns normally; the thread is ended by pthread_cancel()
+*/
+pthread_handler_t bin_prealloc_func(void *p)
+{
+  while (1)
+  {
+    if (binlog_prealloc == 0 || !binlog_prealloc_inited)
+    {
+      sleep(2);
+      continue;
+    }
+
+    pthread_mutex_lock(&binlog_prealloc_mutex);
+    /* Make sure a cancellation inside the locked region drops the mutex. */
+    pthread_cleanup_push(bin_prealloc_unlock, &binlog_prealloc_mutex);
+
+    /* Loop, not 'if': condition waits may wake up spuriously. */
+    while (has_prealloc_next)
+      pthread_cond_wait(&binlog_prealloc_cond, &binlog_prealloc_mutex);
+
+    if (create_prealloc_file(prealloc_file) != 0)
+      fprintf(stderr, "Plugin 'bin_prealloc': "
+              "Could not prealloc file %s\n", prealloc_file);
+
+    /*
+      As before, the flags are set even when the creation failed: the
+      rotation code checks that the file exists before renaming it, and
+      waiting for the next rotation avoids retrying in a tight loop.
+    */
+    use_plugin_prealloc= TRUE;
+    has_prealloc_next= TRUE;
+
+    /* Pops the handler and runs it, i.e. unlocks the mutex. */
+    pthread_cleanup_pop(1);
+  }
+
+  return 0;
+}
+
+
+/**
+  Plugin initialisation: starts the joinable worker thread that prepares
+  spare binlog files.
+
+  use_plugin_prealloc is cleared first; the worker sets it once it has gone
+  through its first preallocation round.
+
+  @param p  unused
+  @retval 0 thread started
+  @retval 1 pthread_create() failed (a message is written to stderr)
+*/
+static int bin_prealloc_init(void *p)
+{
+  pthread_attr_t attr;
+  int err;
+
+  use_plugin_prealloc= FALSE;
+
+  pthread_attr_init(&attr);
+  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+  err= pthread_create(&bin_prealloc_thread, &attr, bin_prealloc_func, NULL);
+  /* The attribute object is not needed once the thread has been created. */
+  pthread_attr_destroy(&attr);
+
+  if (err != 0)
+  {
+    fprintf(stderr, "Plugin 'bin_prealloc': "
+            "Could not create bin_prealloc thread!\n");
+    return 1;
+  }
+
+  return 0;
+}
+
+
+/*
+  Plugin shutdown: cancels the worker thread, waits for it to finish, then
+  clears has_prealloc_next and use_plugin_prealloc so that the rotation code
+  stops using the plugin path. Always returns 0.
+
+  NOTE(review): if the worker is cancelled while it owns
+  binlog_prealloc_mutex and has no cancellation cleanup handler releasing
+  it, the mutex stays locked after the join -- verify the worker side.
+*/
+static int bin_prealloc_deinit(void *p)
+{
+ pthread_cancel(bin_prealloc_thread);
+ pthread_join(bin_prealloc_thread, NULL);
+
+ has_prealloc_next = FALSE;
+ use_plugin_prealloc = FALSE;
+ return 0;
+}
+
+
+/* Daemon-plugin info structure carrying only the interface version. */
+struct st_mysql_daemon bin_prealloc = { MYSQL_DAEMON_INTERFACE_VERSION };
+
+/*
+  Plugin descriptor. Field meanings follow the MySQL plugin API
+  (st_mysql_plugin); the three trailing NULLs are presumably status
+  variables, system variables and a reserved slot -- confirm against
+  mysql/plugin.h for the target server version.
+*/
+mysql_declare_plugin(bin_prealloc)
+{
+ MYSQL_DAEMON_PLUGIN, /* plugin type */
+ &bin_prealloc, /* type-specific info structure */
+ "binlog_prealloc", /* plugin name */
+ /* NOTE(review): the author string below looks like an e-mail address
+    replaced by an obfuscation placeholder; restore the real value. */
+ "[email protected]",
+ "a daemon plugin to prealloc binlog file", /* description */
+ PLUGIN_LICENSE_GPL,
+ bin_prealloc_init, /* called when the plugin is loaded */
+ bin_prealloc_deinit, /* called when the plugin is unloaded */
+ 0x0100, /* plugin version */
+ NULL,
+ NULL,
+ NULL
+}
+mysql_declare_plugin_end;
+
Index: a/sql/log_event.cc
===================================================================
--- a.orig/sql/log_event.cc
+++ a/sql/log_event.cc
@@ -65,6 +65,10 @@
*/
#define FMT_G_BUFSIZE(PREC) (3 + (PREC) + 5 + 1)
+#ifdef BINLOG_PREALLOC
+/*
+  Defined in sql/log.cc. The declared types must match the definitions:
+  use_binlog_prealloc is a my_bool there, so declaring it with a wider type
+  here would make every read also cover the bytes of neighbouring globals.
+*/
+extern ulonglong fetch_active_size(void);
+extern my_bool use_binlog_prealloc;
+#endif
#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
static int rows_event_stmt_cleanup(Relay_log_info const *rli, THD* thd);
@@ -1018,7 +1022,14 @@ int Log_event::read_log_event(IO_CACHE*
int result=0;
char buf[LOG_EVENT_MINIMAL_HEADER_LEN];
DBUG_ENTER("Log_event::read_log_event");
-
+#ifdef BINLOG_PREALLOC
+ if (use_binlog_prealloc && file->file_name && file->type == READ_CACHE) {
+ if (mysql_bin_log.is_active(file->file_name))
+ file->end_of_file= fetch_active_size();
+ else
+ file->end_of_file= ~(my_off_t) 0;
+ }
+#endif
if (log_lock)
mysql_mutex_lock(log_lock);
if (my_b_read(file, (uchar*) buf, sizeof(buf)))
Index: a/sql/log.cc
===================================================================
--- a.orig/sql/log.cc
+++ a/sql/log.cc
@@ -90,6 +90,112 @@ static SHOW_VAR binlog_status_vars_detai
{NullS, NullS, SHOW_LONG}
};
+#ifdef BINLOG_PREALLOC
+
+/* Last published end position of the active binlog; written through
+   set_active_size() and read through fetch_active_size(). */
+ulonglong active_binlog_size = 0;
+/* Value of the binlog_prealloc system variable; the patch only tests it for
+   zero (feature off) versus non-zero (feature on). */
+ulong binlog_prealloc = 0;
+
+/* Set to TRUE by prealloc_binlog_with_newname() after a successful
+   preallocation; reset to FALSE in new_file_impl() when preallocation is
+   not attempted. */
+my_bool use_binlog_prealloc = FALSE;
+/* TRUE once init_binlog_prealloc() has set up the file name, mutex and
+   condition variable. */
+my_bool binlog_prealloc_inited = FALSE;
+/* TRUE while the daemon plugin's worker thread is providing spare files. */
+my_bool use_plugin_prealloc = FALSE;
+/* TRUE when the worker has prepared a spare file that has not been
+   consumed by a rotation yet. */
+my_bool has_prealloc_next = FALSE;
+
+/* Mutex / condition variable used for the hand-off of the spare file
+   between the worker thread and the rotation code. */
+pthread_mutex_t binlog_prealloc_mutex;
+pthread_cond_t binlog_prealloc_cond;
+
+/* Path of the spare file: formatted binlog name plus the ".PA" suffix. */
+char prealloc_file[FN_REFLEN];
+
+/**
+  One-time set-up of the preallocation state.
+
+  Builds the spare-file path in prealloc_file (the formatted binlog name
+  with a hard-coded ".PA" suffix), initialises binlog_prealloc_mutex and
+  binlog_prealloc_cond, and finally sets binlog_prealloc_inited so that the
+  plugin worker thread may start using them.
+
+  If the path would not fit into prealloc_file, nothing is initialised and
+  binlog_prealloc_inited stays FALSE; the worker thread then stays idle and
+  rotation falls back to allocating the new binlog directly.
+
+  @param name  binlog name passed by the caller (new_file_impl() passes the
+               old log name)
+*/
+static void init_binlog_prealloc(const char * name)
+{
+  static const char suffix[]= ".PA";
+  size_t length;
+
+  use_plugin_prealloc= FALSE;
+  has_prealloc_next= FALSE;
+
+  bzero(prealloc_file, FN_REFLEN);
+
+  /* Flag value 4 is kept from the original patch (presumably
+     MY_UNPACK_FILENAME -- confirm against my_sys.h). */
+  fn_format(prealloc_file, name, mysql_data_home, "", 4);
+  length= strlen(prealloc_file);
+
+  /* sizeof(suffix) includes the terminating NUL. */
+  if (length + sizeof(suffix) > FN_REFLEN)
+  {
+    fprintf(stderr, "Prealloc Binlog Failed: file name too long:%s\n", name);
+    return;
+  }
+  memcpy(prealloc_file + length, suffix, sizeof(suffix));
+
+  pthread_mutex_init(&binlog_prealloc_mutex, NULL);
+  pthread_cond_init(&binlog_prealloc_cond, NULL);
+  binlog_prealloc_inited= TRUE;
+}
+
+/**
+  Atomically publishes a new end position for the active binlog.
+
+  The previous form passed a separately read copy of active_binlog_size as
+  the expected value of a single compare-and-swap, so the store was silently
+  dropped whenever the variable changed between that read and the swap.
+  Retrying until the swap succeeds makes the store unconditional.
+
+  @param new_val  position to publish
+  @return the value active_binlog_size held immediately before the update
+*/
+ulonglong set_active_size(ulonglong new_val)
+{
+  ulonglong old_val;
+
+  do
+  {
+    old_val= active_binlog_size;
+  } while (!__sync_bool_compare_and_swap(&active_binlog_size,
+                                         old_val, new_val));
+
+  return old_val;
+}
+
+
+/**
+  Atomically reads the published end position of the active binlog.
+
+  Implemented as an atomic add of zero, which leaves the value unchanged
+  and returns it.
+
+  @return current value of active_binlog_size
+*/
+ulonglong fetch_active_size(void)
+{
+  return __sync_fetch_and_add(&active_binlog_size, 0);
+}
+
+/**
+  Creates (or reopens) file_name and reserves max_binlog_size bytes for it
+  with posix_fallocate(), then syncs it to disk.
+
+  The file is created with owner read/write permission only, so that a
+  leftover file from an earlier run can be reopened by a non-root server;
+  prealloc_binlog_with_newname() sets the final permissions afterwards.
+  When allocation or sync fails the file is removed, so no partially
+  allocated file is left at the target path.
+
+  @param file_name  path of the file to preallocate
+  @retval 0   success
+  @retval -1  the file could not be opened
+  @retval 1   allocation or sync failed (file removed)
+*/
+int create_prealloc_file(char *file_name)
+{
+  int fd= open(file_name, O_CREAT | O_RDWR, 0600);
+  if (fd == -1)
+    return -1;
+
+  int failed= posix_fallocate(fd, 0, max_binlog_size) != 0 ||
+              my_sync(fd, MYF(MY_WME)) != 0;
+
+  close(fd);
+
+  if (failed)
+  {
+    unlink(file_name);
+    return 1;
+  }
+
+  return 0;
+}
+
+
+/**
+  Makes sure a preallocated file exists under new_name before the new
+  binlog is opened.
+
+  Plugin mode (use_plugin_prealloc set): tries to take
+  binlog_prealloc_mutex without blocking; if the worker thread currently
+  holds it, gives up so that rotation is never stalled. Otherwise renames
+  the spare file to new_name if one is available, marks the spare as
+  consumed and wakes the worker to prepare the next one.
+
+  Direct mode: allocates new_name itself with create_prealloc_file().
+
+  On success the file is made readable and writable for everyone (0666)
+  and use_binlog_prealloc is set. If the permissions cannot be set, the
+  file is removed so the caller can fall back to a normally created binlog.
+
+  @param new_name  path of the binlog file about to be opened
+  @retval 0   new_name exists and is preallocated
+  @retval -1  no preallocation was done; the caller proceeds as without
+              preallocation
+*/
+int prealloc_binlog_with_newname(char *new_name)
+{
+  if (use_plugin_prealloc)
+  {
+    /* If the lock is busy, simply return to the original mode
+       (no preallocation for this file). */
+    if (pthread_mutex_trylock(&binlog_prealloc_mutex) != 0)
+      return -1;
+
+    my_bool success= FALSE;
+
+    if (has_prealloc_next &&
+        access(prealloc_file, F_OK) == 0 &&
+        rename(prealloc_file, new_name) == 0)
+      success= TRUE;
+
+    /* Consumed or unusable: either way the worker must prepare a new one. */
+    has_prealloc_next= FALSE;
+
+    pthread_mutex_unlock(&binlog_prealloc_mutex);
+    pthread_cond_broadcast(&binlog_prealloc_cond);
+
+    if (!success)
+      return -1;
+  }
+  else if (create_prealloc_file(new_name) != 0)
+  {
+    fprintf(stderr, "Prealloc Binlog Failed:%s\n", new_name);
+    return -1;
+  }
+
+  /* 0666 is the same mode the original spelled as decimal 438. */
+  if (chmod(new_name, 0666) != 0)
+  {
+    fprintf(stderr, "Prealloc Binlog Failed: chmod %s\n", new_name);
+    unlink(new_name);
+    return -1;
+  }
+
+  use_binlog_prealloc= TRUE;
+  return 0;
+}
+
+#endif
/**
purge logs, master and slave sides both, related error code
convertor.
@@ -2122,6 +2228,12 @@ File open_binlog(IO_CACHE *log, const ch
*errmsg = "Could not open log file";
goto err;
}
+#ifdef BINLOG_PREALLOC
+ if (use_binlog_prealloc) {
+ log->file_name=(char *)log_file_name;
+ log->end_of_file= fetch_active_size();
+ }
+#endif
if (check_binlog_magic(log,errmsg))
goto err;
DBUG_RETURN(file);
@@ -3182,7 +3294,9 @@ bool MYSQL_BIN_LOG::open(const char *log
if (flush_io_cache(&log_file) ||
mysql_file_sync(log_file.file, MYF(MY_WME)))
goto err;
-
+#ifdef BINLOG_PREALLOC
+ set_active_size(log_file.pos_in_file);
+#endif
if (write_file_name_to_index_file)
{
#ifdef HAVE_REPLICATION
@@ -4378,7 +4492,16 @@ int MYSQL_BIN_LOG::new_file_impl(bool ne
old_name=name;
name=0; // Don't free name
close(LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX);
-
+#ifdef BINLOG_PREALLOC
+ /*try to pre-alloc binlog file,we don't care if this will fail*/
+ if (!is_relay_log && binlog_prealloc) {
+ if (unlikely(!binlog_prealloc_inited)) {
+ init_binlog_prealloc(old_name);
+ }
+ prealloc_binlog_with_newname(new_name_ptr);
+ } else
+ use_binlog_prealloc = FALSE;
+#endif
/*
Note that at this point, log_state != LOG_CLOSED (important for is_open()).
*/
@@ -5156,6 +5279,9 @@ err:
else
{
bool check_purge;
+#ifdef BINLOG_PREALLOC
+ set_active_size(event_info->log_pos);
+#endif
signal_update();
error= rotate(false, &check_purge);
mysql_mutex_unlock(&LOCK_log);
@@ -5540,6 +5666,9 @@ bool MYSQL_BIN_LOG::write_incident(THD *
if (!error && !(error= flush_and_sync(0)))
{
bool check_purge= false;
+#ifdef BINLOG_PREALLOC
+ set_active_size(ev.log_pos);
+#endif
signal_update();
error= rotate(false, &check_purge);
mysql_mutex_unlock(&LOCK_log);
@@ -5695,6 +5824,9 @@ void
MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
{
DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader");
+#ifdef BINLOG_PREALLOC
+ ulonglong last_actual_pos = 0;
+#endif
uint xid_count= 0;
uint write_count= 0;
bool check_purge= false;
@@ -5753,6 +5885,9 @@ MYSQL_BIN_LOG::trx_group_commit_leader(g
}
cache_data->commit_bin_log_file_pos= my_b_write_tell(&log_file);
+#ifdef BINLOG_PREALLOC
+ last_actual_pos = cache_data->commit_bin_log_file_pos;
+#endif
if (cache_data->using_xa && cache_data->xa_xid)
xid_count++;
}
@@ -5773,6 +5908,9 @@ MYSQL_BIN_LOG::trx_group_commit_leader(g
}
else
{
+#ifdef BINLOG_PREALLOC
+ set_active_size(last_actual_pos);
+#endif
signal_update();
}
@@ -6005,6 +6143,18 @@ void MYSQL_BIN_LOG::close(uint exiting)
original position on system that doesn't support pwrite().
*/
mysql_file_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0));
+#ifdef BINLOG_PREALLOC
+ end_io_cache(&log_file);
+ DBUG_ASSERT(is_active(log_file_name));
+ mysql_mutex_assert_owner(&LOCK_log);
+ set_active_size(log_file.pos_in_file);
+ if (use_binlog_prealloc && my_chsize(log_file.file,
+ log_file.pos_in_file, 0, MYF(MY_WME)))
+ {
+ write_error= 1;
+ sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
+ }
+#endif
}
/* this will cleanup IO_CACHE, sync and close the file */
Index: a/sql/sys_vars.cc
===================================================================
--- a.orig/sql/sys_vars.cc
+++ a/sql/sys_vars.cc
@@ -3330,6 +3330,12 @@ static Sys_var_uint Sys_slave_net_timeou
VALID_RANGE(1, LONG_TIMEOUT), DEFAULT(SLAVE_NET_TIMEOUT), BLOCK_SIZE(1),
NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
ON_UPDATE(fix_slave_net_timeout));
+#ifdef BINLOG_PREALLOC
+/*
+  Global system variable "binlog_prealloc" bound to the binlog_prealloc
+  variable; settable on the command line, range 0..100, default 0.
+  The code in this patch only distinguishes zero from non-zero.
+*/
+static Sys_var_ulong Sys_binlog_prealloc(
+ "binlog_prealloc", "default 0 , if binlog_prealloc >0, means prealloc binlog file",
+ GLOBAL_VAR(binlog_prealloc), CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(0,100), DEFAULT(0), BLOCK_SIZE(1));
+#endif
static bool check_slave_skip_counter(sys_var *self, THD *thd, set_var *var)
{
Index: a/sql/mysqld.h
===================================================================
--- a.orig/sql/mysqld.h
+++ a/sql/mysqld.h
@@ -238,7 +238,9 @@ extern I_List threads;
extern char err_shared_dir[];
extern TYPELIB thread_handling_typelib;
extern my_decimal decimal_zero;
-
+#ifdef BINLOG_PREALLOC
+/* Backing variable of the binlog_prealloc system variable (defined in log.cc). */
+extern ulong binlog_prealloc;
+#endif
extern ulonglong opt_log_warnings_suppress;
extern char* enforce_storage_engine;