2023-11-02 monetdb-事务提交-wal日志写入-分析

摘要:

2023-11-02 monetdb-事务提交-日志写入-分析

场景:

  1. 向mdb中写入数据
  2. 查看在事务commit时, mdb所做的处理
  3. 其中最为关键的是记录wal日志, 分析具体的处理
  4. wal日志可以理解为redo log, 在真正将数据写入磁盘前, 先写入redo log
  5. 目的是在数据写磁盘发生错误时, 可以用redo log中的日志进行重做, 保证持久性

核心流程:

#0  log_constant (lg=0x7f0fe35e7130, type=1, val=0x7f138c7f497f, id=6680, offset=35, cnt=5) at /root/work/trunk/mysql-server-mysql-8.0.33/storage/monetdb/gdk/gdk_logger.c:2508
#1  0x00007f13d9955e0a in log_segment (tr=0x7f0fe37756c0, s=0x7f0fe38a0950, id=6680) at /root/work/trunk/mysql-server-mysql-8.0.33/storage/monetdb/sql/storage/bat/bat_storage.c:3427
#2  0x00007f13d9955ecd in log_segments (tr=0x7f0fe37756c0, segs=0x7f0fe38e6880, id=6680) at /root/work/trunk/mysql-server-mysql-8.0.33/storage/monetdb/sql/storage/bat/bat_storage.c:3438
#3  0x00007f13d99587de in log_storage (tr=0x7f0fe37756c0, t=0x7f0fe38e60b0, s=0x7f0fe38e6810) at /root/work/trunk/mysql-server-mysql-8.0.33/storage/monetdb/sql/storage/bat/bat_storage.c:4043
#4  0x00007f13d995970f in log_update_del (tr=0x7f0fe37756c0, change=0x7f0fe37523f0) at /root/work/trunk/mysql-server-mysql-8.0.33/storage/monetdb/sql/storage/bat/bat_storage.c:4312
#5  0x00007f13d9910a8f in sql_trans_commit (tr=0x7f0fe37756c0) at /root/work/trunk/mysql-server-mysql-8.0.33/storage/monetdb/sql/storage/store.c:4047
#6  0x00007f13d992350c in sql_trans_end (s=0x7f0fe388ff80, ok=1) at /root/work/trunk/mysql-server-mysql-8.0.33/storage/monetdb/sql/storage/store.c:7201
#7  0x00007f13d99864a3 in mvc_commit (m=0x7f0fe3cc3c40, chain=0, name=0x0, enabling_auto_commit=true) at /root/work/trunk/mysql-server-mysql-8.0.33/storage/monetdb/sql/server/sql_mvc.c:548
#8  0x00007f13dbb5ba05 in monetdbe_set_commit (dbhdl=0x7f130c0112a0, value=1) at /root/work/trunk/mysql-server-mysql-8.0.33/storage/monetdb/tools/monetdbe/monetdbe.c:1047
#9  0x00000000037a155d in Mondetdb_Adaptor::execute_commit (this=0x7f13c27d0300, ok=true) at /root/work/trunk/mysql-server-mysql-8.0.33/sql/monetdb_adaptor/monetdb_adaptor.cc:341
#10 0x00000000037a4490 in Monetdb_Execute::~Monetdb_Execute (this=0x7f138c7f6fa0, __in_chrg=<optimized out>)
    at /root/work/trunk/mysql-server-mysql-8.0.33/sql/monetdb_adaptor/monetdb_adaptor.cc:896
#11 0x0000000003d37041 in Sql_cmd_insert_values::execute_inner (this=0x7f0fe3b82ec8, thd=0x7f130c001040) at /root/work/trunk/mysql-server-mysql-8.0.33/sql/sql_insert.cc:801
#12 0x0000000003650c4c in Sql_cmd_dml::execute (this=0x7f0fe3b82ec8, thd=0x7f130c001040) at /root/work/trunk/mysql-server-mysql-8.0.33/sql/sql_select.cc:578
#13 0x00000000035c9b58 in mysql_execute_command (thd=0x7f130c001040, first_level=true) at /root/work/trunk/mysql-server-mysql-8.0.33/sql/sql_parse.cc:3685
#14 0x00000000035cf38f in dispatch_sql_command (thd=0x7f130c001040, parser_state=0x7f138c7f88b0) at /root/work/trunk/mysql-server-mysql-8.0.33/sql/sql_parse.cc:5398
#15 0x00000000035c4eb2 in dispatch_command (thd=0x7f130c001040, com_data=0x7f138c7f99a0, command=COM_QUERY) at /root/work/trunk/mysql-server-mysql-8.0.33/sql/sql_parse.cc:2052
#16 0x00000000035c2dfa in do_command (thd=0x7f130c001040) at /root/work/trunk/mysql-server-mysql-8.0.33/sql/sql_parse.cc:1441
#17 0x00000000038034ee in handle_connection (arg=0xca4b270) at /root/work/trunk/mysql-server-mysql-8.0.33/sql/conn_handler/connection_handler_per_thread.cc:302
#18 0x0000000005813951 in pfs_spawn_thread (arg=0xcacb6b0) at /root/work/trunk/mysql-server-mysql-8.0.33/storage/perfschema/pfs.cc:3042
#19 0x00007f13db8741ca in start_thread () from /lib64/libpthread.so.0
#20 0x00007f13d9e24e73 in clone () from /lib64/libc.so.6

核心函数:

log_constant

函数源码:

/*
 * Write a LOG_UPDATE_CONST record for object `id` to the current log
 * output stream.
 *
 * The record consists of the format header (flag + id), the count `cnt`,
 * one byte holding the logger's type code for `type`, the `offset`, and
 * finally a single value `val` written with the atom's write function.
 *
 * When logging is disabled and `cnt` is non-zero, nothing is written;
 * only la_bat_update_count() is called (under the logger lock) with
 * offset + cnt.  When `cnt` is zero the function returns GDK_SUCCEED
 * without doing anything.
 *
 * Returns GDK_SUCCEED on success.  If writing the header part fails, the
 * refcount of the current log file is decremented and GDK_FAIL is
 * returned; any failure is reported through TRC_CRITICAL.
 */
gdk_return
log_constant(logger *lg, int type, ptr val, log_id id, lng offset, lng cnt)
{
	bte tpe = find_type(lg, type);
	gdk_return rc = GDK_SUCCEED;
	logformat hdr;

	hdr.flag = LOG_UPDATE_CONST;
	hdr.id = id;

	if (LOG_DISABLED(lg)) {
		/* logging is switched off: only keep the count up to date */
		if (cnt != 0) {
			log_lock(lg);
			rc = la_bat_update_count(lg, id, offset + cnt, lg->tid);
			log_unlock(lg);
		}
		return rc;
	}
	if (cnt == 0)
		return rc;	/* nothing to log */

	gdk_return (*write_atom) (const void *, stream *, size_t) = BATatoms[type].atomWrite;

	/* header part: format, count, type byte, offset -- in this order */
	if (log_write_format(lg, &hdr) != GDK_SUCCEED)
		goto header_failed;
	if (!mnstr_writeLng(lg->current->output_log, cnt))
		goto header_failed;
	if (mnstr_write(lg->current->output_log, &tpe, 1, 1) != 1)
		goto header_failed;
	if (!mnstr_writeLng(lg->current->output_log, offset))
		goto header_failed;

	/* payload: exactly one value */
	rc = write_atom(val, lg->current->output_log, 1);

	if (lg->debug & 1)
		fprintf(stderr, "#Logged %d " LLFMT " inserts\n", id, cnt);
	goto report;

  header_failed:
	(void) ATOMIC_DEC(&lg->current->refcount);
	rc = GDK_FAIL;

  report:
	if (rc != GDK_SUCCEED) {
		const char *msg = mnstr_peek_error(lg->current->output_log);
		TRC_CRITICAL(GDK, "write failed%s%s\n", msg ? ": " : "", msg ? msg : "");
	}
	return rc;
}

日志数据:

(gdb) p lg->current->output_log[0]
$19 = {
  name = 0x7f0fe360b4d0 "/usr/local/share/mysql/data/.#__mdb_dbfarm__/sql_logs/sql/log.5", 
  inner = 0x0, 
  swapbytes = false, 
  readonly = false, 
  isutf8 = false, 
  binary = true, 
  eof = false, 
  timeout = 0, 
  timeout_func = 0x0, 
  timeout_data = 0x0, 
  stream_data = {
    p = 0x7f130c012c90, 
    i = 201403536, 
    s = 201403536
  }, 
  read = 0x7f13dbaf8b9c , 
  write = 0x7f13dbaf8ccc , 
  close = 0x7f13dbaf8da6 , 
  clrerr = 0x7f13dbaf8ec8 , 
  destroy = 0x7f13dbaf8e7e , 
  flush = 0x7f13dbaf8f19 , 
  fsync = 0x7f13dbaf8fa8 , 
  fgetpos = 0x7f13dbaf903b , 
  fsetpos = 0x7f13dbaf90b4 , 
  update_timeout = 0x0, 
  isalive = 0x0, 
  errkind = MNSTR_NO__ERROR, 
  errmsg = '\000' 
}

[root@dev-stonedb-zhangshilong1 sql]# hexdump  -C log.5 
00000000  d2 04 00 0b 00 00 00 02  18 1a 00 00 05 00 00 00  |................|
00000010  00 00 00 00 01 19 00 00  00 00 00 00 00 00 09 18  |................|
00000020  1a 00 00 02 18 1a 00 00  05 00 00 00 00 00 00 00  |................|
00000030  01 19 00 00 00 00 00 00  00 00 03 16 1a 00 00 05  |................|
00000040  00 00 00 00 00 00 00 06  ff ff ff ff ff ff ff ff  |................|
00000050  01 00 00 00 02 00 00 00  03 00 00 00 04 00 00 00  |................|
00000060  05 00 00 00 03 17 1a 00  00 05 00 00 00 00 00 00  |................|
00000070  00 81 ff ff ff ff ff ff  ff ff 0a 00 00 00 00 00  |................|
00000080  00 00 61 00 62 00 63 00  64 00 65 00 09 e8 e5 ff  |..a.b.c.d.e.....|
00000090  ff 01 0b 00 00 00 00 0c  00 00 00 02 18 1a 00 00  |................|
000000a0  05 00 00 00 00 00 00 00  01 1e 00 00 00 00 00 00  |................|
000000b0  00 00 09 18 1a 00 00 02  18 1a 00 00 05 00 00 00  |................|
000000c0  00 00 00 00 01 1e 00 00  00 00 00 00 00 00 03 16  |................|
000000d0  1a 00 00 05 00 00 00 00  00 00 00 06 ff ff ff ff  |................|
000000e0  ff ff ff ff 01 00 00 00  02 00 00 00 03 00 00 00  |................|
000000f0  04 00 00 00 05 00 00 00  03 17 1a 00 00 05 00 00  |................|
00000100  00 00 00 00 00 81 ff ff  ff ff ff ff ff ff 0a 00  |................|
00000110  00 00 00 00 00 00 61 00  62 00 63 00 64 00 65 00  |......a.b.c.d.e.|
00000120  09 e8 e5 ff ff 01 0c 00  00 00                    |..........|
0000012a

sql_trans_commit

函数源码:


/*
 * Commit transaction `tr`.
 *
 * If `tr` has pending changes, the work is done while holding
 * store->commit:
 *   1. For a top-level transaction (no parent), validate predicates
 *      (sql_trans_valid) and dependencies
 *      (transaction_check_dependencies_and_removals); on failure the
 *      transaction is rolled back and SQL_CONFLICT or SQL_ERR is returned.
 *   2. If the transaction is top-level and tr->logchanges > 0, write the
 *      changes to the log through store->logger_api
 *      (log_tstart, each change's log callback, sequences, log_tend).
 *   3. Under store_lock: pick commit_ts/oldest, run each change's commit
 *      callback, propagate dependencies, and clean up or hand over the
 *      change records.
 *   4. If something was logged, call log_tflush.
 * If `tr` has no changes and it is the only active transaction, only
 * store_pending_changes() is run.
 *
 * In all cases, local temporary tables with commit action CA_DROP are
 * dropped afterwards and clean_predicates_and_propagate_to_parent() is
 * called when no error occurred.
 *
 * Returns SQL_OK on success, SQL_CONFLICT when validation reports
 * LOG_CONFLICT, and SQL_ERR for any other failure.
 */
int
sql_trans_commit(sql_trans *tr)
{
	int ok = LOG_OK;
	sqlstore *store = tr->store;

	if (!list_empty(tr->changes)) {
		bool flush = false;
		ulng commit_ts = 0, oldest = 0, log_file_id = 0;

		MT_lock_set(&store->commit);

		/* top-level transaction with predicates: validate them first */
		if (!tr->parent && !list_empty(tr->predicates)) {
			ok = sql_trans_valid(tr);
			if (ok != LOG_OK) {
				sql_trans_rollback(tr, true);
				MT_lock_unset(&store->commit);
				return ok == LOG_CONFLICT ? SQL_CONFLICT : SQL_ERR;
			}
		}

		/* top-level transaction with dependencies: check them as well */
		if (!tr->parent &&
			(!list_empty(tr->dependencies) || !list_empty(tr->depchanges))) {
			ok = transaction_check_dependencies_and_removals(tr);
			if (ok != LOG_OK) {
				sql_trans_rollback(tr, true);
				MT_lock_unset(&store->commit);
				return ok == LOG_CONFLICT ? SQL_CONFLICT : SQL_ERR;
			}
		}

		/* log changes should only be done if there is something to log */
		const bool log = !tr->parent && tr->logchanges > 0;

		if (log) {
			/* request a flush only when the number of logged changes exceeds
			 * the threshold (5 with FORCEMITOMASK set in GDKdebug, otherwise
			 * 1000000) and the store has no pending changes */
			const int min_changes = GDKdebug & FORCEMITOMASK ? 5 : 1000000;
			flush = (tr->logchanges > min_changes && list_empty(store->changes));
		}

		/* store->flush is held from here until after log_tflush below */
		if (flush)
			MT_lock_set(&store->flush);
		if (log) {
			ok = store->logger_api.log_tstart(store, flush, &log_file_id); /* wal start */
			/* log each change through its log callback; stop at the first error */
			for(node *n=tr->changes->h; n && ok == LOG_OK; n = n->next) {
				sql_change *c = n->data;

				if (c->log && ok == LOG_OK)
					ok = c->log(tr, c);
			}
			/* log the pending sequence changes and remove them from the list */
			if (ok == LOG_OK && !list_empty(store->seqchanges)) {
				sequences_lock(store);
				for(node *n = store->seqchanges->h; n; ) {
					node *next = n->next;
					log_store_sequence(store, n->data);
					list_remove_node(store->seqchanges, NULL, n);
					n = next;
				}
				sequences_unlock(store);
			}
			/* log the object id counter only if it changed since the last time */
			if (ok == LOG_OK && store->prev_oid != store->obj_id)
				ok = store->logger_api.log_tsequence(store, OBJ_SID, store->obj_id);
			store->prev_oid = store->obj_id;


			if (ok == LOG_OK)
				ok = store->logger_api.log_tend(store); /* wal end */
		}
		store_lock(store);

		if (tr->parent) {
			/* nested transaction: take the timestamps from the parent and
			 * carry the number of logged changes over to it */
			commit_ts = oldest = tr->parent->tid;
			tr->parent->logchanges += tr->logchanges;
		}
		else {
			commit_ts = store_timestamp(store);
			oldest = store_oldest(store);
		}
		tr->logchanges = 0;
		TRC_DEBUG(SQL_STORE, "Forwarding changes (" ULLFMT ", " ULLFMT ") -> " ULLFMT "\n", tr->tid, tr->ts, commit_ts);
		/* apply committed changes */
		/* when this is the only active transaction, use commit_ts as oldest */
		if (ATOMIC_GET(&store->nr_active) == 1 && !tr->parent)
			oldest = commit_ts;
		store_pending_changes(store, oldest);
		/* NOTE(review): this loop (and the cleanup loop below) is skipped
		 * entirely when ok != LOG_OK, e.g. after a logging failure */
		for(node *n=tr->changes->h; n && ok == LOG_OK; n = n->next) {
			sql_change *c = n->data;

			/* ok == LOG_OK holds here because of the loop condition, so the
			 * else branch is taken only for changes without a commit callback */
			if (c->commit && ok == LOG_OK)
				ok = c->commit(tr, c, commit_ts, oldest);
			else
				c->obj->new = 0;
			c->ts = commit_ts;
		}
		/* propagate transaction dependencies to the storage only if other transactions are running */
		if (ok == LOG_OK && !tr->parent && ATOMIC_GET(&store->nr_active) > 1) {
			if (!list_empty(tr->dependencies)) {
				for (node *n = tr->dependencies->h; n && ok == LOG_OK; n = n->next) {
					sql_dependency_change *lchange = (sql_dependency_change*) n->data;
					ok = transaction_add_hash_entry(store->dependencies, lchange->objid, lchange->type, commit_ts);
				}
			}
			if (!list_empty(tr->depchanges)) {
				for (node *n = tr->depchanges->h; n && ok == LOG_OK; n = n->next) {
					sql_dependency_change *lchange = (sql_dependency_change*) n->data;
					ok = transaction_add_hash_entry(store->depchanges, lchange->objid, lchange->type, commit_ts);
				}
			}
		}
		/* garbage collect */
		for(node *n=tr->changes->h; n && ok == LOG_OK; ) {
			node *next = n->next;
			sql_change *c = n->data;

			/* delete the change if it has no cleanup callback or the callback
			 * returns non-zero; otherwise hand it over to the parent
			 * transaction or to the store's list of changes */
			if (!c->cleanup || c->cleanup(store, c, oldest)) {
				_DELETE(c);
			} else if (tr->parent) { /* need to keep everything */
				tr->parent->changes = sa_list_append(tr->sa, tr->parent->changes, c);
			} else {
				store->changes = sa_list_append(tr->sa, store->changes, c);
			}
			n = next;
		}
		tr->active = 2; /* small hack enabling to signal that this transaction has committed */
		tr->ts = commit_ts; /* fix profiler output */
		store_unlock(store);
		/* flush the log structure */
		if (log) {
			if (!flush)
				MT_lock_unset(&store->commit); /* release the commit lock while flushing to disk */
			if (ok == LOG_OK)
				ok = store->logger_api.log_tflush(store, log_file_id, commit_ts); /* flush/sync */
			if (!flush)
				MT_lock_set(&store->commit); /* re-acquire the commit lock so the unset below is balanced */
			if (flush)
				MT_lock_unset(&store->flush);
		}
		MT_lock_unset(&store->commit);
		list_destroy(tr->changes);
		tr->changes = NULL;
	} else if (ATOMIC_GET(&store->nr_active) == 1) { /* just me cleanup */
		/* no changes of our own: only process the store's pending changes */
		MT_lock_set(&store->commit);
		store_lock(store);
		ulng oldest = store_timestamp(store);
		store_pending_changes(store, oldest);
		store_unlock(store);
		MT_lock_unset(&store->commit);
	}
	/* drop local temp tables with commit action CA_DROP, after cleanup */
	if (cs_size(&tr->localtmps)) {
		/* fetch the next node first: the drop call is assumed to modify the set -- TODO confirm */
		for(node *n=tr->localtmps.set->h; n; ) {
			node *next = n->next;
			sql_table *tt = n->data;

			if (tt->commit_action == CA_DROP)
				(void) sql_trans_drop_table_id(tr, tt->s, tt->base.id, DROP_RESTRICT);
			n = next;
		}
	}
	/* destroy the dset list of the local temp tables, if there is one */
	if (tr->localtmps.dset) {
		list_destroy2(tr->localtmps.dset, store);
		tr->localtmps.dset = NULL;
	}
	tr->localtmps.nelm = NULL;

	if (ok == LOG_OK)
		ok = clean_predicates_and_propagate_to_parent(tr);

	/* any remaining failure is reported as SQL_ERR */
	return (ok==LOG_OK)?SQL_OK:SQL_ERR;
}

你可能感兴趣的:(monetdb,monetdb,数据库)