HandlerSocket是mysql的一个插件,主要是越过parser、optimizer这一层,直接对数据库进行key/value的存取;对于大内存的数据库,能极大地提高性能.
通过阅读代码,发现它主要用到了mysql的下面的结构体
/*
 * One element of dbcontext's table vector: a TABLE pointer together with
 * a reference count and a "modified" flag. A default-constructed entry
 * holds a null table, a zero refcount and modified == false.
 */
struct tablevec_entry {
  TABLE *table;     /* null until a table is stored in this entry */
  size_t refcount;  /* starts at zero */
  bool modified;    /* starts out false */

  tablevec_entry()
    : table(0),
      refcount(0),
      modified(false)
  {
  }
};
/*
 * Bundles an Item_func_get_lock and an Item_func_release_lock that both
 * operate on the fixed key string "handlersocket_wr", plus the timeout
 * item handed to the get-lock function. All four items are resolved
 * against the given THD in the constructor.
 */
struct expr_user_lock : private noncopyable {
  expr_user_lock(THD *thd, int timeout)
    : lck_key("handlersocket_wr", 16, &my_charset_latin1),
      lck_timeout(timeout),
      lck_func_get_lock(&lck_key, &lck_timeout),
      lck_func_release_lock(&lck_key)
  {
    /* The argument items are fixed first, then the functions using them. */
    lck_key.fix_fields(thd, 0);
    lck_timeout.fix_fields(thd, 0);
    lck_func_get_lock.fix_fields(thd, 0);
    lck_func_release_lock.fix_fields(thd, 0);
  }

  /* Evaluates the get-lock item and hands back its integer value. */
  long long get_lock()
  {
    const long long acquired = lck_func_get_lock.val_int();
    return acquired;
  }

  /* Evaluates the release-lock item and hands back its integer value. */
  long long release_lock()
  {
    const long long released = lck_func_release_lock.val_int();
    return released;
  }

 private:
  /* Declaration order matters: the function items take the addresses of
   * lck_key / lck_timeout, so those two are declared (and built) first. */
  Item_string lck_key;
  Item_int lck_timeout;
  Item_func_get_lock lck_func_get_lock;
  Item_func_release_lock lck_func_release_lock;
};
/*
 * Database context: the concrete class behind the dbcontext_i interface.
 * It holds a THD, a MYSQL_LOCK and a vector of opened tables, and exposes
 * the open/exec commands that operate on them. Only declarations are
 * visible here; the notes below describe what the signatures and the
 * cmd_find_internal() definition show, and hedge everything else.
 */
struct dbcontext : public dbcontext_i, private noncopyable {
/* d: the owning database object; for_write: whether this context may modify
 * data (cmd_find_internal() answers "readonly" to modifications otherwise). */
dbcontext(volatile database *d, bool for_write);
virtual ~dbcontext();
/* Thread setup/teardown hooks. NOTE(review): presumably called once by the
 * worker thread that owns this context - confirm against the caller. */
virtual void init_thread(const void *stack_botton,
volatile int& shutdown_flag);
virtual void term_thread();
virtual bool check_alive();
/* Lock / unlock the tables. After lock_tables_if(), cmd_find_internal()
 * treats lock == 0 as a failure. */
virtual void lock_tables_if();
virtual void unlock_tables_if();
virtual bool get_commit_error();
virtual void clear_error();
virtual void close_tables_if();
/* tbl_id is presumably an index into table_vec (see refcount in
 * tablevec_entry) - TODO confirm in the definitions. */
virtual void table_addref(size_t tbl_id);
virtual void table_release(size_t tbl_id);
/* Command entry points; results are reported through the callback cb. */
virtual void cmd_open(dbcallback_i& cb, const cmd_open_args& args);
virtual void cmd_exec(dbcallback_i& cb, const cmd_exec_args& args);
virtual void set_statistics(size_t num_conns, size_t num_active);
private:
/* printf-style helper; the format string is argument 2 for the attribute
 * because the implicit this pointer counts as argument 1. */
int set_thread_message(const char *fmt, ...)
__attribute__((format (printf, 2, 3)));
bool parse_fields(TABLE *const table, const char *str,
prep_stmt::fields_type& flds);
void cmd_insert_internal(dbcallback_i& cb, const prep_stmt& pst,
const string_ref *fvals, size_t fvalslen);
void cmd_sql_internal(dbcallback_i& cb, const prep_stmt& pst,
const string_ref *fvals, size_t fvalslen);
/* Index lookup with optional filtering and in-place modification. */
void cmd_find_internal(dbcallback_i& cb, const prep_stmt& pst,
ha_rkey_function find_flag, const cmd_exec_args& args);
/* Filter helpers: cmd_find_internal() sizes a buffer with
 * calc_filter_buf_size(), fills it with fill_filter_buf() and then calls
 * check_filter() for each row read (non-zero skips the row, negative
 * stops the scan). */
size_t calc_filter_buf_size(TABLE *table, const prep_stmt& pst,
const record_filter *filters);
bool fill_filter_buf(TABLE *table, const prep_stmt& pst,
const record_filter *filters, uchar *filter_buf, size_t len);
int check_filter(dbcallback_i& cb, TABLE *table, const prep_stmt& pst,
const record_filter *filters, const uchar *filter_buf);
/* resp_record() is called for each hit when a record response is wanted;
 * dump_record() is only called from the debug-output path. */
void resp_record(dbcallback_i& cb, TABLE *const table, const prep_stmt& pst);
void dump_record(dbcallback_i& cb, TABLE *const table, const prep_stmt& pst);
/* mod_op is one of 'U', 'D', '+', '-' (update, delete, increment,
 * decrement); modified_count is an in/out counter. */
int modify_record(dbcallback_i& cb, TABLE *const table,
const prep_stmt& pst, const cmd_exec_args& args, char mod_op,
size_t& modified_count);
private:
typedef std::vector<tablevec_entry> table_vec_type;
/* NOTE(review): presumably (database name, table name) - confirm. */
typedef std::pair<std::string, std::string> table_name_type;
typedef std::map<table_name_type, size_t> table_map_type;
private:
volatile database *const dbref;
bool for_write_flag;
THD *thd;
/* Null when no lock is held or locking failed (see cmd_find_internal()). */
MYSQL_LOCK *lock;
bool lock_failed;
std::auto_ptr<expr_user_lock> user_lock;
int user_level_lock_timeout;
bool user_level_lock_locked;
bool commit_error;
std::vector<char> info_message_buf;
/* Opened tables, indexed by the table id carried in a prep_stmt. */
table_vec_type table_vec;
/* Maps a table_name_type to a size_t, presumably the table's index in
 * table_vec - TODO confirm. */
table_map_type table_map;
};
对表加锁,调用的是mysql的
MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **tables, uint count, uint flags)
表解锁
void mysql_unlock_tables(THD *thd, MYSQL_LOCK *sql_lock)
修改完后,提交事务
bool trans_commit_stmt(THD *thd)
关闭表
void close_thread_tables(THD *thd)
打开表
bool open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root,
Open_table_context *ot_ctx)
//最核心的函数 void dbcontext::cmd_find_internal(dbcallback_i& cb, const prep_stmt& pst, ha_rkey_function find_flag, const cmd_exec_args& args) { const bool debug_out = (verbose_level >= 100); bool need_resp_record = true; char mod_op = 0; const string_ref& mod_op_str = args.mod_op; if (mod_op_str.size() != 0) { if (!for_write_flag) { return cb.dbcb_resp_short(2, "readonly"); } mod_op = mod_op_str.begin()[0]; need_resp_record = mod_op_str.size() > 1 && mod_op_str.begin()[1] == '?'; switch (mod_op) { case 'U': /* update */ case 'D': /* delete */ case '+': /* increment */ case '-': /* decrement */ break; default: if (debug_out) { fprintf(stderr, "unknown modop: %c\n", mod_op); } return cb.dbcb_resp_short(2, "modop"); } } lock_tables_if(); if (lock == 0) { return cb.dbcb_resp_short(1, "lock_tables"); } if (pst.get_table_id() >= table_vec.size()) { return cb.dbcb_resp_short(2, "tblnum"); } TABLE *const table = table_vec[pst.get_table_id()].table; /* keys */ if (pst.get_idxnum() >= table->s->keys) { return cb.dbcb_resp_short(2, "idxnum"); } KEY& kinfo = table->key_info[pst.get_idxnum()]; if (args.kvalslen > kinfo.key_parts) { return cb.dbcb_resp_short(2, "kpnum"); } uchar *const key_buf = DENA_ALLOCA_ALLOCATE(uchar, kinfo.key_length); size_t invalues_idx = 0; size_t kplen_sum = prepare_keybuf(args, key_buf, table, kinfo, invalues_idx); /* filters */ uchar *filter_buf = 0; if (args.filters != 0) { const size_t filter_buf_len = calc_filter_buf_size(table, pst, args.filters); filter_buf = DENA_ALLOCA_ALLOCATE(uchar, filter_buf_len); if (!fill_filter_buf(table, pst, args.filters, filter_buf, filter_buf_len)) { return cb.dbcb_resp_short(2, "filterblob"); } } /* handler */ table->read_set = &table->s->all_set; handler *const hnd = table->file; if (!for_write_flag) { hnd->init_table_handle_for_HANDLER(); } hnd->ha_index_or_rnd_end(); hnd->ha_index_init(pst.get_idxnum(), 1); if (need_resp_record) { cb.dbcb_resp_begin(pst.get_ret_fields().size()); } const uint32_t limit = args.limit ? 
args.limit : 1; uint32_t skip = args.skip; size_t modified_count = 0; int r = 0; bool is_first = true; for (uint32_t cnt = 0; cnt < limit + skip;) { if (is_first) { is_first = false; const key_part_map kpm = (1U << args.kvalslen) - 1; r = hnd->index_read_map(table->record[0], key_buf, kpm, find_flag); } else if (args.invalues_keypart >= 0) { if (++invalues_idx >= args.invalueslen) { break; } kplen_sum = prepare_keybuf(args, key_buf, table, kinfo, invalues_idx); const key_part_map kpm = (1U << args.kvalslen) - 1; r = hnd->index_read_map(table->record[0], key_buf, kpm, find_flag); } else { switch (find_flag) { case HA_READ_BEFORE_KEY: case HA_READ_KEY_OR_PREV: r = hnd->index_prev(table->record[0]); break; case HA_READ_AFTER_KEY: case HA_READ_KEY_OR_NEXT: r = hnd->index_next(table->record[0]); break; case HA_READ_KEY_EXACT: r = hnd->index_next_same(table->record[0], key_buf, kplen_sum); break; default: r = HA_ERR_END_OF_FILE; /* to finish the loop */ break; } } if (debug_out) { fprintf(stderr, "r=%d\n", r); if (r == 0 || r == HA_ERR_RECORD_DELETED) { dump_record(cb, table, pst); } } int filter_res = 0; if (r != 0) { /* no-count */ } else if (args.filters != 0 && (filter_res = check_filter(cb, table, pst, args.filters, filter_buf)) != 0) { if (filter_res < 0) { break; } } else if (skip > 0) { --skip; } else { /* hit */ if (need_resp_record) { resp_record(cb, table, pst); } if (mod_op != 0) { r = modify_record(cb, table, pst, args, mod_op, modified_count); } ++cnt; } if (args.invalues_keypart >= 0 && r == HA_ERR_KEY_NOT_FOUND) { continue; } if (r != 0 && r != HA_ERR_RECORD_DELETED) { break; } } hnd->ha_index_or_rnd_end(); if (r != 0 && r != HA_ERR_RECORD_DELETED && r != HA_ERR_KEY_NOT_FOUND && r != HA_ERR_END_OF_FILE) { /* failed */ if (need_resp_record) { /* revert dbcb_resp_begin() and dbcb_resp_entry() */ cb.dbcb_resp_cancel(); } cb.dbcb_resp_short_num(1, r); } else { /* succeeded */ if (need_resp_record) { cb.dbcb_resp_end(); } else { cb.dbcb_resp_short_num(0, 
modified_count); } } DENA_ALLOCA_FREE(filter_buf); DENA_ALLOCA_FREE(key_buf); }
总的来说,HandlerSocket并没有直接调用到innodb的函数,还是调用了上面那层,用C++写的函数
把传进来的key,封装成索引的查找条件,之后去数据库中查找
锁的粒度较大,是表级的,对于select较多的应用应该较快,对于update较多的应用,性能不一定高吧
实质上类似于通过C语言直接调用oracle的OCI,性能提高主要还是mysql的实现好吧
,