Redis没有表的概念。但是可以用key作标识进行区分,比如:user:1000作为key值,表示在user表下id为1000的元素,类似于user表的id=1000的行。
Redis逻辑关系: redisServer–>redisDB–>key-redisObject。所以这里主要介绍redisServer、redisDB、redisObject
结构。
服务端结构体redisServer存储Redis服务器的所有信息,包括但不限于数据库、配置参数、命令表、监听端口与地址、客户端列表、若干统计信息、RDB与AOF持久化相关信息、主从复制相关信息、集群相关信息等。
redis.h/redisServer
结构记录了和服务器相关的所有数据, 这个结构体主要包含以下信息:
/* Global server state. A single structure records everything related to the
 * server: databases, configuration, command table, networking, clients,
 * statistics, RDB/AOF persistence, replication, cluster and scripting. */
struct redisServer {
/* General */
// Path of the configuration file
char *configfile; /* Absolute config file path, or NULL */
// How often serverCron() is invoked
int hz; /* serverCron() calls frequency in hertz */
// Pointer to the array of database objects
redisDb *db;
// Table of supported commands
dict *commands; /* Command table */
// Commands as they were before any renaming
dict *orig_commands; /* Command table before command renaming. */
// Event loop
aeEventLoop *el;
// Incremented once per minute
unsigned lruclock:22; /* Clock incrementing every minute, for LRU */
unsigned lruclock_padding:10;
int shutdown_asap; /* SHUTDOWN needed ASAP */
int activerehashing; /* Incremental rehash in serverCron() */
// Authentication password
char *requirepass; /* Pass for AUTH command, or NULL */
char *pidfile; /* PID file path */
int arch_bits; /* 32 or 64 depending on sizeof(long) */
int cronloops; /* Number of times the cron function run */
char runid[REDIS_RUN_ID_SIZE+1]; /* ID always different at every exec. */
int sentinel_mode; /* True if this instance is a Sentinel. */
/* Networking */
int port; /* TCP listening port */
int tcp_backlog; /* TCP listen() backlog */
char *bindaddr[REDIS_BINDADDR_MAX]; /* Addresses we should bind to */
int bindaddr_count; /* Number of addresses in server.bindaddr[] */
char *unixsocket; /* UNIX socket path */
mode_t unixsocketperm; /* UNIX socket permission */
int ipfd[REDIS_BINDADDR_MAX]; /* TCP socket file descriptors */
int ipfd_count; /* Used slots in ipfd[] */
int sofd; /* Unix socket file descriptor */
int cfd[REDIS_BINDADDR_MAX];/* Cluster bus listening socket */
int cfd_count; /* Used slots in cfd[] */
// Connected clients
list *clients; /* List of active clients */
list *clients_to_close; /* Clients to close asynchronously */
list *slaves, *monitors; /* List of slaves and MONITORs */
redisClient *current_client; /* Current client, only used on crash report */
int clients_paused; /* True if clients are currently paused */
mstime_t clients_pause_end_time; /* Time when we undo clients_paused */
char neterr[ANET_ERR_LEN]; /* Error buffer for anet.c */
dict *migrate_cached_sockets;/* MIGRATE cached sockets */
/* RDB / AOF loading information */
int loading; /* We are loading data from disk if true */
off_t loading_total_bytes;
off_t loading_loaded_bytes;
time_t loading_start_time;
off_t loading_process_events_interval_bytes;
/* Fast pointers to often looked up command */
struct redisCommand *delCommand, *multiCommand, *lpushCommand, *lpopCommand,
*rpopCommand;
/* Fields used only for stats */
time_t stat_starttime; /* Server start time */
long long stat_numcommands; /* Number of processed commands */
long long stat_numconnections; /* Number of connections received */
long long stat_expiredkeys; /* Number of expired keys */
long long stat_evictedkeys; /* Number of evicted keys (maxmemory) */
long long stat_keyspace_hits; /* Number of successful lookups of keys */
long long stat_keyspace_misses; /* Number of failed lookups of keys */
size_t stat_peak_memory; /* Max used memory record */
long long stat_fork_time; /* Time needed to perform latest fork() */
long long stat_rejected_conn; /* Clients rejected because of maxclients */
long long stat_sync_full; /* Number of full resyncs with slaves. */
long long stat_sync_partial_ok; /* Number of accepted PSYNC requests. */
long long stat_sync_partial_err;/* Number of unaccepted PSYNC requests. */
// Commands recorded in the slow log
list *slowlog; /* SLOWLOG list of commands */
long long slowlog_entry_id; /* SLOWLOG current entry ID */
long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */
unsigned long slowlog_max_len; /* SLOWLOG max number of items logged */
/* The following two are used to track instantaneous "load" in terms
* of operations per second. */
long long ops_sec_last_sample_time; /* Timestamp of last sample (in ms) */
long long ops_sec_last_sample_ops; /* numcommands in last sample */
long long ops_sec_samples[REDIS_OPS_SEC_SAMPLES];
int ops_sec_idx;
/* Configuration */
int verbosity; /* Loglevel in redis.conf */
int maxidletime; /* Client timeout in seconds */
int tcpkeepalive; /* Set SO_KEEPALIVE if non-zero. */
int active_expire_enabled; /* Can be disabled for testing purposes. */
size_t client_max_querybuf_len; /* Limit for client query buffer length */
int dbnum; /* Total number of configured DBs */
int daemonize; /* True if running as a daemon */
clientBufferLimitsConfig client_obuf_limits[REDIS_CLIENT_LIMIT_NUM_CLASSES];
/* AOF persistence */
int aof_state; /* REDIS_AOF_(ON|OFF|WAIT_REWRITE) */
int aof_fsync; /* Kind of fsync() policy */
char *aof_filename; /* Name of the AOF file */
int aof_no_fsync_on_rewrite; /* Don't fsync if a rewrite is in prog. */
int aof_rewrite_perc; /* Rewrite AOF if % growth is > M and... */
off_t aof_rewrite_min_size; /* the AOF file is at least N bytes. */
off_t aof_rewrite_base_size; /* AOF size on latest startup or rewrite. */
off_t aof_current_size; /* AOF current size. */
int aof_rewrite_scheduled; /* Rewrite once BGSAVE terminates. */
pid_t aof_child_pid; /* PID if rewriting process */
list *aof_rewrite_buf_blocks; /* Hold changes during an AOF rewrite. */
sds aof_buf; /* AOF buffer, written before entering the event loop */
int aof_fd; /* File descriptor of currently selected AOF file */
int aof_selected_db; /* Currently selected DB in AOF */
time_t aof_flush_postponed_start; /* UNIX time of postponed AOF flush */
time_t aof_last_fsync; /* UNIX time of last fsync() */
time_t aof_rewrite_time_last; /* Time used by last AOF rewrite run. */
time_t aof_rewrite_time_start; /* Current AOF rewrite start time. */
int aof_lastbgrewrite_status; /* REDIS_OK or REDIS_ERR */
unsigned long aof_delayed_fsync; /* delayed AOF fsync() counter */
int aof_rewrite_incremental_fsync;/* fsync incrementally while rewriting? */
int aof_last_write_status; /* REDIS_OK or REDIS_ERR */
int aof_last_write_errno; /* Valid if aof_last_write_status is ERR */
/* RDB persistence */
long long dirty; /* Changes to DB from the last save */
long long dirty_before_bgsave; /* Used to restore dirty on failed BGSAVE */
pid_t rdb_child_pid; /* PID of RDB saving child */
struct saveparam *saveparams; /* Save points array for RDB */
int saveparamslen; /* Number of saving points */
char *rdb_filename; /* Name of RDB file */
int rdb_compression; /* Use compression in RDB? */
int rdb_checksum; /* Use RDB checksum? */
time_t lastsave; /* Unix time of last successful save */
time_t lastbgsave_try; /* Unix time of last attempted bgsave */
time_t rdb_save_time_last; /* Time used by last RDB save run. */
time_t rdb_save_time_start; /* Current RDB save start time. */
int lastbgsave_status; /* REDIS_OK or REDIS_ERR */
int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */
/* Propagation of commands in AOF / replication */
redisOpArray also_propagate; /* Additional command to propagate. */
/* Logging */
char *logfile; /* Path of log file */
int syslog_enabled; /* Is syslog enabled? */
char *syslog_ident; /* Syslog ident */
int syslog_facility; /* Syslog facility */
/* Replication (master) */
int slaveseldb; /* Last SELECTed DB in replication output */
long long master_repl_offset; /* Global replication offset */
int repl_ping_slave_period; /* Master pings the slave every N seconds */
char *repl_backlog; /* Replication backlog for partial syncs */
long long repl_backlog_size; /* Backlog circular buffer size */
long long repl_backlog_histlen; /* Backlog actual data length */
long long repl_backlog_idx; /* Backlog circular buffer current offset */
long long repl_backlog_off; /* Replication offset of first byte in the
backlog buffer. */
time_t repl_backlog_time_limit; /* Time without slaves after the backlog
gets released. */
time_t repl_no_slaves_since; /* We have no slaves since that time.
Only valid if server.slaves len is 0. */
int repl_min_slaves_to_write; /* Min number of slaves to write. */
int repl_min_slaves_max_lag; /* Max lag of slaves to write. */
int repl_good_slaves_count; /* Number of slaves with lag <= max_lag. */
/* Replication (slave) */
char *masterauth; /* AUTH with this password with master */
char *masterhost; /* Hostname of master */
int masterport; /* Port of master */
int repl_timeout; /* Timeout after N seconds of master idle */
redisClient *master; /* Client that is master for this slave */
redisClient *cached_master; /* Cached master to be reused for PSYNC. */
int repl_syncio_timeout; /* Timeout for synchronous I/O calls */
int repl_state; /* Replication status if the instance is a slave */
off_t repl_transfer_size; /* Size of RDB to read from master during sync. */
off_t repl_transfer_read; /* Amount of RDB read from master during sync. */
off_t repl_transfer_last_fsync_off; /* Offset when we fsync-ed last time. */
int repl_transfer_s; /* Slave -> Master SYNC socket */
int repl_transfer_fd; /* Slave -> Master SYNC temp file descriptor */
char *repl_transfer_tmpfile; /* Slave-> master SYNC temp file name */
time_t repl_transfer_lastio; /* Unix time of the latest read, for timeout */
int repl_serve_stale_data; /* Serve stale data when link is down? */
int repl_slave_ro; /* Slave is read only? */
time_t repl_down_since; /* Unix time at which link with master went down */
int repl_disable_tcp_nodelay; /* Disable TCP_NODELAY after SYNC? */
int slave_priority; /* Reported in INFO and used by Sentinel. */
char repl_master_runid[REDIS_RUN_ID_SIZE+1]; /* Master run id for PSYNC. */
long long repl_master_initial_offset; /* Master PSYNC offset. */
/* Replication script cache. */
dict *repl_scriptcache_dict; /* SHA1 all slaves are aware of. */
list *repl_scriptcache_fifo; /* First in, first out LRU eviction. */
int repl_scriptcache_size; /* Max number of elements. */
/* Synchronous replication. */
list *clients_waiting_acks; /* Clients waiting in WAIT command. */
int get_ack_from_slaves; /* If true we send REPLCONF GETACK. */
/* Limits */
unsigned int maxclients; /* Max number of simultaneous clients */
unsigned long long maxmemory; /* Max number of memory bytes to use */
int maxmemory_policy; /* Policy for key eviction */
int maxmemory_samples; /* Precision of random sampling */
/* Blocked clients */
unsigned int bpop_blocked_clients; /* Number of clients blocked by lists */
list *unblocked_clients; /* list of clients to unblock before next loop */
list *ready_keys; /* List of readyList structures for BLPOP & co */
/* Sort parameters - qsort_r() is only available under BSD so we
* have to take this state global, in order to pass it to sortCompare() */
int sort_desc;
int sort_alpha;
int sort_bypattern;
int sort_store;
/* Zip structure config, see redis.conf for more information */
size_t hash_max_ziplist_entries;
size_t hash_max_ziplist_value;
size_t list_max_ziplist_entries;
size_t list_max_ziplist_value;
size_t set_max_intset_entries;
size_t zset_max_ziplist_entries;
size_t zset_max_ziplist_value;
time_t unixtime; /* Unix time sampled every cron cycle. */
long long mstime; /* Like 'unixtime' but with milliseconds resolution. */
/* Pubsub */
dict *pubsub_channels; /* Map channels to list of subscribed clients */
list *pubsub_patterns; /* A list of pubsub_patterns */
int notify_keyspace_events; /* Events to propagate via Pub/Sub. This is an
xor of REDIS_NOTIFY... flags. */
/* Cluster */
int cluster_enabled; /* Is cluster enabled? */
mstime_t cluster_node_timeout; /* Cluster node timeout. */
char *cluster_configfile; /* Cluster auto-generated config file name. */
struct clusterState *cluster; /* State of the cluster */
int cluster_migration_barrier; /* Cluster replicas migration barrier. */
/* Scripting */
lua_State *lua; /* The Lua interpreter. We use just one for all clients */
redisClient *lua_client; /* The "fake client" to query Redis from Lua */
redisClient *lua_caller; /* The client running EVAL right now, or NULL */
dict *lua_scripts; /* A dictionary of SHA1 -> Lua scripts */
mstime_t lua_time_limit; /* Script timeout in milliseconds */
mstime_t lua_time_start; /* Start time of script, milliseconds time */
int lua_write_dirty; /* True if a write command was called during the
execution of the current script. */
int lua_random_dirty; /* True if a random command was called during the
execution of the current script. */
int lua_timedout; /* True if we reached the time limit for script
execution. */
int lua_kill; /* Kill the script if true. */
/* Assert & bug reporting */
char *assert_failed;
char *assert_file;
int assert_line;
int bug_report_start; /* True if bug report header was already logged. */
int watchdog_period; /* Software watchdog period in ms. 0 = off */
};
/* One Redis database: its keyspace plus the auxiliary dictionaries for
 * expirations, blocking operations and WATCH. */
typedef struct redisDb {
int id; /* Index of this database, 0-15 (Redis has 16 databases by default) */
dict *dict; /* All key-value pairs stored in this database */
dict *expires; /* Key expirations: the dict key is the key, the dict value is its expiry UNIX timestamp */
dict *blocking_keys; /* BLPOP: keys being blocked on, together with the blocked client objects */
dict *ready_keys; /* Keys that received a PUSH while being blocked on, with their client objects; used to answer the blocked clients */
dict *watched_keys; /* Keys monitored by WATCH, together with the watching client objects */
long long avg_ttl; /* Average TTL (time to live) of the stored objects, kept for statistics */
list *defrag_later; /* List of key names to attempt to defrag one by one, gradually. */
} redisDb;
在16位的系统中: short 与 int 占两个字节, long 占四个字节;
在32位的系统中: short 占两个字节, int 与 long 占四个字节, long long 占八个字节;
在64位的系统中: short 占两个字节, int 占四个字节, long 在 Linux/macOS(LP64)下占八个字节、在 Windows(LLP64)下占四个字节, long long 占八个字节。
C short/int/long/long long 等数据类型大小
C的标准并没有规定每种类型占多少位,只是说 “sizeof(long)>=sizeof(int)>=sizeof(short)”,所以具体的字节数都是根据编译器来确定的
数据类型 | 字节大小 | 数值范围 |
---|---|---|
char | 1 字节 | -128 到 127 或 0 到 255 |
unsigned char | 1 字节 | 0 到 255 |
short int (短整型) | 2 字节 | -32 768 〜+32 767**(2^15 - 1)** |
unsigned short int (无符号短整型) | 2 字节 | 0 〜+65 535**(2^16 - 1)** |
int (整型) | 2或4 字节 | 4字节:-2 147 483 648 〜+2 147 483 647**(2^31 - 1)** |
unsigned int (无符号整型) | 2或4字节 | 4字节:0 〜4 294 967 295 (2^32 - 1) |
long int (长整型) | 4 字节 | -2 147 483 648 〜+2 147 483 647**(2^31 - 1)** |
unsigned long int (无符号长整型) | 4 字节 | 0 〜4 294 967 295**(2^32 - 1)** |
long long int (超长整型) | 8字节 | -9 223 372 036 854 775 808~9 223 372 036 854 775 807 (2^63 - 1) |
unsigned long long int (无符号超长整型) | 8字节 | 0 〜18 446 744 073 709 551 615 (2^64 - 1) |
unsigned、signed、short、long修饰int,int可以省略
/* Redis object header: a type tag, an encoding tag, LRU/LFU bookkeeping,
 * a reference count and a pointer to the actual payload. */
typedef struct redisObject {
// Object type: 0-string 1-list 2-set 3-zset 4-hash, defined as macros.
unsigned type:4; // ":4" is a bit-field: the member occupies only 4 bits (2^4 values), see http://c.biancheng.net/view/2037.html
// Object encoding. Some kinds of objects (e.g. strings and hashes) can be represented internally in several ways; the encoding says which representation is in use.
unsigned encoding:4;
// LRU_BITS wide (24 bits according to the accompanying notes). It records when the object was last accessed; under the LFU policy the high 16 bits hold a minute-resolution timestamp and the low 8 bits hold an access counter.
unsigned lru:LRU_BITS;
// Reference count
int refcount;
// Pointer to the actual data; it is a void * so that it can point to any of the six underlying data structures
void *ptr;
} robj;
分别对应着5种基础数据类型(非数据结构类型)
/* Object type codes: the values stored in the `type` field of a Redis object. */
#define OBJ_STRING 0 /* String object. */
#define OBJ_LIST 1 /* List object. */
#define OBJ_SET 2 /* Set object. */
#define OBJ_ZSET 3 /* Sorted set object. */
#define OBJ_HASH 4 /* Hash object. */
对象编码(数据结构类型)。某些类型的对象(如字符串和哈希)可以通过多种方式在内部表示。ENCODING表明表示方式。
#define OBJ_ENCODING_RAW 0 // Raw string representation (a simple dynamic string, SDS)
#define OBJ_ENCODING_INT 1 // Encoded as an integer
#define OBJ_ENCODING_HT 2 // Encoded as a hash table
#define OBJ_ENCODING_ZIPMAP 3 // Encoded as a zipmap
#define OBJ_ENCODING_LINKEDLIST 4 /* No longer used: old list encoding. */
#define OBJ_ENCODING_ZIPLIST 5 // Encoded as a ziplist (compressed list)
#define OBJ_ENCODING_INTSET 6 // Encoded as an intset (integer set)
#define OBJ_ENCODING_SKIPLIST 7 // Encoded as a skiplist
#define OBJ_ENCODING_EMBSTR 8 // Encoded as an embedded SDS string (embstr)
#define OBJ_ENCODING_QUICKLIST 9 /* Quicklist: ziplists combined with a linked list */
#define OBJ_ENCODING_STREAM 10 /* Encoded as a radix tree of listpacks */
lru 记录的是对象最后一次被命令程序访问的时间( 4.0 以上版本占 24 位,2.6 版本占 22 位)。这 24 位的含义取决于淘汰策略:
LRU 策略 ----> 全部24位: 最后一次被访问时的 LRU 时钟值。
LFU 策略 ----> 高16位: 分钟级别的时间戳;低8位: 访问计数(最近访问次数)。
refcount 记录的是该对象被引用的次数,类型为整型。refcount 的作用,主要在于对象的引用计数和内存回收。
当对象的refcount>1时,称为共享对象,Redis 为了节省内存,当有一些对象重复出现时(例如经常返回的"OK"字符串),新的程序不会创建新的对象,而是仍然使用原来的对象。
ptr 指针指向具体的数据,比如:set hello world,ptr 指向包含字符串 world 的 SDS。
当然是为了追求速度,不同数据类型使用不同的数据结构速度才得以提升。每种数据类型都有一种或者多种数据结构来支撑,底层数据结构有 6 种。
type与encoding有固定的搭配,encoding表示的是对应的数据结构。
type | encoding-simple | encoding | 描述 |
---|---|---|---|
string | int | REDIS_ENCODING_INT(整数) | value为纯数字时 |
string | embstr | REDIS_ENCODING_EMBSTR(embstr 编码的简单动态字符串(SDS)) | value为小字符串,长度不超过 44(OBJ_ENCODING_EMBSTR_SIZE_LIMIT)个字节 |
string | raw | REDIS_ENCODING_RAW(简单动态字符串) | value为大字符串,长度大于 44(OBJ_ENCODING_EMBSTR_SIZE_LIMIT)个字节 |
list | quicklist | REDIS_ENCODING_QUICKLIST(快速列表) | |
set | intset | REDIS_ENCODING_INTSET(整数集合) | 当Redis集合类型的所有元素均是整数并且都处在64位有符号整数范围内。 |
set | dict | REDIS_ENCODING_HT(字典) | 当Redis集合类型的元素是非整数或包含处在64位有符号整数范围外元素。 |
zset | ziplist | REDIS_ENCODING_ZIPLIST(压缩列表) | 当元素的个数比较少,且元素都是小整数或短字符串时。 |
zset | skiplist | REDIS_ENCODING_SKIPLIST(跳表) | 当元素的个数比较多或元素不是小整数或短字符串时。 |
hash | ziplist | REDIS_ENCODING_ZIPLIST(压缩列表) | 当散列表元素的个数比较少,且元素都是小整数或短字符串时。 |
hash | dict | REDIS_ENCODING_HT(字典) | 当散列表元素的个数比较多或元素不是小整数或短字符串时。 |
stream | stream | OBJ_ENCODING_STREAM(流,基数树 + listpack) | 类似消息队列 |
Redis是典型的客户端/服务器(C/S)结构,客户端通过socket与服务端建立网络连接并发送命令请求,服务端处理命令请求并回复。Redis使用结构体client存储客户端连接的所有信息。包括但不限于客户端的名称、客户端连接的套接字描述符、客户端当前选择的数据库ID、客户端的输入缓冲区与输出缓冲区等。结构体client字段较多,此处只介绍命令处理主流程所需的关键字段。
typedef struct client {
uint64_t id; // 客户端唯一ID,通过全局变量server.next_client_id实现。
int fd; // socket的文件描述符。
redisDb *db; // select命令选择的数据库对象
robj *name; // 客户端名称,可以使用命令CLIENT SETNAME设置。
time_t lastinteraction // 客户端上次与服务器交互的时间,以此实现客户端的超时处理。
sds querybuf; //输入缓冲区,recv函数接收的客户端命令请求会暂时缓存在此缓冲区。
int argc;
robj **argv;
struct redisCommand *cmd;
list *reply;
unsigned long long reply_bytes;
size_t sentlen;
char buf[PROTO_REPLY_CHUNK_BYTES];
int bufpos;
} client;
id为客户端唯一ID,通过全局变量server.next_client_id实现。
fd为客户端socket的文件描述符。
db为客户端使用select命令选择的数据库对象。
name:客户端名称,可以使用命令CLIENT SETNAME设置。
lastinteraction:客户端上次与服务器交互的时间,以此实现客户端的超时处理。
querybuf:输入缓冲区,recv函数接收的客户端命令请求会暂时缓存在此缓冲区。
argc:输入缓冲区的命令请求是按照Redis协议格式编码字符串,需要解析出命令请求的所有参数,参数个数存储在argc字段,参数内容被解析为robj对象,存储在argv数组。
cmd:待执行的客户端命令;解析命令请求后,会根据命令名称查找该命令对应的命令对象,存储在客户端cmd字段,可以看到其类型为struct redisCommand。
reply:输出链表,存储待返回给客户端的命令回复数据。链表节点存储的值类型为clientReplyBlock,定义如下:
/* One node value of a client's reply list: a small header followed
 * directly by the buffered bytes. */
typedef struct clientReplyBlock {
    size_t size; /* Total capacity of buf, in bytes. */
    size_t used; /* Number of bytes of buf currently in use. */
    char buf[];  /* Flexible array member holding the reply data. */
} clientReplyBlock;
可以看到链表节点本质上就是一个缓冲区(buffer),其中size表示缓冲区空间总大小,used表示缓冲区已使用空间大小。
redis支持的所有命令初始都存储在全局变量redisCommandTable中,类型为redisCommand。
/* Initial table of all supported commands. Each row is a struct redisCommand
 * initializer in field order: name, proc, arity, sflags, flags, getkeys_proc,
 * firstkey, lastkey, keystep, microseconds, calls. */
struct redisCommand redisCommandTable[] = {
{"module",moduleCommand,-2,"as",0,NULL,0,0,0,0,0},
{"get",getCommand,2,"rF",0,NULL,1,1,1,0,0},
{"set",setCommand,-3,"wm",0,NULL,1,1,1,0,0},
// ... part of the code omitted ...
};
redisCommand结构也非常简单:
/* Descriptor of a single Redis command: how to call it, how to validate
 * its argument count, where its keys are, and its runtime statistics. */
struct redisCommand {
    char *name;             /* Command name. */
    redisCommandProc *proc; /* Function that implements the command. */
    int arity;              /* Argument count used to validate requests; the
                             * command name itself counts as one argument. */
    char *sflags;           /* Flags as string representation, one char per flag. */
    int flags;              /* The actual flags, obtained from the 'sflags' field. */

    /* Use a function to determine keys arguments in a command line.
     * Used for Redis Cluster redirect. */
    redisGetKeysProc *getkeys_proc;

    /* What keys should be loaded in background when calling this command? */
    int firstkey;           /* The first argument that's a key (0 = no keys) */
    int lastkey;            /* The last argument that's a key */
    int keystep;            /* The step between first and last key */

    /* Statistics accumulated since the server started. */
    long long microseconds; /* Total execution time of this command. */
    long long calls;        /* Number of times this command was executed. */
};
name:命令名称。
proc:命令处理函数。
arity:命令参数数目,用于校验命令请求格式是否正确;当arity小于0时,表示命令参数数目大于等于arity的绝对值(即-arity);当arity大于0时,表示命令参数数目必须为arity;注意命令请求中,命令的名称本身也是一个参数,如get命令的参数数目为2,命令请求格式为get key。
sflags:命令标志,例如标识命令是读命令还是写命令,详情参见表9-2;注意到sflags的类型为字符串,此处只是为了良好的可读性。
flags:命令的二进制标志,服务器启动时解析sflags字段生成。
calls:从服务器启动至今命令执行的次数,用于统计。
microseconds:从服务器启动至今命令总的执行时间,
microseconds/calls即可计算出该命令的平均处理时间,用于统计。