/*
The basic lookup function used by all operations.
This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
Open addressing is preferred over chaining since the link overhead for
chaining would be substantial (100% with typical malloc overhead).
The initial probe index is computed as hash mod the table size. Subsequent
probe indices are computed as explained earlier.
All arithmetic on hash should ignore overflow.
The details in this version are due to Tim Peters, building on many past
contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and
Christian Tismer.
lookdict() is general-purpose, and may return DKIX_ERROR if (and only if) a
comparison raises an exception.
lookdict_unicode() below is specialized to string keys, comparison of which can
never raise an exception; that function can never return DKIX_ERROR when key
is string. Otherwise, it falls back to lookdict().
lookdict_unicode_nodummy is further specialized for string keys that cannot be
the value.
For both, when the key isn't found a DKIX_EMPTY is returned.
*/
/*
为何python不用链地址法,以上注释进行了说明,使用链地址法时,每添加一对键值绝对是要分配内存的,
当python使用线性探测再散列方法时,可根据负载因子进行扩容,所有添加键值有很大机率不需要分配内存的
*/
static Py_ssize_t _Py_HOT_FUNCTION
lookdict(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject **value_addr)
{
size_t i, mask, perturb;
PyDictKeysObject *dk;
PyDictKeyEntry *ep0;
top:
dk = mp->ma_keys;
//哈希表起始点
ep0 = DK_ENTRIES(dk);
//计算哈希
mask = DK_MASK(dk);
perturb = hash;
i = (size_t)hash & mask;
for (;;) {
Py_ssize_t ix = dictkeys_get_index(dk, i);
if (ix == DKIX_EMPTY) {
*value_addr = NULL;
return ix;
}
if (ix >= 0) {
PyDictKeyEntry *ep = &ep0[ix];
assert(ep->me_key != NULL);
//地址相同
if (ep->me_key == key) {
*value_addr = ep->me_value;
return ix;
}
//哈希相同
if (ep->me_hash == hash) {
PyObject *startkey = ep->me_key;
Py_INCREF(startkey);
//进行富比较
int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
//错误
if (cmp < 0) {
*value_addr = NULL;
return DKIX_ERROR;
}
//字典未变动
if (dk == mp->ma_keys && ep->me_key == startkey) {
//相等返回
if (cmp > 0) {
*value_addr = ep->me_value;
return ix;
}
}
//字典变动
else {
/* The dict was mutated, restart */
goto top;
}
}
}
//再散列
perturb >>= PERTURB_SHIFT;
i = (i*5 + perturb + 1) & mask;
}
Py_UNREACHABLE();
}
//unicode带dummpy检查
/* Specialized version for string-only keys */
static Py_ssize_t _Py_HOT_FUNCTION
lookdict_unicode(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject **value_addr)
{
assert(mp->ma_values == NULL);
/* Make sure this function doesn't have to handle non-unicode keys,
including subclasses of str; e.g., one reason to subclass
unicodes is to override __eq__, and for speed we don't cater to
that here. */
//Unicode检查
if (!PyUnicode_CheckExact(key)) {
mp->ma_keys->dk_lookup = lookdict;
return lookdict(mp, key, hash, value_addr);
}
//哈希表起始点
PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
//计算哈希
size_t mask = DK_MASK(mp->ma_keys);
size_t perturb = (size_t)hash;
size_t i = (size_t)hash & mask;
for (;;) {
//获取索引位置对象
Py_ssize_t ix = dictkeys_get_index(mp->ma_keys, i);
//空位
if (ix == DKIX_EMPTY) {
*value_addr = NULL;
return DKIX_EMPTY;
}
if (ix >= 0) {
PyDictKeyEntry *ep = &ep0[ix];
assert(ep->me_key != NULL);
assert(PyUnicode_CheckExact(ep->me_key));
//找到键值相同
if (ep->me_key == key ||
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
*value_addr = ep->me_value;
return ix;
}
}
//再散列
perturb >>= PERTURB_SHIFT;
i = mask & (i*5 + perturb + 1);
}
Py_UNREACHABLE();
}
//unicode无dummpy检查
/* Faster version of lookdict_unicode when it is known that no keys
* will be present. */
static Py_ssize_t _Py_HOT_FUNCTION
lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject **value_addr)
{
assert(mp->ma_values == NULL);
/* Make sure this function doesn't have to handle non-unicode keys,
including subclasses of str; e.g., one reason to subclass
unicodes is to override __eq__, and for speed we don't cater to
that here. */
//Unicode检查
if (!PyUnicode_CheckExact(key)) {
mp->ma_keys->dk_lookup = lookdict;
return lookdict(mp, key, hash, value_addr);
}
//哈希表
PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
//计算哈希
size_t mask = DK_MASK(mp->ma_keys);
size_t perturb = (size_t)hash;
size_t i = (size_t)hash & mask;
for (;;) {
//获取索引位置对象
Py_ssize_t ix = dictkeys_get_index(mp->ma_keys, i);
//不能是dummpy状态
assert (ix != DKIX_DUMMY);
//空位
if (ix == DKIX_EMPTY) {
*value_addr = NULL;
return DKIX_EMPTY;
}
PyDictKeyEntry *ep = &ep0[ix];
assert(ep->me_key != NULL);
assert(PyUnicode_CheckExact(ep->me_key));
//找到键值相同
if (ep->me_key == key ||
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
*value_addr = ep->me_value;
return ix;
}
//再散列
perturb >>= PERTURB_SHIFT;
i = mask & (i*5 + perturb + 1);
}
Py_UNREACHABLE();
}
/* lookup indices. returns DKIX_EMPTY, DKIX_DUMMY, or ix >=0 */
static inline Py_ssize_t
dictkeys_get_index(PyDictKeysObject *keys, Py_ssize_t i)
{
Py_ssize_t s = DK_SIZE(keys);
Py_ssize_t ix;
if (s <= 0xff) {
int8_t *indices = (int8_t*)(keys->dk_indices);
ix = indices[i];
}
else if (s <= 0xffff) {
int16_t *indices = (int16_t*)(keys->dk_indices);
ix = indices[i];
}
#if SIZEOF_VOID_P > 4
else if (s > 0xffffffff) {
int64_t *indices = (int64_t*)(keys->dk_indices);
ix = indices[i];
}
#endif
else {
int32_t *indices = (int32_t*)(keys->dk_indices);
ix = indices[i];
}
assert(ix >= DKIX_DUMMY);
return ix;
}