/* Object layout shared by set and frozenset: an open-addressing hash table
 * of setentry slots together with its bookkeeping counters.
 */
typedef struct {
PyObject_HEAD
Py_ssize_t fill; /* Number of active and dummy entries */
Py_ssize_t used; /* Number of active entries */
/* The table contains mask + 1 slots, and that's a power of 2.
* We store the mask instead of the size because the mask is more
* frequently needed.
*/
Py_ssize_t mask;
/* The table points to a fixed-size smalltable for small tables
* or to additional malloc'ed memory for bigger tables.
* The table pointer is never NULL which saves us from repeated
* runtime null-tests.
*/
setentry *table;
Py_hash_t hash; /* Only used by frozenset objects */
Py_ssize_t finger; /* Search finger for pop() */
/* Number of slots in the embedded smalltable below. */
#define PySet_MINSIZE 8
setentry smalltable[PySet_MINSIZE];
PyObject *weakreflist; /* List of weak references */
} PySetObject;
Set集合New
/* tp_new-style constructor: build an empty set of the requested type.
 * The positional and keyword arguments are not consulted here.
 */
static PyObject *
set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    PyObject *empty_set = make_new_set(type, NULL);

    return empty_set;
}
/* Allocate a set object of the given type and optionally fill it.
 *
 * type:     the concrete type whose tp_alloc is used for the allocation.
 * iterable: source of initial elements, or NULL for an empty set.
 *
 * Returns a new reference, or NULL if allocation or population fails.
 */
static PyObject *
make_new_set(PyTypeObject *type, PyObject *iterable)
{
    PySetObject *self = (PySetObject *)type->tp_alloc(type, 0);

    if (self == NULL) {
        return NULL;
    }

    /* Start out on the embedded table; mask is its slot count minus one. */
    self->table = self->smalltable;
    self->mask = PySet_MINSIZE - 1;
    self->used = 0;
    self->fill = 0;
    self->finger = 0;
    self->hash = -1;
    self->weakreflist = NULL;

    if (iterable == NULL) {
        return (PyObject *)self;
    }

    if (set_update_internal(self, iterable)) {
        /* Population failed: drop the half-built object. */
        Py_DECREF(self);
        return NULL;
    }
    return (PyObject *)self;
}
Set集合Add
时间复杂度:平均 O(1);哈希冲突严重或触发扩容时会退化
/* set.add(key): insert key into so.
 * Returns None on success, or NULL when set_add_key() reports failure.
 */
static PyObject *
set_add(PySetObject *so, PyObject *key)
{
    int failed = set_add_key(so, key);

    if (failed) {
        return NULL;
    }
    Py_RETURN_NONE;
}
/* Hash key and insert it into so.
 * Returns -1 if hashing fails, otherwise the result of set_add_entry().
 */
static int
set_add_key(PySetObject *so, PyObject *key)
{
    Py_hash_t hash = -1;

    /* Exact str objects carry a cached hash; use it when it is not -1. */
    if (PyUnicode_CheckExact(key)) {
        hash = ((PyASCIIObject *) key)->hash;
    }
    if (hash == -1) {
        hash = PyObject_Hash(key);
        if (hash == -1) {
            return -1;
        }
    }
    return set_add_entry(so, key, hash);
}
/* Insert key, whose hash has already been computed, into so.
 *
 * Returns 0 when the key was stored or an equal key was already present,
 * -1 when a key comparison fails, and otherwise whatever
 * set_table_resize() returns when the insertion triggers a resize.
 * The reference taken by the Py_INCREF below is kept by the table when the
 * key is stored and released on the found_active / comparison_error paths.
 */
static int
set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
{
setentry *table;
setentry *freeslot;
setentry *entry;
size_t perturb;
size_t mask;
size_t i; /* Unsigned for defined overflow behavior */
size_t j;
int cmp;
/* Pre-increment is necessary to prevent arbitrary code in the rich
comparison from deallocating the key just before the insertion. */
Py_INCREF(key);
restart:
// Compute the home slot index from the hash.
mask = so->mask;
i = (size_t)hash & mask;
// Look up the entry at that index.
entry = &so->table[i];
// Home slot has a NULL key (unused): store the key there directly.
if (entry->key == NULL)
goto found_unused;
freeslot = NULL;
perturb = hash;
while (1) {
// Stored hash matches the hash of the key being inserted.
if (entry->hash == hash) {
PyObject *startkey = entry->key;
// Same object: the key is already in the set.
if (startkey == key)
goto found_active;
// Both are exact str objects with equal contents: already in the set.
if (PyUnicode_CheckExact(startkey)
&& PyUnicode_CheckExact(key)
&& _PyUnicode_EQ(startkey, key))
goto found_active;
table = so->table;
Py_INCREF(startkey);
// Fall back to a full rich comparison.
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
// Keys compare equal.
if (cmp > 0) /* likely */
goto found_active;
// The comparison failed.
if (cmp < 0)
goto comparison_error;
/* Continuing the search from the current entry only makes
sense if the table and entry are unchanged; otherwise,
we have to restart from the beginning */
// The table or this entry changed during the comparison.
if (table != so->table || entry->key != startkey)
goto restart;
mask = so->mask; /* help avoid a register spill */
}
// A hash of -1 marks a dummy (deleted) entry; remember it as reusable.
else if (entry->hash == -1)
freeslot = entry;
#define LINEAR_PROBES 9
// Scan a short run of entries directly after the current index.
if (i + LINEAR_PROBES <= mask) { // only when the whole run stays inside the table
for (j = 0 ; j < LINEAR_PROBES ; j++) { // inspect up to LINEAR_PROBES following entries
entry++;
// Unused slot (hash 0, NULL key): the key is absent, stop probing.
if (entry->hash == 0 && entry->key == NULL)
goto found_unused_or_dummy;
// Stored hash matches.
if (entry->hash == hash) {
PyObject *startkey = entry->key;
assert(startkey != dummy);
// Same object: the key is already in the set.
if (startkey == key)
goto found_active;
// Both are exact str objects with equal contents: already in the set.
if (PyUnicode_CheckExact(startkey)
&& PyUnicode_CheckExact(key)
&& _PyUnicode_EQ(startkey, key))
goto found_active;
table = so->table;
Py_INCREF(startkey);
// Fall back to a full rich comparison.
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
// Keys compare equal.
if (cmp > 0)
goto found_active;
// The comparison failed.
if (cmp < 0)
goto comparison_error;
// The table or this entry changed during the comparison.
if (table != so->table || entry->key != startkey)
goto restart;
mask = so->mask;
}
// Dummy (deleted) entry: remember it as a reusable slot.
else if (entry->hash == -1)
freeslot = entry;
}
}
// Linear run exhausted: derive the next index from the perturbed hash
// and continue the outer loop from there.
perturb >>= PERTURB_SHIFT;
i = (i * 5 + 1 + perturb) & mask;
entry = &so->table[i];
// Entry with a NULL key: the key is absent, stop probing.
if (entry->key == NULL)
goto found_unused_or_dummy;
}
// Key is absent. Prefer the remembered dummy slot; fill is left unchanged
// on this path and only used is incremented.
found_unused_or_dummy:
if (freeslot == NULL)
goto found_unused;
so->used++;
freeslot->key = key;
freeslot->hash = hash;
return 0;
// Store into an unused slot: both fill and used grow.
found_unused:
so->fill++;
so->used++;
entry->key = key;
entry->hash = hash;
// No resize needed while fill * 5 < mask * 3 (fill below 3/5 of mask).
if ((size_t)so->fill*5 < mask*3)
return 0;
// Otherwise resize, requesting used*2 when used > 50000 and used*4 otherwise.
return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
// An equal key is already present: release the extra reference.
found_active:
Py_DECREF(key);
return 0;
// The comparison failed: release the extra reference and report the error.
comparison_error:
Py_DECREF(key);
return -1;
}
Set集合Remove
/* set.remove(key): delete key from so.
 *
 * Returns None on success. Returns NULL on failure, setting a KeyError
 * via _PyErr_SetKeyError() when the key is not present.
 * If the first attempt fails with TypeError and key is itself a set, the
 * removal is retried once with a frozenset built from key.
 */
static PyObject *
set_remove(PySetObject *so, PyObject *key)
{
    int status = set_discard_key(so, key);

    if (status < 0) {
        PyObject *frozen_key;

        /* Only a TypeError raised for a set-typed key is retried. */
        if (!(PySet_Check(key) && PyErr_ExceptionMatches(PyExc_TypeError))) {
            return NULL;
        }
        PyErr_Clear();

        /* Build a frozenset from the key and look that up instead. */
        frozen_key = make_new_set(&PyFrozenSet_Type, key);
        if (frozen_key == NULL) {
            return NULL;
        }
        status = set_discard_key(so, frozen_key);
        Py_DECREF(frozen_key);
        if (status < 0) {
            return NULL;
        }
    }

    if (status == DISCARD_NOTFOUND) {
        /* Nothing was removed: report the original key in the KeyError. */
        _PyErr_SetKeyError(key);
        return NULL;
    }
    Py_RETURN_NONE;
}
/* Hash key and remove it from so.
 * Returns -1 if hashing fails, otherwise the result of set_discard_entry().
 */
static int
set_discard_key(PySetObject *so, PyObject *key)
{
    Py_hash_t hash = -1;

    /* Exact str objects carry a cached hash; use it when it is not -1. */
    if (PyUnicode_CheckExact(key)) {
        hash = ((PyASCIIObject *) key)->hash;
    }
    if (hash == -1) {
        hash = PyObject_Hash(key);
        if (hash == -1) {
            return -1;
        }
    }
    return set_discard_entry(so, key, hash);
}
/* Remove the entry matching key/hash from so, if there is one.
 *
 * Returns -1 when the lookup fails, DISCARD_NOTFOUND when no matching
 * entry exists, and DISCARD_FOUND after an entry has been removed.
 */
static int
set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
{
    setentry *slot = set_lookkey(so, key, hash);
    PyObject *removed;

    if (slot == NULL) {
        return -1;              /* lookup failed */
    }

    removed = slot->key;
    if (removed == NULL) {
        return DISCARD_NOTFOUND;
    }

    /* Turn the slot into a dummy (key = dummy, hash = -1) rather than
     * clearing it, and only then release the old key: the slot is already
     * consistent by the time the reference is dropped. */
    slot->key = dummy;
    slot->hash = -1;
    so->used--;
    Py_DECREF(removed);
    return DISCARD_FOUND;
}
/* Locate the entry for key, whose hash has already been computed.
 *
 * Returns NULL when a key comparison fails. Otherwise returns either the
 * entry holding an equal key, or the entry with a NULL key at which the
 * probe sequence stopped (meaning the key is not present).
 */
static setentry *
set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
{
setentry *table;
setentry *entry;
size_t perturb;
// Compute the home slot index from the hash.
size_t mask = so->mask;
size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
size_t j;
int cmp;
entry = &so->table[i];
// Home slot has a NULL key: the key is absent, return that slot.
if (entry->key == NULL)
return entry;
perturb = hash;
while (1) {
// Stored hash matches.
if (entry->hash == hash) {
PyObject *startkey = entry->key;
/* startkey cannot be a dummy because the dummy hash field is -1 */
assert(startkey != dummy);
// Same object.
if (startkey == key)
return entry;
// Both are exact str objects with equal contents.
if (PyUnicode_CheckExact(startkey)
&& PyUnicode_CheckExact(key)
&& _PyUnicode_EQ(startkey, key))
return entry;
table = so->table;
Py_INCREF(startkey);
// Fall back to a full rich comparison.
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
if (cmp < 0) /* unlikely */
return NULL;
// The table or this entry changed during the comparison: start over.
if (table != so->table || entry->key != startkey) /* unlikely */
return set_lookkey(so, key, hash);
if (cmp > 0) /* likely */
return entry;
mask = so->mask; /* help avoid a register spill */
}
// Scan a short run of following entries, only when the whole run
// stays inside the table.
if (i + LINEAR_PROBES <= mask) {
for (j = 0 ; j < LINEAR_PROBES ; j++) {
entry++;
// Unused slot (hash 0, NULL key): the key is absent.
if (entry->hash == 0 && entry->key == NULL)
return entry;
// Stored hash matches.
if (entry->hash == hash) {
PyObject *startkey = entry->key;
assert(startkey != dummy);
// Same object.
if (startkey == key)
return entry;
// Both are exact str objects with equal contents.
if (PyUnicode_CheckExact(startkey)
&& PyUnicode_CheckExact(key)
&& _PyUnicode_EQ(startkey, key))
return entry;
table = so->table;
Py_INCREF(startkey);
// Fall back to a full rich comparison.
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
if (cmp < 0)
return NULL;
// The table or this entry changed during the comparison: start over.
if (table != so->table || entry->key != startkey)
return set_lookkey(so, key, hash);
if (cmp > 0)
return entry;
mask = so->mask;
}
}
}
// Linear run exhausted: derive the next index from the perturbed hash.
perturb >>= PERTURB_SHIFT;
i = (i * 5 + 1 + perturb) & mask;
entry = &so->table[i];
// Entry with a NULL key: the key is absent.
if (entry->key == NULL)
return entry;
}
}
Set集合Update
/* set.update(*others): add the elements of every argument to so.
 *
 * NOTE(review): the original text of this span was damaged in extraction
 * (everything between "i<" and the ">" of "so->fill" was dropped), fusing
 * this function with set_update_internal(). Both were rebuilt from the
 * surviving lines; confirm against the upstream implementation.
 *
 * args is assumed to be the tuple of positional arguments.
 * Returns None on success, NULL if any argument fails to merge.
 */
static PyObject *
set_update(PySetObject *so, PyObject *args)
{
    Py_ssize_t i;

    /* Merge each positional argument in turn. */
    for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
        PyObject *other = PyTuple_GET_ITEM(args, i);

        if (set_update_internal(so, other))
            return NULL;
    }
    Py_RETURN_NONE;
}

/* Add every element produced by other to so.
 *
 * Exact dicts take a fast path that reuses the hashes stored in the dict;
 * anything else is consumed through the iterator protocol.
 * NOTE(review): upstream may have a further fast path for set arguments
 * that was lost with the damaged text; the generic iterator path below
 * handles them correctly, only more slowly.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
set_update_internal(PySetObject *so, PyObject *other)
{
    PyObject *key, *it;

    if (PyDict_CheckExact(other)) {
        PyObject *value;
        Py_ssize_t pos = 0;
        Py_hash_t hash;
        Py_ssize_t dictsize = PyDict_Size(other);

        if (dictsize < 0)
            return -1;
        /* Resize once up front when adding dictsize keys would reach the
         * fill * 5 >= mask * 3 threshold. */
        if ((so->fill + dictsize)*5 >= so->mask*3) {
            if (set_table_resize(so, (so->used + dictsize)*2) != 0)
                return -1;
        }
        while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
            if (set_add_entry(so, key, hash))
                return -1;
        }
        return 0;
    }

    /* Generic path: obtain an iterator over other. */
    it = PyObject_GetIter(other);
    if (it == NULL)
        return -1;

    /* Insert the items one by one. */
    while ((key = PyIter_Next(it)) != NULL) {
        if (set_add_key(so, key)) {
            Py_DECREF(it);
            Py_DECREF(key);
            return -1;
        }
        Py_DECREF(key);
    }
    Py_DECREF(it);
    /* PyIter_Next() returns NULL both at exhaustion and on error. */
    if (PyErr_Occurred())
        return -1;
    return 0;
}
/* Hash key and insert it into so.
 * Returns -1 if hashing fails, otherwise the result of set_add_entry().
 */
static int
set_add_key(PySetObject *so, PyObject *key)
{
    Py_hash_t hash = -1;

    /* Exact str objects carry a cached hash; use it when it is not -1. */
    if (PyUnicode_CheckExact(key)) {
        hash = ((PyASCIIObject *) key)->hash;
    }
    if (hash == -1) {
        hash = PyObject_Hash(key);
        if (hash == -1) {
            return -1;
        }
    }
    return set_add_entry(so, key, hash);
}
Set集合比较操作(set_richcompare;In操作待续)
/* Rich comparison between the set v and an arbitrary object w.
 *
 * Returns NotImplemented when w is not a set/frozenset or op is not one of
 * the six comparison operators; otherwise a new reference to the result
 * (or NULL if a helper fails).
 */
static PyObject *
set_richcompare(PySetObject *v, PyObject *w, int op)
{
    PyObject *eq_result;
    int is_equal;

    if (!PyAnySet_Check(w)) {
        Py_RETURN_NOTIMPLEMENTED;
    }

    switch (op) {
    case Py_EQ: {
        PySetObject *rhs = (PySetObject *)w;

        /* Different sizes can never be equal. */
        if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w)) {
            Py_RETURN_FALSE;
        }
        /* When both sides carry a hash other than -1, differing hashes
         * rule out equality without touching the elements. */
        if (v->hash != -1 && rhs->hash != -1 && v->hash != rhs->hash) {
            Py_RETURN_FALSE;
        }
        /* Same size: equality reduces to the subset test. */
        return set_issubset(v, w);
    }

    case Py_NE:
        /* Negate the result of the equality comparison. */
        eq_result = set_richcompare(v, w, Py_EQ);
        if (eq_result == NULL) {
            return NULL;
        }
        is_equal = PyObject_IsTrue(eq_result);
        Py_DECREF(eq_result);
        if (is_equal < 0) {
            return NULL;
        }
        return PyBool_FromLong(!is_equal);

    case Py_LE:
        return set_issubset(v, w);

    case Py_LT:
        /* A proper subset must be strictly smaller. */
        if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w)) {
            Py_RETURN_FALSE;
        }
        return set_issubset(v, w);

    case Py_GE:
        return set_issuperset(v, w);

    case Py_GT:
        /* A proper superset must be strictly larger. */
        if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w)) {
            Py_RETURN_FALSE;
        }
        return set_issuperset(v, w);

    default:
        break;
    }
    Py_RETURN_NOTIMPLEMENTED;
}