Python Notes:Set

/* Object layout shared by the set code in these notes: a hash table of
 * setentry slots plus the counters that describe how full it is.
 */
typedef struct {
    PyObject_HEAD

    Py_ssize_t fill;            /* Number of active and dummy entries */
    Py_ssize_t used;            /* Number of active entries */

    /* The table contains mask + 1 slots, and that's a power of 2.
     * We store the mask instead of the size because the mask is more
     * frequently needed.
     */
    Py_ssize_t mask;

    /* The table points to a fixed-size smalltable for small tables
     * or to additional malloc'ed memory for bigger tables.
     * The table pointer is never NULL which saves us from repeated
     * runtime null-tests.
     */
    setentry *table;
    Py_hash_t hash;             /* Only used by frozenset objects */
    Py_ssize_t finger;          /* Search finger for pop() */

/* Number of slots in the embedded smalltable. */
#define PySet_MINSIZE 8

    setentry smalltable[PySet_MINSIZE];
    PyObject *weakreflist;      /* List of weak references */
} PySetObject;

Set集合New

/* Create a new, empty set object of the given type.
 * args and kwds are accepted but not examined here; no iterable is
 * passed on, so the new set starts out empty.
 */
static PyObject *
set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    PyObject *fresh = make_new_set(type, NULL);

    return fresh;
}

/* Allocate a set object of the given type and initialise it as empty.
 * When iterable is non-NULL its elements are added through
 * set_update_internal().
 *
 * Returns the new object, or NULL if allocation or the update failed
 * (in the latter case the half-built object is released first).
 */
static PyObject *
make_new_set(PyTypeObject *type, PyObject *iterable)
{
    PySetObject *self = (PySetObject *)type->tp_alloc(type, 0);

    if (self == NULL) {
        return NULL;
    }

    /* Start out on the embedded small table; mask selects a slot index
       from a hash value. */
    self->table = self->smalltable;
    self->mask = PySet_MINSIZE - 1;
    self->used = 0;
    self->fill = 0;
    self->finger = 0;
    self->hash = -1;
    self->weakreflist = NULL;

    if (iterable == NULL) {
        return (PyObject *)self;
    }

    if (set_update_internal(self, iterable) != 0) {
        Py_DECREF(self);
        return NULL;
    }
    return (PyObject *)self;
}

Set集合Add

时间复杂度:平均O(1),哈希冲突严重时最坏O(n)

/* Add key to the set.
 * Returns None on success, or NULL when set_add_key() reports a failure.
 */
static PyObject *
set_add(PySetObject *so, PyObject *key)
{
    int status = set_add_key(so, key);

    if (status != 0) {
        return NULL;
    }
    Py_RETURN_NONE;
}


/* Obtain the hash of key and insert the key via set_add_entry().
 * Returns -1 if hashing fails, otherwise whatever set_add_entry() returns.
 */
static int
set_add_key(PySetObject *so, PyObject *key)
{
    Py_hash_t hash = -1;

    /* Exact str objects carry their hash in the object; -1 there means
       it still has to be computed. */
    if (PyUnicode_CheckExact(key)) {
        hash = ((PyASCIIObject *) key)->hash;
    }
    if (hash == -1) {
        hash = PyObject_Hash(key);
        if (hash == -1) {
            return -1;
        }
    }

    return set_add_entry(so, key, hash);
}


/* Insert key, with its precomputed hash, into the set's hash table.
 *
 * Returns 0 when the key was stored or an equal key was already present,
 * and -1 when a rich comparison reported an error.  When the insertion
 * reaches the resize threshold, the result of set_table_resize() is
 * returned instead.
 *
 * The reference taken by the Py_INCREF at the top is kept by the table
 * when the key is stored, and dropped again on the "already present"
 * and error paths.
 */
static int
set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
{
    setentry *table;
    setentry *freeslot;
    setentry *entry;
    size_t perturb;
    size_t mask;
    size_t i;                       /* Unsigned for defined overflow behavior */
    size_t j;
    int cmp;

    /* Pre-increment is necessary to prevent arbitrary code in the rich
       comparison from deallocating the key just before the insertion. */
    Py_INCREF(key);

  restart:
    
    /* Initial slot index: the bits of the hash selected by the mask. */
    mask = so->mask;
    i = (size_t)hash & mask;
    
    /* Look at the slot the index points to. */
    entry = &so->table[i];
    /* A NULL key means the slot was never used: store the key right here. */
    if (entry->key == NULL)
        goto found_unused;

    freeslot = NULL;
    perturb = hash;

    while (1) {
        /* Equal hashes: this slot may already hold an equal key. */
        if (entry->hash == hash) {
            PyObject *startkey = entry->key;
            
            /* Same object: the key is already in the set. */
            if (startkey == key)
                goto found_active;
            /* Two exact str objects with equal contents: already in the set. */
            if (PyUnicode_CheckExact(startkey)
                && PyUnicode_CheckExact(key)
                && _PyUnicode_EQ(startkey, key))
                goto found_active;
            table = so->table;
            Py_INCREF(startkey);

            /* Fall back to a full rich comparison (startkey == key). */
            cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
            Py_DECREF(startkey);
            /* Equal: the key is already in the set. */
            if (cmp > 0)                                          /* likely */
                goto found_active;
            /* The comparison reported an error. */
            if (cmp < 0)
                goto comparison_error;
            /* Continuing the search from the current entry only makes
               sense if the table and entry are unchanged; otherwise,
               we have to restart from the beginning */
            /* The table or this entry changed while comparing. */
            if (table != so->table || entry->key != startkey)
                goto restart;
            mask = so->mask;                 /* help avoid a register spill */
        }
        /* A hash of -1 marks a dummy (deleted) entry, not an unused one.
           Remember it so the key can reuse the slot if it turns out to
           be absent. */
        else if (entry->hash == -1)
            freeslot = entry;
        
/* Number of adjacent slots examined after each probe position. */
#define LINEAR_PROBES 9
        /* Short linear scan over the slots that follow index i; only done
           when i + LINEAR_PROBES does not go past the last slot (mask). */
        if (i + LINEAR_PROBES <= mask) {
            for (j = 0 ; j < LINEAR_PROBES ; j++) {
                entry++;
                /* Never-used slot (hash 0 and NULL key): the key is not in
                   the set, so the search ends here. */
                if (entry->hash == 0 && entry->key == NULL)
                    goto found_unused_or_dummy;
                /* Equal hashes: same equality checks as for the first slot. */
                if (entry->hash == hash) {
                    PyObject *startkey = entry->key;
                    assert(startkey != dummy);
                    /* Same object: already in the set. */
                    if (startkey == key)
                        goto found_active;
                    /* Two exact str objects with equal contents: already in the set. */
                    if (PyUnicode_CheckExact(startkey)
                        && PyUnicode_CheckExact(key)
                        && _PyUnicode_EQ(startkey, key))
                        goto found_active;
                    table = so->table;
                    Py_INCREF(startkey);
                    /* Full rich comparison. */
                    cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
                    Py_DECREF(startkey);
                    /* Equal: already in the set. */
                    if (cmp > 0)
                        goto found_active;
                    /* The comparison reported an error. */
                    if (cmp < 0)
                        goto comparison_error;
                    /* The table or this entry changed while comparing. */
                    if (table != so->table || entry->key != startkey)
                        goto restart;
                    mask = so->mask;
                }
                else if (entry->hash == -1)
                    freeslot = entry;
            }
        }
        
        /* The linear scan found neither the key nor an unused slot: fold
           more bits of the hash (perturb) into the index and probe again. */
        perturb >>= PERTURB_SHIFT;
        i = (i * 5 + 1 + perturb) & mask;

        entry = &so->table[i];
        /* Unused slot (NULL key): the key is absent, the search ends here. */
        if (entry->key == NULL)
            goto found_unused_or_dummy;
    }

  /* The key is absent.  Reuse a dummy slot seen during the search if
     there is one; otherwise use the unused slot the search stopped at. */
  found_unused_or_dummy:
    if (freeslot == NULL)
        goto found_unused;
    so->used++;
    freeslot->key = key;
    freeslot->hash = hash;
    return 0;

  /* Store the key in a never-used slot; this also raises the fill count. */
  found_unused:
    so->fill++;
    so->used++;
    entry->key = key;
    entry->hash = hash;
    /* No resize while fill*5 < mask*3, i.e. fill is below 3/5 of mask. */
    if ((size_t)so->fill*5 < mask*3)
        return 0;
    /* Otherwise resize, requesting used*2 when more than 50000 entries
       are active and used*4 otherwise. */
    return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);

  /* An equal key is already stored: drop the reference taken on entry. */
  found_active:
    Py_DECREF(key);
    return 0;
    
  /* A comparison failed: drop the reference taken on entry, report -1. */
  comparison_error:
    Py_DECREF(key);
    return -1;
}

Set集合Remove

/* Remove key from the set.
 *
 * Returns None on success.  Returns NULL with a KeyError set when the
 * key is not present, or NULL when the lookup itself failed.
 */
static PyObject *
set_remove(PySetObject *so, PyObject *key)
{
    int status = set_discard_key(so, key);

    if (status < 0) {
        PyObject *frozen;

        /* Only one failure is retried: a TypeError raised for a key
           that is itself a set.  Anything else is passed on. */
        if (!(PySet_Check(key) && PyErr_ExceptionMatches(PyExc_TypeError))) {
            return NULL;
        }
        PyErr_Clear();

        /* Retry the removal with a frozenset built from the key. */
        frozen = make_new_set(&PyFrozenSet_Type, key);
        if (frozen == NULL) {
            return NULL;
        }
        status = set_discard_key(so, frozen);
        Py_DECREF(frozen);
        if (status < 0) {
            return NULL;
        }
    }

    if (status != DISCARD_NOTFOUND) {
        Py_RETURN_NONE;
    }

    /* Nothing was removed: report the missing key. */
    _PyErr_SetKeyError(key);
    return NULL;
}


/* Obtain the hash of key and remove the key via set_discard_entry().
 * Returns -1 if hashing fails, otherwise whatever set_discard_entry()
 * returns.
 */
static int
set_discard_key(PySetObject *so, PyObject *key)
{
    Py_hash_t hash = -1;

    /* Exact str objects carry their hash in the object; -1 there means
       it still has to be computed. */
    if (PyUnicode_CheckExact(key)) {
        hash = ((PyASCIIObject *) key)->hash;
    }
    if (hash == -1) {
        hash = PyObject_Hash(key);
        if (hash == -1) {
            return -1;
        }
    }
    return set_discard_entry(so, key, hash);
}


/* Remove the entry matching key/hash from the table, if there is one.
 *
 * Returns DISCARD_FOUND when an entry was removed, DISCARD_NOTFOUND when
 * the key is not in the set, and -1 when the lookup failed.
 */
static int
set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
{
    PyObject *victim;
    setentry *slot = set_lookkey(so, key, hash);

    if (slot == NULL) {
        return -1;
    }

    victim = slot->key;
    if (victim == NULL) {
        return DISCARD_NOTFOUND;
    }

    /* Turn the slot into a dummy (key = dummy, hash = -1) and adjust the
       active count before the stored key is released. */
    slot->hash = -1;
    slot->key = dummy;
    so->used -= 1;
    Py_DECREF(victim);
    return DISCARD_FOUND;
}



/* Find the table entry for key, given its precomputed hash.
 *
 * Returns the entry that holds an equal key, or the unused entry (NULL
 * key) at which the probe sequence ended when the key is absent.
 * Returns NULL when a rich comparison reported an error.
 */
static setentry *
set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
{
    setentry *table;
    setentry *entry;
    size_t perturb;
    /* Initial slot index: the bits of the hash selected by the mask. */
    size_t mask = so->mask;
    size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */

    size_t j;
    int cmp;

    entry = &so->table[i];
    /* Never-used slot (NULL key), not a dummy: the key is absent, so the
       slot itself is returned. */
    if (entry->key == NULL)
        return entry;

    perturb = hash;

    while (1) {
        /* Equal hashes: this slot may hold an equal key. */
        if (entry->hash == hash) {
            PyObject *startkey = entry->key;
            /* startkey cannot be a dummy because the dummy hash field is -1 */
            assert(startkey != dummy);
            /* Same object: found. */
            if (startkey == key)
                return entry;
            /* Two exact str objects with equal contents: found. */
            if (PyUnicode_CheckExact(startkey)
                && PyUnicode_CheckExact(key)
                && _PyUnicode_EQ(startkey, key))
                return entry;
            table = so->table;
            Py_INCREF(startkey);
            /* Fall back to a full rich comparison (startkey == key). */
            cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
            Py_DECREF(startkey);
            if (cmp < 0)                                          /* unlikely */
                return NULL;
            /* The table or this entry changed while comparing: start the
               lookup over, even if the comparison said "equal". */
            if (table != so->table || entry->key != startkey)     /* unlikely */
                return set_lookkey(so, key, hash);
            if (cmp > 0)                                          /* likely */
                return entry;
            mask = so->mask;                 /* help avoid a register spill */
        }
        /* Short linear scan over the slots that follow index i; only done
           when i + LINEAR_PROBES does not go past the last slot (mask). */
        if (i + LINEAR_PROBES <= mask) {
            for (j = 0 ; j < LINEAR_PROBES ; j++) {
                entry++;
                /* Never-used slot (hash 0 and NULL key): the key is absent. */
                if (entry->hash == 0 && entry->key == NULL)
                    return entry;
                /* Equal hashes: same equality checks as for the first slot. */
                if (entry->hash == hash) {
                    PyObject *startkey = entry->key;
                    assert(startkey != dummy);
                    /* Same object: found. */
                    if (startkey == key)
                        return entry;
                    /* Two exact str objects with equal contents: found. */
                    if (PyUnicode_CheckExact(startkey)
                        && PyUnicode_CheckExact(key)
                        && _PyUnicode_EQ(startkey, key))
                        return entry;
                    table = so->table;
                    Py_INCREF(startkey);
                    /* Full rich comparison. */
                    cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
                    Py_DECREF(startkey);
                    if (cmp < 0)
                        return NULL;
                    if (table != so->table || entry->key != startkey)
                        return set_lookkey(so, key, hash);
                    if (cmp > 0)
                        return entry;
                    mask = so->mask;
                }
            }
        }
        /* Nothing found in the scan: fold more bits of the hash (perturb)
           into the index and probe again. */
        perturb >>= PERTURB_SHIFT;
        i = (i * 5 + 1 + perturb) & mask;

        entry = &so->table[i];
        /* Unused slot (NULL key): the key is absent. */
        if (entry->key == NULL)
            return entry;
    }
}

Set集合Update

/* NOTE(review): in the original listing everything between "i<" in the
 * for-loop header and the "so->fill" test was lost (it looks like it was
 * swallowed as an HTML tag), which fused set_update() with
 * set_update_internal().  The missing span is restored here following
 * CPython 3.7 Objects/setobject.c -- confirm against the exact version
 * these notes annotate.
 */

/* Add the elements of every object in the args tuple to the set.
 * Returns None on success, or NULL as soon as one update fails.
 */
static PyObject *
set_update(PySetObject *so, PyObject *args)
{
    Py_ssize_t i;

    /* Process the positional arguments one after another. */
    for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
        PyObject *other = PyTuple_GET_ITEM(args, i);
        if (set_update_internal(so, other))
            return NULL;
    }
    Py_RETURN_NONE;
}


/* Add every element of other to the set.
 * Returns 0 on success and -1 on failure.
 */
static int
set_update_internal(PySetObject *so, PyObject *other)
{
    PyObject *key, *it;

    /* Sets and frozensets are handed to set_merge(). */
    if (PyAnySet_Check(other))
        return set_merge(so, other);

    /* Exact dicts: walk the entries directly and reuse the hash that
       _PyDict_Next() reports for each key. */
    if (PyDict_CheckExact(other)) {
        PyObject *value;
        Py_ssize_t pos = 0;
        Py_hash_t hash;
        Py_ssize_t dictsize = PyDict_GET_SIZE(other);

        /* Do one big resize at the start, rather than
         * incrementally resizing as we insert new keys.  Expect
         * that there will be no (or few) overlapping keys.
         */
        if (dictsize < 0)
            return -1;
        if ((so->fill + dictsize)*5 >= so->mask*3) {
            if (set_table_resize(so, (so->used + dictsize)*2) != 0)
                return -1;
        }
        while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
            if (set_add_entry(so, key, hash))
                return -1;
        }
        return 0;
    }

    /* Any other object: go through the iterator protocol. */
    it = PyObject_GetIter(other);
    if (it == NULL)
        return -1;

    while ((key = PyIter_Next(it)) != NULL) {
        /* Add one element; on failure release both the iterator and the
           element obtained from it. */
        if (set_add_key(so, key)) {
            Py_DECREF(it);
            Py_DECREF(key);
            return -1;
        }
        Py_DECREF(key);
    }
    Py_DECREF(it);
    /* PyIter_Next() returned NULL: that is an error only if an exception
       is pending. */
    if (PyErr_Occurred())
        return -1;
    return 0;
}


/* Obtain the hash of key and insert the key via set_add_entry().
 * Returns -1 if hashing fails, otherwise whatever set_add_entry() returns.
 */
static int
set_add_key(PySetObject *so, PyObject *key)
{
    /* Use the hash stored in an exact str object when there is one;
       -1 means "not available, compute it". */
    Py_hash_t hash = PyUnicode_CheckExact(key)
                         ? ((PyASCIIObject *) key)->hash
                         : -1;

    if (hash == -1) {
        hash = PyObject_Hash(key);
        if (hash == -1) {
            return -1;
        }
    }
    return set_add_entry(so, key, hash);
}

Set集合比较操作(In操作待续)

/* Rich comparison between a set and another object.
 *
 * Returns NotImplemented when w is not a set/frozenset or op is not one
 * of the six comparison codes; otherwise the result of the matching
 * subset/superset test (or False when the sizes or stored hashes already
 * rule the relation out).  Returns NULL on error.
 */
static PyObject *
set_richcompare(PySetObject *v, PyObject *w, int op)
{
    PySetObject *rhs;
    PyObject *eq_result;
    int is_equal;

    if (!PyAnySet_Check(w)) {
        Py_RETURN_NOTIMPLEMENTED;
    }
    rhs = (PySetObject *)w;

    switch (op) {
    case Py_EQ:
        /* Different sizes can never be equal. */
        if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w)) {
            Py_RETURN_FALSE;
        }
        /* Both hashes are set (not -1) and differ: not equal. */
        if (v->hash != -1 && rhs->hash != -1 && v->hash != rhs->hash) {
            Py_RETURN_FALSE;
        }
        /* Same size, so the subset test decides equality. */
        return set_issubset(v, w);

    case Py_NE:
        /* Negate the outcome of the equality comparison. */
        eq_result = set_richcompare(v, w, Py_EQ);
        if (eq_result == NULL) {
            return NULL;
        }
        is_equal = PyObject_IsTrue(eq_result);
        Py_DECREF(eq_result);
        if (is_equal < 0) {
            return NULL;
        }
        return PyBool_FromLong(!is_equal);

    case Py_LT:
        /* A proper subset must be strictly smaller. */
        if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w)) {
            Py_RETURN_FALSE;
        }
        /* fall through */
    case Py_LE:
        return set_issubset(v, w);

    case Py_GT:
        /* A proper superset must be strictly larger. */
        if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w)) {
            Py_RETURN_FALSE;
        }
        /* fall through */
    case Py_GE:
        return set_issuperset(v, w);

    default:
        break;
    }
    Py_RETURN_NOTIMPLEMENTED;
}

 

你可能感兴趣的:(Python)