目录结构:
Include: 包含Python所有头文件
Lib: Python自带的所有标准库
Modules: 所有C语言编写的模块
Paser: Python解释器中Scanner和Parser部分(词法分析和语法分析)
Objects: Python所有内建对象及运行时需要的所有内部使用对象的实现
Python: Python解释器的Compiler和执行引擎(核心)
PCBuild: 工程文件
编译
$ ./configure --prefix=./GitClone/cpython/Build
$ make
$ make install
Python世界中一切都是对象
/* PyObject_HEAD defines the initial segment of every PyObject. */ #define PyObject_HEAD \ _PyObject_HEAD_EXTRA \ Py_ssize_t ob_refcnt; \ struct _typeobject *ob_type; /* PyObject_VAR_HEAD defines the initial segment of all variable-sizecontainer objects */ #define PyObject_VAR_HEAD \ PyObject_HEAD \ Py_ssize_t ob_size; /* Number of items in variable part 指明变长对象中容纳元素的个数*/
#object.h typedef struct _object { PyObject_HEAD } PyObject; #实际是一个整型引用计数, 一个类型结构体指针 typedef struct _object { Py_ssize_t ob_refcnt; #int引用计数 struct _typeobject *ob_type; } PyObject; typedef struct _typeobject { PyObject_VAR_HEAD const char *tp_name; /* For printing, in format "<module>.<name>" */ Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation 对象分配内存空间大小信息*/ /* Methods to implement standard operations */ destructor tp_dealloc; printfunc tp_print; getattrfunc tp_getattr; setattrfunc tp_setattr; cmpfunc tp_compare; reprfunc tp_repr; /* Method suites for standard classes */ PyNumberMethods *tp_as_number; #数值对象支持操作 PySequenceMethods *tp_as_sequence; #序列对象支持操作 PyMappingMethods *tp_as_mapping; #关联对象支持操作 /* More standard operations (here for binary compatibility) */ ... } PyTypeObject; #通过PyType_Type(对应<type 'type'>, 被称为metaclass)来确定PyTypeObject #用户自定义class所对应的PyTypeObject就是通过PyType_Type创建 #typeobject.c PyTypeObject PyType_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) "type", /* tp_name */ sizeof(PyHeapTypeObject), /* tp_basicsize */ sizeof(PyMemberDef), /* tp_itemsize */ (destructor)type_dealloc, /* tp_dealloc */ ... };
Python每个对象除了PyObject还保存了属于自己的特殊的信息
#变长对象 #define PyObject_VAR_HEAD \ PyObject_HEAD \ Py_ssize_t ob_size; /* Number of items in variable part 指明变长对象中容纳元素的个数*/ typedef struct { PyObject_VAR_HEAD } PyVarObject;
int对象类型PyInt__Type
object对象类型PyBaseObject__Type
对象创建int为例, 调用PyInt_Type中的tp_new, 若为NULL则找到基类的tp_new, 知道找到不为NULL的tp_new, tp_new指向object_new, object_new访问PyInt_Type中的tp_basicsize完成内存申请.PyInt_Type继续执行tp_init初始化
对象
多态的核心: 通过传入对象的ob_type类型确定对象的类型, 从来调用对象对应的操作.
#define _Py_NewReference(op) ( #初始化引用计数为1 \ _Py_INC_TPALLOCS(op) _Py_COUNT_ALLOCS_COMMA \ _Py_INC_REFTOTAL _Py_REF_DEBUG_COMMA \ Py_REFCNT(op) = 1) #define _Py_ForgetReference(op) _Py_INC_TPFREES(op) #define _Py_Dealloc(op) (#为0时销毁对象 \ _Py_INC_TPFREES(op) _Py_COUNT_ALLOCS_COMMA \ (*Py_TYPE(op)->tp_dealloc)((PyObject *)(op))) #endif /* !Py_TRACE_REFS */ #define Py_INCREF(op) ( #增加对象引用计数 \ _Py_INC_REFTOTAL _Py_REF_DEBUG_COMMA \ ((PyObject*)(op))->ob_refcnt++) #define Py_DECREF(op) #减少对象引用计数, 为0时调用dealloc销毁 \ do { \ if (_Py_DEC_REFTOTAL _Py_REF_DEBUG_COMMA \ --((PyObject*)(op))->ob_refcnt != 0) \ _Py_CHECK_REFCNT(op) \ else \ _Py_Dealloc((PyObject *)(op)); \ } while (0)
#intobject.h #PyIntOject创建方式 PyAPI_FUNC(PyObject *) PyInt_FromString(char*, char**, int); #ifdef Py_USING_UNICODE PyAPI_FUNC(PyObject *) PyInt_FromUnicode(Py_UNICODE*, Py_ssize_t, int); #endif PyAPI_FUNC(PyObject *) PyInt_FromLong(long); PyAPI_FUNC(PyObject *) PyInt_FromSize_t(size_t); PyAPI_FUNC(PyObject *) PyInt_FromSsize_t(Py_ssize_t);
#intoject.c #小整数 #ifndef NSMALLPOSINTS #define NSMALLPOSINTS 257 #endif #ifndef NSMALLNEGINTS #define NSMALLNEGINTS 5 #endif #if NSMALLNEGINTS + NSMALLPOSINTS > 0 /* References to small integers are saved in this array so that they can be shared. The integers that are saved are those in the range -NSMALLNEGINTS (inclusive) to NSMALLPOSINTS (not inclusive). */ #大整数 #define BLOCK_SIZE 1000 /* 1K less typical malloc overhead */ #define BHEAD_SIZE 8 /* Enough for a 64-bit pointer */ #define N_INTOBJECTS ((BLOCK_SIZE - BHEAD_SIZE) / sizeof(PyIntObject)) struct _intblock { struct _intblock *next; PyIntObject objects[N_INTOBJECTS]; }; typedef struct _intblock PyIntBlock; static PyIntBlock *block_list = NULL; static PyIntObject *free_list = NULL;
#intojects.c PyObject * PyInt_FromLong(long ival) { register PyIntObject *v; #尝试使用小整数对象池 #if NSMALLNEGINTS + NSMALLPOSINTS > 0 if (-NSMALLNEGINTS <= ival && ival < NSMALLPOSINTS) { v = small_ints[ival + NSMALLNEGINTS]; Py_INCREF(v); #引用计数加一 #ifdef COUNT_ALLOCS if (ival >= 0) quick_int_allocs++; else quick_neg_int_allocs++; #endif return (PyObject *) v; } #endif #为通用整数对象池申请新的内存空间 if (free_list == NULL) { if ((free_list = fill_free_list()) == NULL) return NULL; } /* Inline PyObject_New */ v = free_list; #通过free_list找到空闲的内存 free_list = (PyIntObject *)Py_TYPE(v); PyObject_INIT(v, &PyInt_Type); v->ob_ival = ival; return (PyObject *) v; }
#stringobject.h typedef struct { PyObject_VAR_HEAD long ob_shash; #缓存对象的hash值, 初始-1, string_hash计算哈希 int ob_sstate; char ob_sval[1]; #实际作为一个字符指针指向一段内存, 这段内存保存着字符串所维护的实际字符串 } PyStringObject;
PyStringObject对一个的类型对象PyString_Type
#stringobject.c PyTypeObject PyString_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) "str", PyStringObject_SIZE, sizeof(char), #指明变长对象保存的元素的单位长度 string_dealloc, /* tp_dealloc */ (printfunc)string_print, /* tp_print */ ... string_repr, /* tp_repr */ &string_as_number, /* tp_as_number */ &string_as_sequence, /* tp_as_sequence */ &string_as_mapping, /* tp_as_mapping */ (hashfunc)string_hash, /* tp_hash */ ... };
#stringobject.c PyObject * PyString_FromString(const char *str) { register size_t size; register PyStringObject *op; assert(str != NULL); size = strlen(str); #获得字符串长度 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) { PyErr_SetString(PyExc_OverflowError, "string is too long for a Python string"); return NULL; } if (size == 0 && (op = nullstring) != NULL) { #处理null string #ifdef COUNT_ALLOCS null_strings++; #endif Py_INCREF(op); return (PyObject *)op; } if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) { #处理字符 #ifdef COUNT_ALLOCS one_strings++; #endif Py_INCREF(op); return (PyObject *)op; } /* Inline PyObject_NewVar 创建新的对象, 并初始化 */ op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size); if (op == NULL) return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; op->ob_sstate = SSTATE_NOT_INTERNED; Py_MEMCPY(op->ob_sval, str, size+1); /* share short strings */ if (size == 0) { PyObject *t = (PyObject *)op; PyString_InternInPlace(&t); op = (PyStringObject *)t; nullstring = op; Py_INCREF(op); } else if (size == 1) { PyObject *t = (PyObject *)op; PyString_InternInPlace(&t); op = (PyStringObject *)t; characters[*str & UCHAR_MAX] = op; Py_INCREF(op); } return (PyObject *) op; }
typedef struct { PyObject_VAR_HEAD #ob_size表示使用中的内存 /* Vector of pointers to list elements. list[0] is ob_item[0], etc. * 0 <= ob_size <= allocated * len(list) == ob_size * ob_item == NULL implies ob_size == allocated == 0 */ PyObject **ob_item; /* ob_item contains space for 'allocated' elements. The number currently in use is ob_size. */ Py_ssize_t allocated; #记录申请的总内存大小 } PyListObject;
#listobject.c #define PyList_MAXFREELIST 80 PyObject * PyList_New(Py_ssize_t size) #指定元素个数, 不是内存大小 { PyListObject *op; size_t nbytes; #ifdef SHOW_ALLOC_COUNT static int initialized = 0; if (!initialized) { Py_AtExit(show_alloc); initialized = 1; } #endif if (size < 0) { PyErr_BadInternalCall(); return NULL; } /* Check for overflow without an actual overflow, * which can cause compiler to optimise out 内存数量计算, 溢出检查*/ if ((size_t)size > PY_SIZE_MAX / sizeof(PyObject *)) return PyErr_NoMemory(); nbytes = size * sizeof(PyObject *); if (numfree) { #缓冲池可用 numfree--; op = free_list[numfree]; _Py_NewReference((PyObject *)op); #ifdef SHOW_ALLOC_COUNT count_reuse++; #endif } else { #缓冲池不可用 op = PyObject_GC_New(PyListObject, &PyList_Type); if (op == NULL) return NULL; #ifdef SHOW_ALLOC_COUNT count_alloc++; #endif } if (size <= 0) #为PyListObject对象中维护的元素列表申请空间 op->ob_item = NULL; else { op->ob_item = (PyObject **) PyMem_MALLOC(nbytes); if (op->ob_item == NULL) { Py_DECREF(op); return PyErr_NoMemory(); } memset(op->ob_item, 0, nbytes); } Py_SIZE(op) = size; op->allocated = size; _PyObject_GC_TRACK(op); return (PyObject *) op; }
static int ins1(PyListObject *self, Py_ssize_t where, PyObject *v) { Py_ssize_t i, n = Py_SIZE(self); PyObject **items; if (v == NULL) { PyErr_BadInternalCall(); return -1; } if (n == PY_SSIZE_T_MAX) { PyErr_SetString(PyExc_OverflowError, "cannot add more objects to list"); return -1; } if (list_resize(self, n+1) == -1) #调整列表容量realloc return -1; if (where < 0) { #确定插入点 为负的时候 where += n; if (where < 0) where = 0; } if (where > n) #大于容量时 where = n; items = self->ob_item; for (i = n; --i >= where; ) #元素后移 items[i+1] = items[i]; Py_INCREF(v); items[where] = v; #插入元素 return 0; }
static void list_dealloc(PyListObject *op) { Py_ssize_t i; PyObject_GC_UnTrack(op); Py_TRASHCAN_SAFE_BEGIN(op) if (op->ob_item != NULL) { #销毁元素列表 /* Do it backwards, for Christian Tismer. There's a simple test case where somehow this reduces thrashing when a *very* large list is created and immediately deleted. */ i = Py_SIZE(op); while (--i >= 0) { Py_XDECREF(op->ob_item[i]); #减少引用计数 } PyMem_FREE(op->ob_item); } if (numfree < PyList_MAXFREELIST && PyList_CheckExact(op)) #销毁list对象自身, 先查看缓冲池是否满了, 没满就放入缓冲池 free_list[numfree++] = op; else Py_TYPE(op)->tp_free((PyObject *)op); Py_TRASHCAN_SAFE_END(op) }
#dictobject.h typedef struct { /* Cached hash code of me_key. Note that hash codes are C longs. * We have to use Py_ssize_t instead because dict_popitem() abuses * me_hash to hold a search finger. */ Py_ssize_t me_hash; PyObject *me_key; PyObject *me_value; } PyDictEntry;
PyObject *PyDict_New(void) { register PyDictObject *mp; if (dummy == NULL) { /* Auto-initialize dummy, 是一个字符串对象PyString_FromString */ dummy = PyString_FromString("<dummy key>"); if (dummy == NULL) return NULL; #ifdef SHOW_CONVERSION_COUNTS Py_AtExit(show_counts); #endif #ifdef SHOW_ALLOC_COUNT Py_AtExit(show_alloc); #endif #ifdef SHOW_TRACK_COUNT Py_AtExit(show_track); #endif } if (numfree) { #使用缓冲池 mp = free_list[--numfree]; assert (mp != NULL); assert (Py_TYPE(mp) == &PyDict_Type); _Py_NewReference((PyObject *)mp); if (mp->ma_fill) { EMPTY_TO_MINSIZE(mp); } else { /* At least set ma_table and ma_mask; these are wrong if an empty but presized dict is added to freelist */ INIT_NONZERO_DICT_SLOTS(mp); } assert (mp->ma_used == 0); assert (mp->ma_table == mp->ma_smalltable); assert (mp->ma_mask == PyDict_MINSIZE - 1); #ifdef SHOW_ALLOC_COUNT count_reuse++; #endif } else { #创建新的PyDictObject对象 mp = PyObject_GC_New(PyDictObject, &PyDict_Type); if (mp == NULL) return NULL; EMPTY_TO_MINSIZE(mp); #ifdef SHOW_ALLOC_COUNT count_alloc++; #endif } mp->ma_lookup = lookdict_string; #包含了哈希函数和冲突函数的实现 #ifdef SHOW_TRACK_COUNT count_untracked++; #endif #ifdef SHOW_CONVERSION_COUNTS ++created; #endif return (PyObject *)mp; } /*将ma_smalltable清零, 同时设置ma_used和ma_fill*/ #define EMPTY_TO_MINSIZE(mp) do { \ memset((mp)->ma_smalltable, 0, sizeof((mp)->ma_smalltable)); \ (mp)->ma_used = (mp)->ma_fill = 0; \ INIT_NONZERO_DICT_SLOTS(mp); \ } while(0) /*将ma_table指向ma_samlltable, 并设置ma_mask为7*/ #define INIT_NONZERO_DICT_SLOTS(mp) do { \ (mp)->ma_table = (mp)->ma_smalltable; \ (mp)->ma_mask = PyDict_MINSIZE - 1; \ } while(0)
static PyDictEntry *lookdict(PyDictObject *mp, PyObject *key, register long hash) { register size_t i; register size_t perturb; register PyDictEntry *freeslot; register size_t mask = (size_t)mp->ma_mask; PyDictEntry *ep0 = mp->ma_table; register PyDictEntry *ep; register int cmp; PyObject *startkey; i = (size_t)hash & mask; #哈希, 定位第一个entry ep = &ep0[i]; #entry处理Unused或者entry中key匹配 if (ep->me_key == NULL || ep->me_key == key) return ep; if (ep->me_key == dummy) #第一个entry的key为dummy状态 freeslot = ep; #freeslot指向第一个处于Dummy的entry else { #检查Active的entry if (ep->me_hash == hash) { startkey = ep->me_key; Py_INCREF(startkey); cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); Py_DECREF(startkey); if (cmp < 0) return NULL; if (ep0 == mp->ma_table && ep->me_key == startkey) { if (cmp > 0) return ep; } else { /* The compare did major nasty stuff to the * dict: start over. * XXX A clever adversary could prevent this * XXX from terminating. */ return lookdict(mp, key, hash); } } freeslot = NULL; } /* In the loop, me_key == dummy is by far (factor of 100s) the least likely outcome, so test for that last. */ for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { i = (i << 2) + i + perturb + 1; ep = &ep0[i & mask]; if (ep->me_key == NULL) #到达Unused状态entry,搜索失败 return freeslot == NULL ? ep : freeslot; if (ep->me_key == key) #检查是否引用相同 return ep; if (ep->me_hash == hash && ep->me_key != dummy) { #检查值是否相同 startkey = ep->me_key; Py_INCREF(startkey); cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); Py_DECREF(startkey); if (cmp < 0) return NULL; if (ep0 == mp->ma_table && ep->me_key == startkey) { if (cmp > 0) return ep; } else { /* The compare did major nasty stuff to the * dict: start over. * XXX A clever adversary could prevent this * XXX from terminating. */ return lookdict(mp, key, hash); } } else if (ep->me_key == dummy && freeslot == NULL) #设置freeslot freeslot = ep; } assert(0); /* NOT REACHED */ return 0; }
/* Restructure the table by allocating a new table and reinserting all items again. When entries have been deleted, the new table may actually be smaller than the old one. */ static int dictresize(PyDictObject *mp, Py_ssize_t minused) { Py_ssize_t newsize; PyDictEntry *oldtable, *newtable, *ep; Py_ssize_t i; int is_oldtable_malloced; PyDictEntry small_copy[PyDict_MINSIZE]; assert(minused >= 0); /* Find the smallest table size > minused. 确定新table的大小 */ for (newsize = PyDict_MINSIZE; newsize <= minused && newsize > 0; newsize <<= 1) ; if (newsize <= 0) { PyErr_NoMemory(); return -1; } /* Get space for a new table. */ oldtable = mp->ma_table; assert(oldtable != NULL); is_oldtable_malloced = oldtable != mp->ma_smalltable; if (newsize == PyDict_MINSIZE) { #新table可以使用mp->ma_smalltable /* A large table is shrinking, or we can't get any smaller. */ newtable = mp->ma_smalltable; if (newtable == oldtable) { if (mp->ma_fill == mp->ma_used) { /* No dummies, so no point doing anything. */ return 0; } /* We're not going to resize it, but rebuild the table anyway to purge old dummy entries. Subtle: This is *necessary* if fill==size, as lookdict needs at least one virgin slot to terminate failing searches. If fill < size, it's merely desirable, as dummies slow searches. */ assert(mp->ma_fill > mp->ma_used); memcpy(small_copy, oldtable, sizeof(small_copy)); #将旧table拷贝, 进行备份 oldtable = small_copy; } } else { #新table需要在系统堆上申请 newtable = PyMem_NEW(PyDictEntry, newsize); if (newtable == NULL) { PyErr_NoMemory(); return -1; } } /* Make the dict empty, using the new table. */ assert(newtable != oldtable); mp->ma_table = newtable; mp->ma_mask = newsize - 1; memset(newtable, 0, sizeof(PyDictEntry) * newsize); mp->ma_used = 0; i = mp->ma_fill; mp->ma_fill = 0; /* Copy the data over; this is refcount-neutral for active entries; dummy entries aren't copied over, of course */ for (ep = oldtable; i > 0; ep++) { if (ep->me_value != NULL) { /* active entry */ --i; insertdict_clean(mp, ep->me_key, (long)ep->me_hash, ep->me_value); } else if (ep->me_key != NULL) { /* dummy entry */ --i; assert(ep->me_key == dummy); Py_DECREF(ep->me_key); } /* else key == value == NULL: nothing to do */ } if (is_oldtable_malloced) PyMem_DEL(oldtable); return 0; }
确定新table的大小
如果新table大小为8, 不需要在堆上分配空间, 使用ma_smalltabel, 否则在堆上分配空间
新table初始化
对非Unused处理, Active插入到新表,Dummy丢弃
如果旧table指向系统堆内存, 需要释放内存
删除流程PyDict_DelItem
计算hash
搜索entry
删除entry中的元素, 并将Active置为Dummy态
调整维护table使用情况的变量
#ifndef PyDict_MAXFREELIST #define PyDict_MAXFREELIST 80 #endif static PyDictObject *free_list[PyDict_MAXFREELIST]; static int numfree = 0; static void dict_dealloc(register PyDictObject *mp) { register PyDictEntry *ep; Py_ssize_t fill = mp->ma_fill; PyObject_GC_UnTrack(mp); Py_TRASHCAN_SAFE_BEGIN(mp) #调整dict中对象引用计数 for (ep = mp->ma_table; fill > 0; ep++) { if (ep->me_key) { --fill; Py_DECREF(ep->me_key); Py_XDECREF(ep->me_value); } } if (mp->ma_table != mp->ma_smalltable) PyMem_DEL(mp->ma_table); #释放table指向的堆内存 if (numfree < PyDict_MAXFREELIST && Py_TYPE(mp) == &PyDict_Type) free_list[numfree++] = mp; #销毁的对象放入缓冲池 else Py_TYPE(mp)->tp_free((PyObject *)mp); Py_TRASHCAN_SAFE_END(mp) }