php5.6的哈希表比较恶心,php7也对哈希表进行了改造,先介绍下php5.6的哈希表
原来大家都清楚,我们看一下更细的一部分,如何更新插入:
static zend_never_inline zval **_get_zval_cv_lookup_BP_VAR_W(zval ***ptr, zend_uint var TSRMLS_DC)
{
zend_compiled_variable *cv = &CV_DEF_OF(var);
if (!EG(active_symbol_table)) {
Z_ADDREF(EG(uninitialized_zval));
*ptr = (zval**)EX_CV_NUM(EG(current_execute_data), EG(active_op_array)->last_var + var);
**ptr = &EG(uninitialized_zval);
} else if (zend_hash_quick_find(EG(active_symbol_table), cv->name, cv->name_len+1, cv->hash_value, (void **)ptr)==FAILURE) {
Z_ADDREF(EG(uninitialized_zval));
zend_hash_quick_update(EG(active_symbol_table), cv->name, cv->name_len+1, cv->hash_value, &EG(uninitialized_zval_ptr), sizeof(zval *), (void **)ptr);
}
return *ptr;
}
ZEND_API int _zend_hash_quick_add_or_update(HashTable *ht, const char *arKey, uint nKeyLength, ulong h, void *pData, uint nDataSize, void **pDest, int flag ZEND_FILE_LINE_DC);
zend_hash_quick_update(EG(active_symbol_table), cv->name, cv->name_len+1, cv->hash_value, &EG(uninitialized_zval_ptr), sizeof(zval *), (void **)ptr);
首先要注意看的是&EG(uninitialized_zval_ptr)是一个**zval结构指针, 把它存进去的目的是初始化,也就是让hash表结构指针有所指向,一个固定位置,后续会根据pDest指针进行赋值。
其次是pDest是二级指针,为什么会是二级指针,因为c语言函数传递都是值传递,要改变指针值只能将指针地址传入。
但是虽然是二级指针,事实调用的却是用的三级真值***ptr,那是因为符号表变量存的永远是**zval结构,加上ptr变量地址,所以三级就是这样来的。
#define UPDATE_DATA(ht, p, pData, nDataSize) \
if (nDataSize == sizeof(void*)) { \
if ((p)->pData != &(p)->pDataPtr) { \
pefree_rel((p)->pData, (ht)->persistent); \
} \
memcpy(&(p)->pDataPtr, pData, sizeof(void *)); \
(p)->pData = &(p)->pDataPtr; \
} else { \
if ((p)->pData == &(p)->pDataPtr) { \
(p)->pData = (void *) pemalloc_rel(nDataSize, (ht)->persistent); \
(p)->pDataPtr=NULL; \
} else { \
(p)->pData = (void *) perealloc_rel((p)->pData, nDataSize, (ht)->persistent); \
/* (p)->pDataPtr is already NULL so no need to initialize it */ \
} \
memcpy((p)->pData, pData, nDataSize); \
}
具体看下面画的很难看的图
接下来是php7的数据结构:
struct _zend_string {
zend_refcounted_h gc;
zend_ulong h; /* hash value */
size_t len;
char val[1];
};
typedef struct _Bucket {
zval val;
zend_ulong h; /* hash value (or numeric index) */
zend_string *key; /* string key or NULL for numerics */
} Bucket;
typedef struct _zend_array HashTable;
struct _zend_array {
zend_refcounted_h gc;
union {
struct {
ZEND_ENDIAN_LOHI_4(
zend_uchar flags,
zend_uchar nApplyCount,
zend_uchar nIteratorsCount,
zend_uchar reserve)
} v;
uint32_t flags;
} u;
uint32_t nTableMask;
Bucket *arData;
uint32_t nNumUsed;
uint32_t nNumOfElements;
uint32_t nTableSize;
uint32_t nInternalPointer;
zend_long nNextFreeElement;
dtor_func_t pDestructor;
};
/*
* HashTable Data Layout
* =====================
*
* +=============================+
* | HT_HASH(ht, ht->nTableMask) |
* | ... |
* | HT_HASH(ht, -1) |
* +-----------------------------+
* ht->arData ---> | Bucket[0] |
* | ... |
* | Bucket[ht->nTableSize-1] |
* +=============================+
*/
ht->arData仅是一级指针,分为上下两部分,上部分是索引根据hash值偏移获取下半部分的index,从而拿到内容
#define HT_HASH_EX(data, idx) \
((uint32_t*)(data))[(int32_t)(idx)]
根据hash值获取int类型的偏移量,通常idx为负数见初始化:
static void zend_always_inline zend_hash_real_init_ex(HashTable *ht, int packed)
{
HT_ASSERT(GC_REFCOUNT(ht) == 1);
ZEND_ASSERT(!((ht)->u.flags & HASH_FLAG_INITIALIZED));
if (packed) {
HT_SET_DATA_ADDR(ht, pemalloc(HT_SIZE(ht), (ht)->u.flags & HASH_FLAG_PERSISTENT));
(ht)->u.flags |= HASH_FLAG_INITIALIZED | HASH_FLAG_PACKED;
HT_HASH_RESET_PACKED(ht);
} else {
(ht)->nTableMask = -(ht)->nTableSize;
HT_SET_DATA_ADDR(ht, pemalloc(HT_SIZE(ht), (ht)->u.flags & HASH_FLAG_PERSISTENT));
(ht)->u.flags |= HASH_FLAG_INITIALIZED;
if (EXPECTED(ht->nTableMask == -8)) {
Bucket *arData = ht->arData;
HT_HASH_EX(arData, -8) = -1;
HT_HASH_EX(arData, -7) = -1;
HT_HASH_EX(arData, -6) = -1;
HT_HASH_EX(arData, -5) = -1;
HT_HASH_EX(arData, -4) = -1;
HT_HASH_EX(arData, -3) = -1;
HT_HASH_EX(arData, -2) = -1;
HT_HASH_EX(arData, -1) = -1;
} else {
HT_HASH_RESET(ht);
}
}
}
# define HT_HASH_TO_BUCKET_EX(data, idx) \ ((Bucket*)((char*)(data) + (idx)))根据上面获取的偏移量获取bucket指针
可见增加元素的方法
add_to_hash:
HANDLE_BLOCK_INTERRUPTIONS();
idx = ht->nNumUsed++;
ht->nNumOfElements++;
if (ht->nInternalPointer == HT_INVALID_IDX) {
ht->nInternalPointer = idx;
}
zend_hash_iterators_update(ht, HT_INVALID_IDX, idx);
p = ht->arData + idx;
p->key = key;
if (!ZSTR_IS_INTERNED(key)) {
zend_string_addref(key);
ht->u.flags &= ~HASH_FLAG_STATIC_KEYS;
zend_string_hash_val(key);
}
p->h = h = ZSTR_H(key);
ZVAL_COPY_VALUE(&p->val, pData);
nIndex = h | ht->nTableMask;
Z_NEXT(p->val) = HT_HASH(ht, nIndex);
HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(idx);
HANDLE_UNBLOCK_INTERRUPTIONS();
首先zval元素的位置是顺序存储的,就是ht->arData的下半部分,然后通过hash算出nIndex位移即上部分,首先将上部分的初始位置(-1)赋值给下半部分zval的next属性,然后赋值为下部分的位置(1),如此循环,当加入一个新的hash相同的数据时,此时将1赋值给新的zval的next,上部分的值为2。
2 $var = 1
php7解析过程 :
compile_file -> zendparse(yyparse) -> zend_compile_top_stmt -> zend_compile_stmt-> zend_compile_assign -> zend_compile_simple_var -> zend_try_compile_cv -> zend_compile_simple_var_no_cv
获取opcode