先打印一下php调用过程:
再增加一张异常调用的流程图:
今天稍微对php做下总结,首先介绍最重要的两个数据结构,以及两个结构间的数据传递
/*
 * Compiled representation of one PHP function/script (PHP 5.x layout,
 * abbreviated: the trailing "..." marks members omitted from this excerpt).
 * The leading "common elements" use the same member names and order as the
 * first members of the `common` struct inside union _zend_function.
 */
struct _zend_op_array {
/* Common elements */
zend_uchar type;
const char *function_name;
zend_class_entry *scope;
zend_uint fn_flags;
union _zend_function *prototype;
zend_uint num_args;
zend_uint required_num_args;
zend_arg_info *arg_info;
/* END of common elements */
zend_uint *refcount;
/* Opcode array; the compiler inspects opcodes[last-1] as the most recently
 * emitted instruction, so `last` appears to be the number of opcodes so far. */
zend_op *opcodes;
zend_uint last;
/* Compiled variables (CVs); indexed by the slot number returned from
 * lookup_cv(), each entry carries at least a `name`. */
zend_compiled_variable *vars;
/* Per the stack-frame diagram, CV slots run up to last_var-1. */
int last_var;
/* Per the stack-frame diagram, temporaries occupy slots
 * last_var .. last_var+T-1. */
zend_uint T;
/* Literal (constant) table: SET_NODE stores the index returned by
 * zend_add_literal() in the operand, and pass_two later resolves it to
 * &literals[index].constant. */
zend_literal *literals;
int last_literal;
...
};
/*
 * Execution frame used by the PHP 5.x VM while running one op_array.
 * Frames are chained through prev_execute_data.
 */
struct _zend_execute_data {
/* NOTE(review): presumably the instruction currently being executed in this
 * frame (the main loop dispatches through OPLINE->handler) - confirm. */
struct _zend_op *opline;
zend_function_state function_state;
/* The op_array this frame was created from
 * (see i_create_execute_data_from_op_array in the execute loop). */
zend_op_array *op_array;
zval *object;
HashTable *symbol_table;
/* Previous frame in the chain; zend_vm_stack_get_arg() reads call arguments
 * through this pointer rather than through the current frame. */
struct _zend_execute_data *prev_execute_data;
zval *old_error_reporting;
zend_bool nested;
zval **original_return_value;
zend_class_entry *current_scope;
zend_class_entry *current_called_scope;
zval *current_this;
struct _zend_op *fast_ret; /* used by FAST_CALL/FAST_RET (finally keyword) */
zval *delayed_exception;
call_slot *call_slots;
call_slot *call;
};
注释下php7的结构简化了不少
/*
 * Stack Frame Layout (the whole stack frame is allocated at once)
 * ==================
 *
 *                             +========================================+
 * EG(current_execute_data) -> | zend_execute_data                      |
 *                             +----------------------------------------+
 *     EX_CV_NUM(0) ---------> | VAR[0] = ARG[1]                        |
 *                             | ...                                    |
 *                             | VAR[op_array->num_args-1] = ARG[N]     |
 *                             | ...                                    |
 *                             | VAR[op_array->last_var-1]              |
 *                             | VAR[op_array->last_var] = TMP[0]       |
 *                             | ...                                    |
 *                             | VAR[op_array->last_var+op_array->T-1]  |
 *                             | ARG[N+1] (extra_args)                  |
 *                             | ...                                    |
 *                             +----------------------------------------+
 */
此处省略掉语法解析(| variable '=' expr{ zend_check_writable_variable(&$1); zend_do_assign(&$$, &$1, &$3 TSRMLS_CC); }),
直接到opcode生成阶段
op_array->last_var++;
/*
 * Compile a reference to a simple variable.
 *
 * result  - receives the operand describing where the variable can be read
 * varname - operand holding the variable name
 * bp      - non-zero: build the fetch op in a local buffer and append it to
 *           the list on top of CG(bp_stack) instead of emitting it directly
 * op      - opcode to store in the fetch instruction
 *
 * Fast path: a literal name that is not an auto-global, is not "this", and
 * does not directly follow ZEND_BEGIN_SILENCE becomes a compiled variable
 * (IS_CV) and no instruction is produced at all.
 */
void fetch_simple_variable_ex(znode *result, znode *varname, int bp, zend_uchar op TSRMLS_DC) /* {{{ */
{
	zend_op scratch_op;
	zend_op *fetch;
	zend_llist *pending_fetches;

	if (varname->op_type == IS_CONST) {
		ulong name_hash;

		if (Z_TYPE(varname->u.constant) != IS_STRING) {
			convert_to_string(&varname->u.constant);
		}
		name_hash = str_hash(Z_STRVAL(varname->u.constant), Z_STRLEN(varname->u.constant));

		if (!zend_is_auto_global_quick(Z_STRVAL(varname->u.constant), Z_STRLEN(varname->u.constant), name_hash TSRMLS_CC)) {
			/* Both flags are side-effect free, so computing them eagerly is fine. */
			int names_this = Z_STRLEN(varname->u.constant) == (sizeof("this")-1)
				&& !memcmp(Z_STRVAL(varname->u.constant), "this", sizeof("this") - 1);
			int follows_silence = CG(active_op_array)->last != 0
				&& CG(active_op_array)->opcodes[CG(active_op_array)->last-1].opcode == ZEND_BEGIN_SILENCE;

			if (!names_this && !follows_silence) {
				result->op_type = IS_CV;
				result->u.op.var = lookup_cv(CG(active_op_array), Z_STRVAL(varname->u.constant), Z_STRLEN(varname->u.constant), name_hash TSRMLS_CC);
				/* Point the name at the copy stored in the CV table. */
				Z_STRVAL(varname->u.constant) = (char*)CG(active_op_array)->vars[result->u.op.var].name;
				result->EA = 0;
				return;
			}
		}
	}

	/* Slow path: build an explicit fetch instruction. */
	if (!bp) {
		fetch = get_next_op(CG(active_op_array) TSRMLS_CC);
	} else {
		fetch = &scratch_op;
		init_op(fetch TSRMLS_CC);
	}

	fetch->opcode = op;
	fetch->result_type = IS_VAR;
	fetch->result.var = get_temporary_variable(CG(active_op_array));
	SET_NODE(fetch->op1, varname);
	GET_NODE(result, fetch->result);
	SET_UNUSED(fetch->op2);
	fetch->extended_value = ZEND_FETCH_LOCAL;

	if (varname->op_type == IS_CONST) {
		CALCULATE_LITERAL_HASH(fetch->op1.constant);
		if (zend_is_auto_global_quick(Z_STRVAL(varname->u.constant), Z_STRLEN(varname->u.constant), Z_HASH_P(&CONSTANT(fetch->op1.constant)) TSRMLS_CC)) {
			fetch->extended_value = ZEND_FETCH_GLOBAL;
		}
	}

	if (!bp) {
		return;
	}

	/* Queue the locally built op on the list at the top of the bp stack. */
	zend_stack_top(&CG(bp_stack), (void **) &pending_fetches);
	zend_llist_add_element(pending_fetches, fetch);
}
但是前提大家需要知道语法解析时将字符串或者整形统一解析到znode->u.constant中,
result的数据结构是znode,
后续opcode阶段,如果是变量,赋值的是znode_op的var属性,即偏移量;如果是常量,使用的是znode_op的zv属性,即常量信息(该值由pass_two赋值)
/*
 * One operand of an opline. Which member is meaningful depends on the
 * operand's type tag (opN_type / result_type) and on the compilation stage.
 */
typedef union _znode_op {
/* Index into op_array->literals, as returned by zend_add_literal()
 * (see SET_NODE); valid for IS_CONST operands until pass_two runs. */
zend_uint constant;
/* Variable slot: either the CV number returned by lookup_cv() or the
 * temporary number returned by get_temporary_variable(). */
zend_uint var;
zend_uint num;
zend_ulong hash;
zend_uint opline_num; /* Needs to be signed */
zend_op *jmp_addr;
/* Set by pass_two to &op_array->literals[constant].constant for
 * IS_CONST operands. */
zval *zv;
zend_literal *literal;
void *ptr; /* Used for passing pointers from the compile to execution phase, currently used for traits */
} znode_op;
/*
 * Compile-time description of an expression operand, passed between the
 * parser actions and the opcode emitters.
 */
typedef struct _znode { /* used only during compilation */
/* One of the IS_CONST / IS_TMP_VAR / IS_VAR / IS_UNUSED / IS_CV tags. */
int op_type;
union {
/* Used when op_type is not IS_CONST; SET_NODE copies it verbatim into
 * the opline operand. */
znode_op op;
zval constant; /* replaced by literal/zv */
zend_op_array *op_array;
zend_ast *ast;
} u;
zend_uint EA; /* extended attributes */
} znode;
这里补充下,语法扫描获取znode后,进入语法解析阶段,此刻op_array中有个特殊属性literals,该属性是个数组会提前分配好,最终可进行opcode阶段优化见update_op1_const函数,将变量转为常量。
accel_startup:
accelerator_orig_compile_file = zend_compile_file; // 保存原生handle
zend_compile_file = persistent_compile_file; // 赋值新的handle
用persistent_compile_file -> compile_and_cache_file -> cache_script_in_shared_memory -> zend_accel_script_optimize-> zend_accel_optimize-> zend_optimize-> replace_var_by_const->update_op1_const最后会利用literals数组将变量转换为常量更改opline的op1或者op2
该数组index与value方式进行存储
/*
 * SET_NODE(target, src): store the compile-time operand `src` (a znode *)
 * into the opline operand `target` (written as e.g. opline->op1).
 *
 * The type tag always goes to `target_type` (token-pasted). Non-constant
 * operands are copied as-is; constant operands are first registered in the
 * active op_array's literal table and only the returned index is kept.
 * Note that `src` is evaluated more than once.
 */
#define SET_NODE(target, src) do { \
		target ## _type = (src)->op_type; \
		if ((src)->op_type != IS_CONST) { \
			target = (src)->u.op; \
		} else { \
			target.constant = zend_add_literal(CG(active_op_array), &(src)->u.constant TSRMLS_CC); \
		} \
	} while (0)
如果是常量,op1.constant = index,其中value存到literals数组中,接下来的用途见pass_two;
compile->pass_two 此时会生成opcode的回调op->handler,并且会从constant的index中将value赋值给opline中的op1.zv,
这样在真正execute阶段用的就是op1.zv获取常量信息
/* Excerpt from pass_two: walk every emitted opline once. */
while (opline < end) {
/* Replace the literal-table index of a constant operand with a direct
 * pointer to the literal's zval. */
if (opline->op1_type == IS_CONST) {
opline->op1.zv = &op_array->literals[opline->op1.constant].constant;
}
if (opline->op2_type == IS_CONST) {
opline->op2.zv = &op_array->literals[opline->op2.constant].constant;
}
。。。。。。
/* Bind the handler for this opline (sets the op->handler callback). */
ZEND_VM_SET_OPCODE_HANDLER(opline);
opline++;
}
/*
 * Operand type tags stored in znode.op_type and in the opline's
 * op1_type / op2_type / result_type. Each tag is a distinct bit.
 * Handler names are specialized by these tags, e.g.
 * ZEND_ASSIGN_SPEC_VAR_TMP_HANDLER for op1 = VAR, op2 = TMP.
 *
 * Each directive must sit on its own line: a #define extends to the end of
 * the line, so fusing them would fold the later ones into IS_CONST's body.
 */
#define IS_CONST	(1<<0)
#define IS_TMP_VAR	(1<<1)
#define IS_VAR		(1<<2)
#define IS_UNUSED	(1<<3)	/* Unused variable */
#define IS_CV		(1<<4)	/* Compiled variable */
如:
opline->result_type = IS_TMP_VAR; // opline->result.var = get_temporary_variable(CG(active_op_array));
opline->result_type = IS_VAR; opline->result.var = get_temporary_variable(CG(active_op_array));最后生成的函数就是
ZEND_ASSIGN_SPEC_VAR_TMP_HANDLER
VAR与TMP均来自临时变量,但是两者用的数据结构不同,具体可见以下两个函数
value = _get_zval_ptr_tmp(opline->op2.var, execute_data, &free_op2 TSRMLS_CC); variable_ptr_ptr = _get_zval_ptr_ptr_var(opline->op1.var, execute_data, &free_op1 TSRMLS_CC);
完!
zval **param = zend_vm_stack_get_arg(arg_num TSRMLS_CC);
/*
 * Fetch argument number `requested_arg` of the call being executed.
 *
 * The lookup is delegated to zend_vm_stack_get_arg_ex() on the frame
 * preceding the current one (prev_execute_data), not on the current frame.
 */
static zend_always_inline zval** zend_vm_stack_get_arg(int requested_arg TSRMLS_DC)
{
	struct _zend_execute_data *caller_frame = EG(current_execute_data)->prev_execute_data;

	return zend_vm_stack_get_arg_ex(caller_frame, requested_arg);
}
为什么要用prev_execute_data?因为函数参数压栈是在当前execute_data之前的execute_data(调用方)中完成的,
实际上,在真正执行函数之前,php会将参数个数入栈。
上面是拆分讲解一个函数的调用过程,当将所有程序解析成op_array数组后,就会调用execute_ex来执行所有的opcode数组。
zend_execute_scripts -> zend_execute -> zend_execute_ex -> execute_ex -> i_create_execute_data_from_op_array
if (0) {
zend_vm_enter:
execute_data = i_create_execute_data_from_op_array(EG(active_op_array), 1 TSRMLS_CC);
}
LOAD_REGS();
LOAD_OPLINE();
while (1) {
int ret;
#ifdef ZEND_WIN32
if (EG(timed_out)) {
zend_timeout(0);
}
#endif
if ((ret = OPLINE->handler(execute_data TSRMLS_CC)) > 0) {
switch (ret) {
case 1:
EG(in_execution) = original_in_execution;
return;
case 2:
goto zend_vm_enter;
break;
case 3:
execute_data = EG(current_execute_data);
break;
default:
break;
}
}
}
zend_error_noreturn(E_ERROR, "Arrived at end of main loop which shouldn't happen");
}
以上是个死循环,解析op_array数组,需要注意的是返回值
#define ZEND_VM_CONTINUE() return 0 /* 继续执行下一条opcode */
#define ZEND_VM_RETURN() return 1 /* 返回,execute_ex函数终止 */
#define ZEND_VM_ENTER() return 2 /* 函数调用 */
#define ZEND_VM_LEAVE() return 3 /* 函数退出 */
ZEND_VM_RETURN函数返回return终止,5.2版本很少有返回1,但是5.5增加了yield调用后,yield调用的opcode基本上都会返回1,从而
函数终止,有个疑问,return了后下面如何执行?
这里面有个需要注意的点就是,当我们调用函数的时候大家知道opcode解析的函数是
zend_do_fcall_common_helper_SPEC
该函数分为两部分,
/*
 * A callable as seen by the engine: either a compiled PHP function
 * (op_array) or an internal C function (internal_function). The `common`
 * view exposes the members shared by both.
 * NOTE(review): the original non-English comments in this excerpt were
 * mis-encoded; the descriptions below are reconstructed from member names.
 */
typedef union _zend_function {
zend_uchar type; /* function kind, e.g. ZEND_USER_FUNCTION (defined as 2);
MUST be the first element of this struct! */
struct {
zend_uchar type; /* never used */
char *function_name; /* function name */
zend_class_entry *scope; /* presumably the class the function belongs to */
zend_uint fn_flags; /* function flags, e.g. ZEND_ACC_STATIC */
union _zend_function *prototype; /* function prototype */
zend_uint num_args; /* number of arguments */
zend_uint required_num_args; /* number of required arguments */
zend_arg_info *arg_info; /* argument information */
zend_bool pass_rest_by_reference;
unsigned char return_reference; /* presumably non-zero when returning by reference - confirm */
} common;
zend_op_array op_array; /* view used for PHP (user) functions */
zend_internal_function internal_function;
} zend_function;
1 内部C函数(ZEND_INTERNAL_FUNCTION):内部函数在zend_register_functions时候就注册到了函数表,其中internal_function.handler指向C函数(函数指针)
通过opcode解析函数名到函数表中查找即可获取到函数指针,进行调用
2 php函数(ZEND_USER_FUNCTION):会继续调用zend_execute,所以刚才说的ZEND_VM_RETURN终止的只是具体某个函数而已,大家如果在一个函数
中写yield,函数就不会继续执行了,就是这个道理。
顺带着介绍下词法解析过程
Zend/zend_language_scanner.l 词法解析规则文件
Zend/zend_language_parser.y 语法分析规则文件
语法扫描(lex_scan)前都会进行该函数调用进行准备,可以参考函数token_get_all的实现
/*
 * Point the scanner at an in-memory buffer before scanning starts.
 *
 * str - start of the input
 * len - number of bytes available from str
 *
 * Sets the cursor and the recorded start position to the beginning of the
 * buffer and the limit to `len` bytes past it.
 */
static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
{
	YYCTYPE *input_begin = (YYCTYPE*)str;

	YYCURSOR = input_begin;
	SCNG(yy_start) = input_begin;
	YYLIMIT = input_begin + len;
}
顺便说一下php7:
compile_file -> zendparse(yyparse) -> zend_compile_top_stmt -> zend_compile_stmt 生成opcode,因为php7中间用了抽象语法树,
需要根据抽象语法树的节点进行分析后获得最终的opcode
在这里补充下一个知识点:
ZEND_ASSIGN_ADD_SPEC_VAR_CONST_HANDLER
if (RETURN_VALUE_USED(opline)) {
PZVAL_LOCK(*var_ptr);
EX_T(opline->result.var).var.ptr = *var_ptr;
}
这里放的是临时变量的var属性ptr,为什么是指针,而不是下面的tmp_var,是因为ptr指向的zval是不能立马释放的,是需要assign
赋值给其他变量用,也就是多个zval*共同指向的结构,这个时候采用的就是存放到临时变量的var中的ptr属性。
再看个例子
ZEND_ADD_SPEC_CV_TMP_HANDLER
/*
 * VM handler for ZEND_ADD specialized for op1 = CV and op2 = TMP.
 * The sum is written by value into the result slot's tmp_var member;
 * the temporary operand is destroyed afterwards.
 */
static int ZEND_FASTCALL ZEND_ADD_SPEC_CV_TMP_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
{
USE_OPLINE
zend_free_op free_op2;
SAVE_OPLINE();
/* result.tmp_var = op1 (compiled variable, read access) + op2 (temporary) */
fast_add_function(&EX_T(opline->result.var).tmp_var,
_get_zval_ptr_cv_BP_VAR_R(execute_data, opline->op1.var TSRMLS_CC),
_get_zval_ptr_tmp(opline->op2.var, execute_data, &free_op2 TSRMLS_CC) TSRMLS_CC);
/* free_op2 was filled in by _get_zval_ptr_tmp(); release the TMP operand. */
zval_dtor(free_op2.var);
CHECK_EXCEPTION();
ZEND_VM_NEXT_OPCODE();
}
很明显结果赋值给了临时变量(tmp_var)。为什么是临时变量?因为该变量不引用其他指针数据,所以释放比较简单
a++的opcode
zend_do_post_incdec 的opcode opline->result_type = IS_TMP_VAR;很明显是个tmp变量
opline = get_next_op(CG(active_op_array) TSRMLS_CC);
opline->opcode = op;
SET_NODE(opline->op1, op1);
SET_UNUSED(opline->op2);
opline->result_type = IS_TMP_VAR;
opline->result.var = get_temporary_variable(CG(active_op_array));
GET_NODE(result, opline->result);
static int ZEND_FASTCALL ZEND_POST_INC_SPEC_VAR_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
{
USE_OPLINE
zend_free_op free_op1;
zval **var_ptr, *retval;
SAVE_OPLINE();
var_ptr = _get_zval_ptr_ptr_var(opline->op1.var, execute_data, &free_op1 TSRMLS_CC);
if (IS_VAR == IS_VAR && UNEXPECTED(var_ptr == NULL)) {
zend_error_noreturn(E_ERROR, "Cannot increment/decrement overloaded objects nor string offsets");
}
if (IS_VAR == IS_VAR && UNEXPECTED(*var_ptr == &EG(error_zval))) {
ZVAL_NULL(&EX_T(opline->result.var).tmp_var);
if (free_op1.var) {zval_ptr_dtor_nogc(&free_op1.var);};
CHECK_EXCEPTION();
ZEND_VM_NEXT_OPCODE();
}
retval = &EX_T(opline->result.var).tmp_var;
ZVAL_COPY_VALUE(retval, *var_ptr);
zendi_zval_copy_ctor(*retval);
当返回的是tmp_var变量时,说明该变量只是个临时值不会做额外的操作,所以也不需要增加refcount__gc++等操作,直接堆栈释放即可。
++a的opcode:opline->result_type = IS_VAR; IS_VAR类型
opline = get_next_op(CG(active_op_array) TSRMLS_CC);
opline->opcode = op;
SET_NODE(opline->op1, op1);
SET_UNUSED(opline->op2);
opline->result_type = IS_VAR;
opline->result.var = get_temporary_variable(CG(active_op_array));
GET_NODE(result, opline->result);
if (RETURN_VALUE_USED(opline)) {
PZVAL_LOCK(*var_ptr);
EX_T(opline->result.var).var.ptr = *var_ptr;
}
/*
 * Return the zval** stored in VAR slot `var` of the given frame and unlock
 * whatever the slot refers to.
 *
 * When the slot holds no ptr_ptr (string-offset case) the string zval of the
 * offset is unlocked instead and NULL is returned. `should_free` is handed
 * to PZVAL_UNLOCK in both cases.
 */
static zend_always_inline zval **_get_zval_ptr_ptr_var(zend_uint var, const zend_execute_data *execute_data, zend_free_op *should_free TSRMLS_DC)
{
	zval **slot = EX_T(var).var.ptr_ptr;

	if (UNEXPECTED(slot == NULL)) {
		/* string offset */
		PZVAL_UNLOCK(EX_T(var).str_offset.str, should_free);
		return NULL;
	}

	PZVAL_UNLOCK(*slot, should_free);
	return slot;
}
获取该变量值要通过该函数,其中PZVAL_UNLOCK会对引用计数减一(refcount__gc--)。
var和tmp类型的区别是什么,大家都是放在tmp分配的堆栈中,区别就是,var.ptr_ptr是个指针,为了节省空间大家目前先共用,比如++a,返回的临时变量和变量a返回值一样,所以就用指针指向同一个zval,比如a++,返回值的临时变量和变量a返回值不一样,所以必须重新申请一个zval,所以就干脆扔到了tmp中,可以随时释放。