词法解析主要在 lib/scanner.php 中通过 Tokenizer 这个类来实现:main.php 中调用 new scanner 创建对象、准备开始扫描漏洞时,会在 scanner 的构造函数中完成 token 流的解析。
接着进入 tokenize 中进行具体的 token 解析,这里要借助 PHP Zend 引擎自带的词法分析函数 token_get_all()。下面是一段示例代码经 token_get_all() 处理后的输出:
array(11) { [0] => array(3) { [0] => int(379) [1] => string(6) "<?php " [2] => int(1) } [1] => array(3) { [0] => int(320) [1] => string(2) "$a" [2] => int(2) } [2] => string(1) "=" [3] => string(1) "[" [4] => array(3) { [0] => int(323) [1] => string(3) ""a"" [2] => int(2) } [5] => string(1) "," [6] => array(3) { [0] => int(323) [1] => string(3) ""b"" [2] => int(2) } [7] => string(1) "]" [8] => string(1) ";" [9] => array(3) { [0] => int(382) [1] => string(1) " " [2] => int(2) } [10] => array(3) { [0] => int(381) [1] => string(3) "?> " [2] => int(3) } }
/**
 * Normalises the raw token list held in $this->tokens.
 *
 * Performed in a single pass over the list:
 *  - tokens whose id is listed in Tokens::$T_IGNORE are removed
 *  - a T_CLOSE_TAG token is replaced by a plain ';'
 *  - the text of a T_OPEN_TAG_WITH_ECHO token is rewritten to 'echo'
 *  - the error suppression operator '@' is removed
 *  - the curly offset syntax $array{index} is rewritten to $array[index]
 *
 * The list is re-indexed at the end so the removed entries leave no gaps.
 */
function prepare_tokens()
{
    for ($i = 0, $max = count($this->tokens); $i < $max; $i++) {
        if (is_array($this->tokens[$i])) {
            if (in_array($this->tokens[$i][0], Tokens::$T_IGNORE)) {
                // unimportant token: drop it
                unset($this->tokens[$i]);
            } else if ($this->tokens[$i][0] === T_CLOSE_TAG) {
                // a closing tag terminates the current statement
                $this->tokens[$i] = ';';
            } else if ($this->tokens[$i][0] === T_OPEN_TAG_WITH_ECHO) {
                // short echo tag behaves like an echo statement
                $this->tokens[$i][1] = 'echo';
            }
        } else if ($this->tokens[$i] === '@') {
            // @ (error suppression) disturbs connected token handling
            unset($this->tokens[$i]);
        } else if ($this->tokens[$i] === '{'
            && isset($this->tokens[$i-1])
            && ((is_array($this->tokens[$i-1]) && $this->tokens[$i-1][0] === T_VARIABLE)
                || $this->tokens[$i-1] === ']')
        ) {
            // rewrite $array{index} to $array[index]: a '{' directly after a
            // variable or after ']' is an offset access, not a block.
            // Search for the first '}' BEFORE modifying anything, so a failed
            // search neither leaves a lone '[' behind nor writes a ']' to an
            // index beyond the end of the list.
            $f = 1;
            while (isset($this->tokens[$i+$f]) && $this->tokens[$i+$f] !== '}') {
                $f++;
            }

            if (isset($this->tokens[$i+$f])) {
                $this->tokens[$i] = '[';
                $this->tokens[$i+$f] = ']';
            } else {
                // The previous token is either a T_VARIABLE array or the
                // string ']'; only the former carries a text and a line number.
                $previous = $this->tokens[$i-1];
                $name = is_array($previous) ? $previous[1] : $previous;

                // take the line number from the nearest preceding real token
                $line = 0;
                for ($back = $i - 1; $back >= 0; $back--) {
                    if (isset($this->tokens[$back]) && is_array($this->tokens[$back])) {
                        $line = $this->tokens[$back][2];
                        break;
                    }
                }

                addError(
                    'Could not find closing brace of '.$name.'{}.',
                    array_slice($this->tokens, $i-1, 2),
                    $line,
                    $this->filename
                );
            }
        }
    }

    // rearrange key index of tokens
    $this->tokens = array_values($this->tokens);
}
// rewrite $arrays[] to $variables and save keys in $tokens[$i][3]
/**
 * Collapses array accesses into their variable token.
 *
 * For every T_VARIABLE that is directly followed by '[', the bracket
 * expressions are removed from the token list and stored in element [3] of
 * the variable token, one entry per dimension:
 *  - a constant key (quoted string, number or bare string directly followed
 *    by ']') is stored as a string with its quotes stripped
 *  - any other key is stored as the list of tokens found between the brackets
 *
 * At most MAX_ARRAY_KEYS + 1 dimensions are collected per variable. The token
 * list is re-indexed at the end.
 */
function array_reconstruct_tokens()
{
    for ($i = 0, $max = count($this->tokens); $i < $max; $i++) {
        // check for arrays: a variable followed by '[' has at least one key
        if (is_array($this->tokens[$i])
            && $this->tokens[$i][0] === T_VARIABLE
            && isset($this->tokens[$i+1])
            && $this->tokens[$i+1] === '['
        ) {
            $this->tokens[$i][3] = array();
            $has_more_keys = true;
            $index = -1;
            // $c is a cursor relative to $i; it starts on the first key token
            $c = 2;

            // loop until no more index found: array[1][2][3]
            while ($has_more_keys && $index < MAX_ARRAY_KEYS) {
                $index++;

                if (isset($this->tokens[$i+$c], $this->tokens[$i+$c+1])
                    && ($this->tokens[$i+$c][0] === T_CONSTANT_ENCAPSED_STRING
                        || $this->tokens[$i+$c][0] === T_LNUMBER
                        || $this->tokens[$i+$c][0] === T_NUM_STRING
                        || $this->tokens[$i+$c][0] === T_STRING)
                    && $this->tokens[$i+$c+1] === ']'
                ) {
                    // save constant index as constant
                    unset($this->tokens[$i+$c-1]);   // opening bracket
                    $this->tokens[$i][3][$index] = str_replace(array('"', "'"), '', $this->tokens[$i+$c][1]);
                    unset($this->tokens[$i+$c]);     // key
                    unset($this->tokens[$i+$c+1]);   // closing bracket
                    // move the cursor behind the closing bracket
                    $c += 2;
                } else {
                    // save tokens of non-constant index as token-array for backtrace later
                    $this->tokens[$i][3][$index] = array();
                    // number of currently open brackets
                    $newbraceopen = 1;
                    unset($this->tokens[$i+$c-1]);   // opening bracket

                    while ($newbraceopen !== 0) {
                        // only the first round can hit a missing token (end of
                        // input); later rounds are protected by the isset
                        // check at the bottom of the loop
                        $current = isset($this->tokens[$i+$c]) ? $this->tokens[$i+$c] : null;

                        if ($current === '[') {
                            $newbraceopen++;
                        } else if ($current === ']') {
                            $newbraceopen--;
                        } else if ($current !== null) {
                            // part of the key expression
                            $this->tokens[$i][3][$index][] = $current;
                        }

                        // consumed: bracket or key token
                        unset($this->tokens[$i+$c]);
                        $c++;

                        if (!isset($this->tokens[$i+$c])) {
                            addError('Could not find closing bracket of '.$this->tokens[$i][1].'[].', array_slice($this->tokens, $i, 5), $this->tokens[$i][2], $this->filename);
                            break;
                        }
                    }
                    // make sure the token the cursor just passed is gone
                    unset($this->tokens[$i+$c-1]);
                }

                // another '[' right behind the key means one more dimension
                if (!isset($this->tokens[$i+$c]) || $this->tokens[$i+$c] !== '[') {
                    $has_more_keys = false;
                }
                $c++;
            }

            // continue behind the tokens consumed for this variable
            $i += $c - 1;
        }
    }

    // return tokens with rearranged key index
    $this->tokens = array_values($this->tokens);
}
/**
 * Rewrites several syntax variants in $this->tokens into one canonical form.
 *
 * Handled constructs:
 *  - `cmd` becomes a call-like token sequence backticks( cmd )
 *  - if / elseif / for / foreach / while with alternate syntax (": ... end*;")
 *    or with a single statement body are passed to $this->wrapbraces()
 *  - else without braces is passed to $this->wrapbraces()
 *  - switch with alternate syntax, and case / default bodies without braces,
 *    are passed to $this->wrapbraces()
 *  - declared function names and called function names are lowercased
 *  - do { body } while (cond); is reordered to while (cond) { body }
 *
 * The token count is re-read on every loop round because the rewrites insert
 * and move tokens. The list is re-indexed at the end.
 */
function fix_tokens()
{
    for ($i = 0; $i < ($max = count($this->tokens)); $i++) {
        // convert `backticks` to backticks()
        if ($this->tokens[$i] === '`') {
            $f = 1;
            while ($this->tokens[$i+$f] !== '`') {
                // get line_nr of any near token
                if (is_array($this->tokens[$i+$f])) {
                    $line_nr = $this->tokens[$i+$f][2];
                }

                // advance towards the closing backtick
                $f++;
                if (!isset($this->tokens[$i+$f]) || $this->tokens[$i+$f] === ';') {
                    addError('Could not find closing backtick `.', array_slice($this->tokens, $i, 5), $this->tokens[$i+1][2], $this->filename);
                    break;
                }
            }

            // NOTE(review): $line_nr is never reset, so a value from an
            // earlier backtick pair is still visible here - confirm intended.
            if (!empty($line_nr)) {
                // closing backtick becomes ')', opening backtick becomes the
                // name token, and '(' is inserted right behind the name
                $this->tokens[$i+$f] = ')';
                $this->tokens[$i] = array(T_STRING, 'backticks', $line_nr);

                // add element backticks() to array
                $this->tokens = array_merge(
                    array_slice($this->tokens, 0, $i+1),
                    array('('),
                    array_slice($this->tokens, $i+1)
                );
            }
        }
        // real token
        else if (is_array($this->tokens[$i])) {
            // rebuild if-clauses, for(), foreach(), while() without { }
            if (($this->tokens[$i][0] === T_IF
                    || $this->tokens[$i][0] === T_ELSEIF
                    || $this->tokens[$i][0] === T_FOR
                    || $this->tokens[$i][0] === T_FOREACH
                    || $this->tokens[$i][0] === T_WHILE)
                && $this->tokens[$i+1] === '('
            ) {
                // skip condition in ( ); nothing is modified while skipping
                $f = 2;
                $braceopen = 1;
                while ($braceopen !== 0) {
                    if ($this->tokens[$i+$f] === '(') {
                        $braceopen++;
                    } else if ($this->tokens[$i+$f] === ')') {
                        $braceopen--;
                    }
                    $f++;

                    if (!isset($this->tokens[$i+$f])) {
                        addError('Could not find closing parenthesis of '.$this->tokens[$i][1].'-statement.', array_slice($this->tokens, $i, 5), $this->tokens[$i][2], $this->filename);
                        break;
                    }
                }

                // alternate syntax while(): endwhile;
                if ($this->tokens[$i+$f] === ':') {
                    // pick the end keyword that belongs to the opening keyword
                    switch ($this->tokens[$i][0]) {
                        case T_IF:
                        case T_ELSEIF:
                            $endtoken = T_ENDIF;
                            break;
                        case T_FOR:
                            $endtoken = T_ENDFOR;
                            break;
                        case T_FOREACH:
                            $endtoken = T_ENDFOREACH;
                            break;
                        case T_WHILE:
                            $endtoken = T_ENDWHILE;
                            break;
                        default:
                            $endtoken = ';';
                    }

                    $c = 1;
                    while ($this->tokens[$i+$f+$c][0] !== $endtoken) {
                        $c++;
                        if (!isset($this->tokens[$i+$f+$c])) {
                            addError('Could not find end'.$this->tokens[$i][1].'; of alternate '.$this->tokens[$i][1].'-statement.', array_slice($this->tokens, $i, $f+1), $this->tokens[$i][2], $this->filename);
                            break;
                        }
                    }
                    $this->wrapbraces($i+$f+1, $c+1, $i+$f+$c+2);
                }
                // if body not in { (and not a do ... while();) wrap next instruction in braces
                else if ($this->tokens[$i+$f] !== '{' && $this->tokens[$i+$f] !== ';') {
                    $c = 1;
                    while ($this->tokens[$i+$f+$c] !== ';' && $c < $max) {
                        $c++;
                    }
                    $this->wrapbraces($i+$f, $c+1, $i+$f+$c+1);
                }
            }
            // rebuild else without { }
            else if ($this->tokens[$i][0] === T_ELSE
                && $this->tokens[$i+1][0] !== T_IF
                && $this->tokens[$i+1] !== '{'
            ) {
                $f = 2;
                while ($this->tokens[$i+$f] !== ';' && $f < $max) {
                    $f++;
                }
                $this->wrapbraces($i+1, $f, $i+$f+1);
            }
            // rebuild switch (): endswitch;
            else if ($this->tokens[$i][0] === T_SWITCH && $this->tokens[$i+1] === '(') {
                // skip the switch condition first
                $newbraceopen = 1;
                $c = 2;
                while ($newbraceopen !== 0) {
                    // watch function calls in function call
                    if ($this->tokens[$i+$c] === '(') {
                        $newbraceopen++;
                    } else if ($this->tokens[$i+$c] === ')') {
                        $newbraceopen--;
                    } else if (!isset($this->tokens[$i+$c]) || $this->tokens[$i+$c] === ';') {
                        addError('Could not find closing parenthesis of switch-statement.', array_slice($this->tokens, $i, 10), $this->tokens[$i][2], $this->filename);
                        break;
                    }
                    $c++;
                }

                // switch(): ... endswitch;
                // $c points behind the condition, $f searches the endswitch
                if ($this->tokens[$i+$c] === ':') {
                    $f = 1;
                    while ($this->tokens[$i+$c+$f][0] !== T_ENDSWITCH) {
                        $f++;
                        if (!isset($this->tokens[$i+$c+$f])) {
                            addError('Could not find endswitch; of alternate switch-statement.', array_slice($this->tokens, $i, $c+1), $this->tokens[$i][2], $this->filename);
                            break;
                        }
                    }
                    $this->wrapbraces($i+$c+1, $f+1, $i+$c+$f+2);
                }
            }
            // rebuild switch case: without { }
            else if ($this->tokens[$i][0] === T_CASE) {
                // $e = position of the ':' or ';' that ends the case label
                $e = 1;
                while ($this->tokens[$i+$e] !== ':' && $this->tokens[$i+$e] !== ';') {
                    $e++;
                    if (!isset($this->tokens[$i+$e])) {
                        addError('Could not find : or ; after '.$this->tokens[$i][1].'-statement.', array_slice($this->tokens, $i, 5), $this->tokens[$i][2], $this->filename);
                        break;
                    }
                }

                $f = $e + 1;
                if (($this->tokens[$i+$e] === ':' || $this->tokens[$i+$e] === ';')
                    && $this->tokens[$i+$f] !== '{'
                    && $this->tokens[$i+$f][0] !== T_CASE
                    && $this->tokens[$i+$f][0] !== T_DEFAULT
                ) {
                    // walk to the end of the case body: an unnested '}', or a
                    // break / case / default / endswitch keyword
                    $newbraceopen = 0;
                    while ($newbraceopen
                        || (isset($this->tokens[$i+$f])
                            && $this->tokens[$i+$f] !== '}'
                            && !(is_array($this->tokens[$i+$f])
                                && ($this->tokens[$i+$f][0] === T_BREAK
                                    || $this->tokens[$i+$f][0] === T_CASE
                                    || $this->tokens[$i+$f][0] === T_DEFAULT
                                    || $this->tokens[$i+$f][0] === T_ENDSWITCH)))
                    ) {
                        if ($this->tokens[$i+$f] === '{') {
                            $newbraceopen++;
                        } else if ($this->tokens[$i+$f] === '}') {
                            $newbraceopen--;
                        }
                        $f++;

                        if (!isset($this->tokens[$i+$f])) {
                            addError('Could not find ending of '.$this->tokens[$i][1].'-statement.', array_slice($this->tokens, $i, $e+5), $this->tokens[$i][2], $this->filename);
                            break;
                        }
                    }

                    if ($this->tokens[$i+$f][0] === T_BREAK) {
                        if ($this->tokens[$i+$f+1] === ';') {
                            $this->wrapbraces($i+$e+1, $f-$e+1, $i+$f+2);
                        } else {
                            // break 3;
                            $this->wrapbraces($i+$e+1, $f-$e+2, $i+$f+3);
                        }
                    } else {
                        // case body without break
                        $this->wrapbraces($i+$e+1, $f-$e-1, $i+$f);
                    }
                    $i++;
                }
            }
            // rebuild switch default: without { }
            else if ($this->tokens[$i][0] === T_DEFAULT && $this->tokens[$i+2] !== '{') {
                $f = 2;
                $newbraceopen = 0;
                while ($this->tokens[$i+$f] !== ';' && $this->tokens[$i+$f] !== '}' || $newbraceopen) {
                    if ($this->tokens[$i+$f] === '{') {
                        $newbraceopen++;
                    } else if ($this->tokens[$i+$f] === '}') {
                        $newbraceopen--;
                    }
                    $f++;

                    if (!isset($this->tokens[$i+$f])) {
                        addError('Could not find ending of '.$this->tokens[$i][1].'-statement.', array_slice($this->tokens, $i, 5), $this->tokens[$i][2], $this->filename);
                        break;
                    }
                }
                $this->wrapbraces($i+2, $f-1, $i+$f+1);
            }
            // lowercase all function names because PHP doesn't care
            else if ($this->tokens[$i][0] === T_FUNCTION) {
                $name = $i + 1;

                // function &name(): step over the by-reference marker, which
                // is either the plain string '&' or a token whose text is '&'
                if (isset($this->tokens[$name])
                    && ($this->tokens[$name] === '&'
                        || (is_array($this->tokens[$name]) && $this->tokens[$name][1] === '&'))
                ) {
                    $name++;
                }

                // Closures have no name: the next token is the plain string
                // '('. Indexing a string with [1] would write a string offset
                // instead of a token text, so only real tokens are touched.
                if (isset($this->tokens[$name]) && is_array($this->tokens[$name])) {
                    $this->tokens[$name][1] = strtolower($this->tokens[$name][1]);
                }
            }
            // lowercase the name of a function call
            else if ($this->tokens[$i][0] === T_STRING && $this->tokens[$i+1] === '(') {
                $this->tokens[$i][1] = strtolower($this->tokens[$i][1]);
            }
            // switch a do while with a while (the difference in loop rounds doesnt matter
            // and we need the condition to be parsed before the loop tokens)
            else if ($this->tokens[$i][0] === T_DO) {
                $f = 2;
                $otherDOs = 0;

                // f = T_WHILE token position relative to i
                while ($this->tokens[$i+$f][0] !== T_WHILE || $otherDOs) {
                    // keep count of nested do ... while so their while is skipped
                    if ($this->tokens[$i+$f][0] === T_DO) {
                        $otherDOs++;
                    } else if ($this->tokens[$i+$f][0] === T_WHILE) {
                        $otherDOs--;
                    }
                    $f++;

                    if (!isset($this->tokens[$i+$f])) {
                        addError('Could not find WHILE of DO-WHILE-statement.', array_slice($this->tokens, $i, 5), $this->tokens[$i][2], $this->filename);
                        break;
                    }
                }

                // rebuild do while without {} (should never happen but we want to be sure)
                if ($this->tokens[$i+1] !== '{') {
                    $this->wrapbraces($i+1, $f-1, $i+$f);
                    // by adding braces we added two new tokens
                    $f += 2;
                }

                // d = END of T_WHILE condition relative to the while token
                $d = 1;
                while ($this->tokens[$i+$f+$d] !== ';' && $d < $max) {
                    $d++;
                }

                // reorder tokens and replace DO WHILE with WHILE
                $this->tokens = array_merge(
                    array_slice($this->tokens, 0, $i),                              // before DO
                    array_slice($this->tokens, $i+$f, $d),                          // WHILE condition
                    array_slice($this->tokens, $i+1, $f-1),                         // DO WHILE loop tokens
                    array_slice($this->tokens, $i+$f+$d+1, count($this->tokens))    // rest of tokens without while condition
                );
            }
        }
    }

    // return tokens with rearranged key index
    $this->tokens = array_values($this->tokens);
}
/**
 * Removes the condition part of ternary expressions from $this->tokens.
 *
 * For every '?' token the '?' itself is deleted together with the condition
 * in front of it. Three shapes of condition are recognised:
 *  - a parenthesised condition "( ... ) ?", optionally preceded by '!' or by
 *    a T_STRING / T_EMPTY / T_ISSET token, which is deleted as well
 *  - a comparison or assignment "left op right ?", detected through
 *    Tokens::$T_ASSIGNMENT and Tokens::$T_OPERATOR
 *  - a single variable "$var ?"
 *
 * The token list is re-indexed at the end so the deletions leave no gaps.
 */
function fix_ternary()
{
    for ($i = 0, $max = count($this->tokens); $i < $max; $i++) {
        if ($this->tokens[$i] === '?') {
            unset($this->tokens[$i]);

            // condition in brackets: fine, delete condition
            if ($this->tokens[$i-1] === ')') {
                unset($this->tokens[$i-1]);

                // delete tokens till ( - walking backwards, so ')' opens a
                // nesting level and '(' closes one
                $newbraceopen = 1;
                $f = 2;
                while ($newbraceopen !== 0 && $this->tokens[$i-$f] !== ';') {
                    if ($this->tokens[$i-$f] === '(') {
                        $newbraceopen--;
                    } else if ($this->tokens[$i-$f] === ')') {
                        $newbraceopen++;
                    }
                    unset($this->tokens[$i-$f]);
                    $f++;

                    if (($i-$f) < 0) {
                        addError('Could not find opening parenthesis in ternary operator (1).', array_slice($this->tokens, $i-5, 10), $this->tokens[$i+1][2], $this->filename);
                        break;
                    }
                }

                // delete token before, if T_STRING (also '!', empty, isset)
                if ($this->tokens[$i-$f] === '!'
                    || (is_array($this->tokens[$i-$f])
                        && ($this->tokens[$i-$f][0] === T_STRING
                            || $this->tokens[$i-$f][0] === T_EMPTY
                            || $this->tokens[$i-$f][0] === T_ISSET))
                ) {
                    unset($this->tokens[$i-$f]);
                }
            }
            // condition is a check or assignment
            else if (in_array($this->tokens[$i-2][0], Tokens::$T_ASSIGNMENT)
                || in_array($this->tokens[$i-2][0], Tokens::$T_OPERATOR)
            ) {
                // remove right operand and operator
                unset($this->tokens[$i-1]);
                unset($this->tokens[$i-2]);

                // if operand is in braces (e.g. a call on the left-hand side)
                if ($this->tokens[$i-3] === ')') {
                    // delete tokens till (
                    $newbraceopen = 1;
                    $f = 4;
                    while ($newbraceopen !== 0) {
                        if ($this->tokens[$i-$f] === '(') {
                            $newbraceopen--;
                        } else if ($this->tokens[$i-$f] === ')') {
                            $newbraceopen++;
                        }
                        unset($this->tokens[$i-$f]);
                        $f++;

                        if (($i-$f) < 0 || $this->tokens[$i-$f] === ';') {
                            addError('Could not find opening parenthesis in ternary operator (2).', array_slice($this->tokens, $i-8, 6), $this->tokens[$i+1][2], $this->filename);
                            break;
                        }
                    }

                    // delete token before, if T_STRING (also empty, isset)
                    if (is_array($this->tokens[$i-$f])
                        && ($this->tokens[$i-$f][0] === T_STRING
                            || $this->tokens[$i-$f][0] === T_EMPTY
                            || $this->tokens[$i-$f][0] === T_ISSET)
                    ) {
                        unset($this->tokens[$i-$f]);
                    }
                }

                // remove the left operand (or the ')' that ended it)
                unset($this->tokens[$i-3]);
            }
            // condition is a single variable, delete
            else if (is_array($this->tokens[$i-1]) && $this->tokens[$i-1][0] === T_VARIABLE) {
                unset($this->tokens[$i-1]);
            }
        }
    }

    // return tokens with rearranged key index
    $this->tokens = array_values($this->tokens);
}