if (preg_match('/
{
$str = preg_replace_callback('#
]*?)(?:\s?/?>|$)#si', array($this, '_js_img_removal'), $str);
}
if (preg_match('/script|xss/i', $str))
{
$str = preg_replace('#*(?:script|xss).*?>#si', '[removed]', $str);
}
}
while ($original !== $str);
unset($original);
/*
* Sanitize naughty HTML elements
*
* If a tag containing any of the words in the list
* below is found, the tag gets converted to entities.
*
* So this: <blink>
* Becomes: &lt;blink&gt;
*/
$pattern = '#'
.'<((?/*\s*)((?[a-z0-9]+)(?=[^a-z0-9]|$)|.+)' // tag start and name, followed by a non-tag character
.'[^\s\042\047a-z0-9>/=]*' // a valid attribute character immediately after the tag would count as a separator
// optional attributes
.'(?
(?:[\s\042\047/=]*' // non-attribute characters, excluding > (tag close) for obvious reasons
.'[^\s\042\047>/=]+' // attribute characters
// optional attribute-value
.'(?:\s*=' // attribute-value separator
.'(?:[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*))' // single, double or non-quoted value
.')?' // end optional attribute-value group
.')*)' // end optional attributes group
.'[^>]*)(?\>)?#isS';
// Note: It would be nice to optimize this for speed, BUT
// only matching the naughty elements here results in
// false positives and in turn - vulnerabilities!
do
{
$old_str = $str;
$str = preg_replace_callback($pattern, array($this, '_sanitize_naughty_html'), $str);
}
while ($old_str !== $str);
unset($old_str);
/*
* Sanitize naughty scripting elements
*
* Similar to above, only instead of looking for
* tags it looks for PHP and JavaScript commands
* that are disallowed. Rather than removing the
* code, it simply converts the parentheses to entities,
* rendering the code un-executable.
*
* For example: eval('some code')
* Becomes: eval&#40;'some code'&#41;
*/
$str = preg_replace(
'#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si',
'\\1\\2(\\3)',
$str
);
// Final clean up
// This adds a bit of extra precaution in case
// something got through the above filters
$str = $this->_do_never_allowed($str);
/*
* Images are Handled in a Special Way
* - Essentially, we want to know that after all of the character
* conversion is done whether any unwanted, likely XSS, code was found.
* If not, we return TRUE, as the image is clean.
* However, if the string post-conversion does not matched the
* string post-removal of XSS, then it fails, as there was unwanted XSS
* code found and removed/changed during processing.
*/
if ($is_image === TRUE)
{
return ($str === $converted_string);
}
return $str;
}
// --------------------------------------------------------------------
/**
* XSS (cross-site scripting) Hash
*
* Generates the XSS hash if needed and returns it.
*
* @see CI_Security::$_xss_hash
* @return string XSS hash
*/
public function xss_hash()
{
	// A hash that was generated earlier is reused as-is.
	if ($this->_xss_hash !== NULL)
	{
		return $this->_xss_hash;
	}

	$bytes = $this->get_random_bytes(16);

	if ($bytes === FALSE)
	{
		// No random bytes were obtained; derive the hash from a unique id instead.
		$this->_xss_hash = md5(uniqid(mt_rand(), TRUE));
	}
	else
	{
		$this->_xss_hash = bin2hex($bytes);
	}

	return $this->_xss_hash;
}
// --------------------------------------------------------------------
/**
* Get random bytes
*
* @param int $length Output length
* @return string|false Random bytes, or FALSE when $length is invalid or no random source could be used
*/
public function get_random_bytes($length)
{
	// Reject empty values and anything that is not made up purely of
	// decimal digits. ctype_digit() only works on strings, hence the cast.
	if (empty($length) OR ! ctype_digit((string) $length))
	{
		return FALSE;
	}

	if (function_exists('random_bytes'))
	{
		try
		{
			// The cast is required to avoid TypeError
			return random_bytes((int) $length);
		}
		catch (Exception $e)
		{
			// If random_bytes() can't do the job, we can't either ...
			// There's no point in using fallbacks.
			log_message('error', $e->getMessage());
			return FALSE;
		}
	}

	// Unfortunately, none of the following PRNGs is guaranteed to exist ...
	if (defined('MCRYPT_DEV_URANDOM') && ($output = mcrypt_create_iv($length, MCRYPT_DEV_URANDOM)) !== FALSE)
	{
		return $output;
	}

	if (is_readable('/dev/urandom') && ($fp = fopen('/dev/urandom', 'rb')) !== FALSE)
	{
		// Try not to waste entropy ...
		is_php('5.4') && stream_set_chunk_size($fp, $length);
		$output = fread($fp, $length);
		fclose($fp);

		if ($output !== FALSE)
		{
			return $output;
		}
	}

	if (function_exists('openssl_random_pseudo_bytes'))
	{
		return openssl_random_pseudo_bytes($length);
	}

	// Every source was either missing or failed.
	return FALSE;
}
// --------------------------------------------------------------------
/**
* HTML Entities Decode
*
* A replacement for html_entity_decode(), the PHP function that
* converts HTML entities to their corresponding characters.
*
* The reason we are not using html_entity_decode() by itself is because
* while it is not technically correct to leave out the semicolon
* at the end of an entity most browsers will still interpret the entity
* correctly. html_entity_decode() does not convert entities without
* semicolons, so we are left with our own little solution here. Bummer.
*
* @link http://php.net/html-entity-decode
*
* @param string $str Input
* @param string $charset Character set
* @return string
*/
public function entity_decode($str, $charset = NULL)
{
	// Without an ampersand there cannot be any entity to decode.
	if (strpos($str, '&') === FALSE)
	{
		return $str;
	}

	// Entity table, built on the first call and reused afterwards.
	static $_entities;

	isset($charset) OR $charset = $this->charset;
	$flag = is_php('5.4')
		? ENT_COMPAT | ENT_HTML5
		: ENT_COMPAT;

	if ( ! isset($_entities))
	{
		$_entities = array_map('strtolower', get_html_translation_table(HTML_ENTITIES, $flag, $charset));

		// If we're not on PHP 5.4+, add the possibly dangerous HTML 5
		// entities to the array manually
		if ($flag === ENT_COMPAT)
		{
			$_entities[':'] = '&colon;';
			$_entities['('] = '&lpar;';
			$_entities[')'] = '&rpar;';
			$_entities["\n"] = '&NewLine;';
			$_entities["\t"] = '&Tab;';
		}
	}

	// Keep decoding until a full pass no longer changes the string, so
	// that nested encodings are unwrapped as well.
	do
	{
		$str_compare = $str;

		// Decode standard entities, avoiding false positives
		if (preg_match_all('/&[a-z]{2,}(?![a-z;])/i', $str, $matches))
		{
			$replace = array();
			$matches = array_unique(array_map('strtolower', $matches[0]));

			// Iterate by value: the entries are only read, and a by-reference
			// loop variable would stay bound to the last element afterwards.
			foreach ($matches as $match)
			{
				if (($char = array_search($match.';', $_entities, TRUE)) !== FALSE)
				{
					$replace[$match] = $char;
				}
			}

			$str = str_replace(array_keys($replace), array_values($replace), $str);
		}

		// Decode numeric & UTF16 two byte entities.
		// The pattern must be anchored on "&#": it appends the missing
		// semicolon to numeric entities only, never to ordinary numbers.
		$str = html_entity_decode(
			preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str),
			$flag,
			$charset
		);

		if ($flag === ENT_COMPAT)
		{
			$str = str_replace(array_values($_entities), array_keys($_entities), $str);
		}
	}
	while ($str_compare !== $str);

	return $str;
}
// --------------------------------------------------------------------
/**
* Sanitize Filename
*
* @param string $str Input file name
* @param bool $relative_path Whether to preserve paths
* @return string
*/
public function sanitize_filename($str, $relative_path = FALSE)
{
	$blacklist = $this->filename_bad_chars;

	// Path separators are only left alone when the caller asked to keep paths.
	if ( ! $relative_path)
	{
		$blacklist[] = './';
		$blacklist[] = '/';
	}

	$str = remove_invisible_characters($str, FALSE);

	// Removing one sequence can splice together another forbidden one,
	// so keep stripping until a pass leaves the string untouched.
	while (TRUE)
	{
		$before = $str;
		$str = str_replace($blacklist, '', $str);

		if ($before === $str)
		{
			break;
		}
	}

	return stripslashes($str);
}
// ----------------------------------------------------------------
/**
* Strip Image Tags
*
* Removes image tags from the input string.
*
* @param string $str
* @return string
*/
public function strip_image_tags($str)
{
	// Each <img> tag is replaced by the value of its src attribute
	// (capture group 2 in both patterns).
	return preg_replace(
		array(
			// src value wrapped in single or double quotes
			'#<img[\s/]+.*?src\s*=\s*(["\'])([^\\1]+?)\\1.*?\>#i',
			// unquoted src value
			'#<img[\s/]+.*?src\s*=\s*?(([^\s"\'=<>`]+)).*?\>#i'
		),
		'\\2',
		$str
	);
}
// ----------------------------------------------------------------
/**
* URL-decode taking spaces into account
*
* @see https://github.com/bcit-ci/CodeIgniter/issues/4877
* @param array $matches
* @return string
*/
protected function _urldecodespaces($matches)
{
	$raw = $matches[0];
	$stripped = preg_replace('#\s+#', '', $raw);

	// Input that contained no whitespace is handed back verbatim (not decoded).
	if ($stripped === $raw)
	{
		return $raw;
	}

	// Otherwise decode the whitespace-free version.
	return rawurldecode($stripped);
}
// ----------------------------------------------------------------
/**
* Compact Exploded Words
*
* Callback method for xss_clean() to remove whitespace from
* things like 'j a v a s c r i p t', i.e. words whose letters
* have been separated.
*
* @used-by CI_Security::xss_clean()
* @param array $matches
* @return string
*/
protected function _compact_exploded_words($matches)
{
	// Group 1 is the spaced-out word; group 2 is appended unchanged.
	$compacted = preg_replace('/\s+/s', '', $matches[1]);

	return $compacted.$matches[2];
}
// --------------------------------------------------------------------
/**
* Sanitize Naughty HTML
*
* Callback method for xss_clean() to remove naughty HTML elements.
*
* @used-by CI_Security::xss_clean()
* @param array $matches
* @return string
*/
protected function _sanitize_naughty_html($matches)
{
	// Tag names that are always escaped
	static $naughty_tags = array(
		'alert', 'area', 'prompt', 'confirm', 'applet', 'audio', 'basefont', 'base', 'behavior', 'bgsound',
		'blink', 'body', 'embed', 'expression', 'form', 'frameset', 'frame', 'head', 'html', 'ilayer',
		'iframe', 'input', 'button', 'select', 'isindex', 'layer', 'link', 'meta', 'keygen', 'object',
		'plaintext', 'style', 'script', 'textarea', 'title', 'math', 'video', 'svg', 'xml', 'xss'
	);

	// Attribute names (regex fragments) that are stripped from any other tag
	static $evil_attributes = array(
		'on\w+', 'style', 'xmlns', 'formaction', 'form', 'xlink:href', 'FSCommand', 'seekSegmentTime'
	);

	// First, escape unclosed tags
	if (empty($matches['closeTag']))
	{
		return '&lt;'.$matches[1];
	}
	// Is the element that we caught naughty? If so, escape it
	elseif (in_array(strtolower($matches['tagName']), $naughty_tags, TRUE))
	{
		return '&lt;'.$matches[1].'&gt;';
	}
	// For other tags, see if their attributes are "evil" and strip those
	elseif (isset($matches['attributes']))
	{
		// We'll store the already filtered attributes here
		$attributes = array();

		// Attribute-catching pattern. The named groups are required:
		// the loop below reads $attribute['name'] and $attribute['value'].
		$attributes_pattern = '#'
			.'(?<name>[^\s\042\047>/=]+)' // attribute characters
			// optional attribute-value
			.'(?:\s*=(?<value>[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*)))' // attribute-value separator
			.'#i';

		// Blacklist pattern for evil attribute names
		$is_evil_pattern = '#^('.implode('|', $evil_attributes).')$#i';

		// Each iteration filters a single attribute
		do
		{
			// Strip any non-alpha characters that may precede an attribute.
			// Browsers often parse these incorrectly and that has been a
			// source of numerous XSS issues we've had.
			$matches['attributes'] = preg_replace('#^[^a-z]+#i', '', $matches['attributes']);

			if ( ! preg_match($attributes_pattern, $matches['attributes'], $attribute, PREG_OFFSET_CAPTURE))
			{
				// No (valid) attribute found? Discard everything else inside the tag
				break;
			}

			if (
				// Is it indeed an "evil" attribute?
				preg_match($is_evil_pattern, $attribute['name'][0])
				// Or does it have an equals sign, but no value and not quoted? Strip that too!
				OR (trim($attribute['value'][0]) === '')
			)
			{
				$attributes[] = 'xss=removed';
			}
			else
			{
				$attributes[] = $attribute[0][0];
			}

			// Continue with whatever follows the attribute that was just handled
			$matches['attributes'] = substr($matches['attributes'], $attribute[0][1] + strlen($attribute[0][0]));
		}
		while ($matches['attributes'] !== '');

		$attributes = empty($attributes)
			? ''
			: ' '.implode(' ', $attributes);

		return '<'.$matches['slash'].$matches['tagName'].$attributes.'>';
	}

	return $matches[0];
}
// --------------------------------------------------------------------
/**
* JS Link Removal
*
* Callback method for xss_clean() to sanitize links.
*
* This limits the PCRE backtracks, making it more performance friendly
* and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
* PHP 5.2+ on link-heavy strings.
*
* @used-by CI_Security::xss_clean()
* @param array $match
* @return string
*/
protected function _js_link_removal($match)
{
return str_replace(
$match[1],
preg_replace(
'#href=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|