if (preg_match('/
$str = preg_replace_callback('#
]*?)(?:\s?/?>|$)#si', array($this, '_js_img_removal'), $str);
if (preg_match('/script|xss/i', $str))
$str = preg_replace('##si', '[removed]', $str);
while ($original !== $str);
* Sanitize naughty HTML elements
* If a tag containing any of the words in the list
* below is found, the tag gets converted to entities.
* So this: <blink>
* Becomes: &lt;blink&gt;
$pattern = '#'
.'<((?/*\s*)((?[a-z0-9]+)(?=[^a-z0-9]|$)|.+)' // tag start and name, followed by a non-tag character
.'[^\s\042\047a-z0-9>/=]*' // a valid attribute character immediately after the tag would count as a separator
// optional attributes
(?:[\s\042\047/=]*' // non-attribute characters, excluding > (tag close) for obvious reasons
.'[^\s\042\047>/=]+' // attribute characters
// optional attribute-value
.'(?:\s*=' // attribute-value separator
.'(?:[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*))' // single, double or non-quoted value
.')?' // end optional attribute-value group
.')*)' // end optional attributes group
// Note: It would be nice to optimize this for speed, BUT
// only matching the naughty elements here results in
// false positives and in turn - vulnerabilities!
$old_str = $str;
$str = preg_replace_callback($pattern, array($this, '_sanitize_naughty_html'), $str);
while ($old_str !== $str);
* Sanitize naughty scripting elements
* Similar to above, only instead of looking for
* tags it looks for PHP and JavaScript commands
* that are disallowed. Rather than removing the
* code, it simply converts the parentheses to entities,
* rendering the code un-executable.
* For example: eval('some code')
* Becomes: eval&#40;'some code'&#41;
$str = preg_replace(
// Final clean up
// This adds a bit of extra precaution in case
// something got through the above filters
$str = $this->_do_never_allowed($str);
* Images are Handled in a Special Way
* - Essentially, we want to know that after all of the character
* conversion is done whether any unwanted, likely XSS, code was found.
* If not, we return TRUE, as the image is clean.
* However, if the string post-conversion does not match the
* string post-removal of XSS, then it fails, as there was unwanted XSS
* code found and removed/changed during processing.
if ($is_image === TRUE)
return ($str === $converted_string);
return $str;
// --------------------------------------------------------------------
/**
 * XSS Hash
 *
 * Returns the XSS hash, creating it on first use and caching it in
 * $this->_xss_hash so later calls return the same value.
 *
 * @see		CI_Security::$_xss_hash
 * @return	string	XSS hash
 */
public function xss_hash()
{
	// Already generated: hand back the cached value
	if ($this->_xss_hash !== NULL)
	{
		return $this->_xss_hash;
	}

	$bytes = $this->get_random_bytes(16);

	if ($bytes === FALSE)
	{
		// No random source was usable; fall back to a uniqid()-based hash
		$this->_xss_hash = md5(uniqid(mt_rand(), TRUE));
	}
	else
	{
		$this->_xss_hash = bin2hex($bytes);
	}

	return $this->_xss_hash;
}
// --------------------------------------------------------------------
/**
 * Get random bytes
 *
 * Tries the available random sources in order: random_bytes(),
 * mcrypt_create_iv() with MCRYPT_DEV_URANDOM, a direct read from
 * /dev/urandom and finally openssl_random_pseudo_bytes().
 *
 * @param	int	$length	Output length
 * @return	string|false	Random bytes, or FALSE if $length is invalid
 *				or no random source could be used
 */
public function get_random_bytes($length)
{
	// ctype_digit() only accepts strings made up purely of digits, so
	// together with empty() this rejects 0, '', NULL, negative numbers
	// and anything non-numeric
	if (empty($length) OR ! ctype_digit((string) $length))
	{
		return FALSE;
	}

	// Normalise once; random_bytes() in particular would raise a
	// TypeError when handed a numeric string
	$length = (int) $length;

	if (function_exists('random_bytes'))
	{
		try
		{
			return random_bytes($length);
		}
		catch (Exception $e)
		{
			// If random_bytes() can't do the job, we can't either ...
			// There's no point in using fallbacks.
			log_message('error', $e->getMessage());
			return FALSE;
		}
	}

	// Unfortunately, none of the following PRNGs is guaranteed to exist ...
	if (defined('MCRYPT_DEV_URANDOM') && ($output = mcrypt_create_iv($length, MCRYPT_DEV_URANDOM)) !== FALSE)
	{
		return $output;
	}

	if (is_readable('/dev/urandom') && ($fp = fopen('/dev/urandom', 'rb')) !== FALSE)
	{
		// Try not to waste entropy ...
		is_php('5.4') && stream_set_chunk_size($fp, $length);
		$output = fread($fp, $length);

		// Release the handle whether or not the read succeeded, so that
		// repeated calls do not accumulate open file descriptors
		fclose($fp);

		if ($output !== FALSE)
		{
			return $output;
		}
	}

	if (function_exists('openssl_random_pseudo_bytes'))
	{
		return openssl_random_pseudo_bytes($length);
	}

	return FALSE;
}
// --------------------------------------------------------------------
/**
 * HTML Entities Decode
 *
 * A replacement for html_entity_decode(), which converts HTML entities
 * to their characters.
 *
 * html_entity_decode() is not used by itself because, while it is not
 * technically correct to leave out the semicolon at the end of an entity,
 * most browsers will still interpret the entity correctly.
 * html_entity_decode() does not convert entities without semicolons, so
 * this method appends the missing semicolons itself and keeps decoding
 * until a pass leaves the string unchanged.
 *
 * @link	http://php.net/html-entity-decode
 *
 * @param	string	$str		Input
 * @param	string	$charset	Character set; $this->charset is used when NULL
 * @return	string
 */
public function entity_decode($str, $charset = NULL)
{
	// Without an ampersand there cannot be any entity to decode
	if (strpos($str, '&') === FALSE)
	{
		return $str;
	}

	// Translation table (character => lower-cased entity), built once
	static $_entities;

	isset($charset) OR $charset = $this->charset;
	$flag = is_php('5.4')
		? ENT_COMPAT | ENT_HTML5
		: ENT_COMPAT;

	if ( ! isset($_entities))
	{
		$_entities = array_map('strtolower', get_html_translation_table(HTML_ENTITIES, $flag, $charset));

		// If we're not on PHP 5.4+, add the possibly dangerous HTML 5
		// entities to the array manually
		// NOTE(review): values are the lower-cased HTML5 entity names for
		// these characters - confirm against the canonical source
		if ($flag === ENT_COMPAT)
		{
			$_entities[':'] = '&colon;';
			$_entities['('] = '&lpar;';
			$_entities[')'] = '&rpar;';
			$_entities["\n"] = '&newline;';
			$_entities["\t"] = '&tab;';
		}
	}

	do
	{
		$str_compare = $str;

		// Decode standard entities, avoiding false positives
		if (preg_match_all('/&[a-z]{2,}(?![a-z;])/i', $str, $matches))
		{
			$replace = array();
			$matches = array_unique(array_map('strtolower', $matches[0]));

			// Iterating by value: nothing is written back, and a
			// by-reference loop would leave a dangling reference behind
			foreach ($matches as $match)
			{
				if (($char = array_search($match.';', $_entities, TRUE)) !== FALSE)
				{
					$replace[$match] = $char;
				}
			}

			$str = str_replace(array_keys($replace), array_values($replace), $str);
		}

		// Decode numeric & UTF16 two byte entities, adding the
		// semicolon that html_entity_decode() insists on
		$str = html_entity_decode(
			preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str),
			$flag,
			$charset
		);

		// Without ENT_HTML5 the manually added entities have to be
		// replaced by hand
		if ($flag === ENT_COMPAT)
		{
			$str = str_replace(array_values($_entities), array_keys($_entities), $str);
		}
	}
	while ($str_compare !== $str);

	return $str;
}
// --------------------------------------------------------------------
/**
 * Sanitize Filename
 *
 * Removes invisible characters and every sequence listed in
 * $this->filename_bad_chars from a file name, then strips backslash
 * escapes from the result.
 *
 * @param	string	$str		Input file name
 * @param	bool	$relative_path	Whether to preserve paths
 * @return	string
 */
public function sanitize_filename($str, $relative_path = FALSE)
{
	$blacklist = $this->filename_bad_chars;

	// Path separators are only tolerated when the caller asked for
	// paths to be preserved
	if ( ! $relative_path)
	{
		$blacklist[] = './';
		$blacklist[] = '/';
	}

	$clean = remove_invisible_characters($str, FALSE);

	// Removing one sequence can splice together another, so keep
	// stripping until a pass leaves the string untouched
	while (TRUE)
	{
		$stripped = str_replace($blacklist, '', $clean);

		if ($stripped === $clean)
		{
			break;
		}

		$clean = $stripped;
	}

	return stripslashes($clean);
}
// ----------------------------------------------------------------
* Strip Image Tags
* @param string $str
* @return string
// Per its doc header this method strips image tags from $str; the visible
// code returns the result of a single preg_replace() call.
public function strip_image_tags($str)
// NOTE(review): the argument list of this preg_replace() call (patterns,
// replacement, subject) is not present in this view of the file, so what
// exactly is matched and kept cannot be described here - restore it from
// the canonical source before relying on this method.
return preg_replace(
// ----------------------------------------------------------------
/**
 * URL-decode taking spaces into account
 *
 * If the matched text contains whitespace, the whitespace is removed and
 * the remainder is URL-decoded; text without whitespace is returned as is.
 *
 * @see		https://github.com/bcit-ci/CodeIgniter/issues/4877
 * @param	array	$matches
 * @return	string
 */
protected function _urldecodespaces($matches)
{
	$raw = $matches[0];
	$compacted = preg_replace('#\s+#', '', $raw);

	// No whitespace was present: hand the match back untouched
	if ($compacted === $raw)
	{
		return $raw;
	}

	return rawurldecode($compacted);
}
// ----------------------------------------------------------------
/**
 * Compact Exploded Words
 *
 * Callback method for xss_clean() that removes the whitespace from
 * things like 'j a v a s c r i p t'. Only the first captured group is
 * compacted; the second one is appended unchanged.
 *
 * @used-by	CI_Security::xss_clean()
 * @param	array	$matches
 * @return	string
 */
protected function _compact_exploded_words($matches)
{
	$word = preg_replace('/\s+/s', '', $matches[1]);
	$trailer = $matches[2];

	return $word.$trailer;
}
// --------------------------------------------------------------------
/**
 * Sanitize Naughty HTML
 *
 * Callback method for xss_clean() to remove naughty HTML elements.
 *
 * Unclosed tags and tags whose name is in the naughty list are escaped
 * by turning their angle brackets into entities. Any other tag is rebuilt
 * with each "evil" attribute replaced by 'xss=removed'.
 *
 * @used-by	CI_Security::xss_clean()
 * @param	array	$matches	Match data; element 1 and the named groups
 *					'slash', 'tagName', 'attributes' and
 *					'closeTag' are read
 * @return	string
 */
protected function _sanitize_naughty_html($matches)
{
	static $naughty_tags = array(
		'alert', 'area', 'prompt', 'confirm', 'applet', 'audio', 'basefont', 'base', 'behavior', 'bgsound',
		'blink', 'body', 'embed', 'expression', 'form', 'frameset', 'frame', 'head', 'html', 'ilayer',
		'iframe', 'input', 'button', 'select', 'isindex', 'layer', 'link', 'meta', 'keygen', 'object',
		'plaintext', 'style', 'script', 'textarea', 'title', 'math', 'video', 'svg', 'xml', 'xss'
	);

	static $evil_attributes = array(
		'on\w+', 'style', 'xmlns', 'formaction', 'form', 'xlink:href', 'FSCommand', 'seekSegmentTime'
	);

	// First, escape unclosed tags
	if (empty($matches['closeTag']))
	{
		return '&lt;'.$matches[1];
	}
	// Is the element that we caught naughty? If so, escape it
	elseif (in_array(strtolower($matches['tagName']), $naughty_tags, TRUE))
	{
		return '&lt;'.$matches[1].'&gt;';
	}
	// For other tags, see if their attributes are "evil" and strip those
	elseif (isset($matches['attributes']))
	{
		// We'll store the already filtered attributes here
		$attributes = array();

		// Attribute-catching pattern; the 'name' and 'value' groups are
		// read below
		$attributes_pattern = '#'
			.'(?<name>[^\s\042\047>/=]+)' // attribute characters
			// attribute-value separator followed by a single, double or non-quoted value
			.'(?:\s*=(?<value>[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*)))'
			.'#i';

		// Blacklist pattern for evil attribute names
		$is_evil_pattern = '#^('.implode('|', $evil_attributes).')$#i';

		// Each iteration filters a single attribute
		do
		{
			// Strip any non-alpha characters that may precede an attribute.
			// Browsers often parse these incorrectly and that has been a
			// source of numerous XSS issues we've had.
			$matches['attributes'] = preg_replace('#^[^a-z]+#i', '', $matches['attributes']);

			if ( ! preg_match($attributes_pattern, $matches['attributes'], $attribute, PREG_OFFSET_CAPTURE))
			{
				// No (valid) attribute found? Discard everything else inside the tag
				break;
			}

			if (
				// Is it indeed an "evil" attribute?
				preg_match($is_evil_pattern, $attribute['name'][0])
				// Or does it have an equals sign, but no value and not quoted? Strip that too!
				OR (trim($attribute['value'][0]) === '')
			)
			{
				$attributes[] = 'xss=removed';
			}
			else
			{
				$attributes[] = $attribute[0][0];
			}

			// Continue right after the attribute that was just handled
			$matches['attributes'] = substr($matches['attributes'], $attribute[0][1] + strlen($attribute[0][0]));
		}
		while ($matches['attributes'] !== '');

		$attributes = empty($attributes)
			? ''
			: ' '.implode(' ', $attributes);

		return '<'.$matches['slash'].$matches['tagName'].$attributes.'>';
	}

	return $matches[0];
}
// --------------------------------------------------------------------
* JS Link Removal
* Callback method for xss_clean() to sanitize links.
* This limits the PCRE backtracks, making it more performance friendly
* and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
* PHP 5.2+ on link-heavy strings.
* @used-by CI_Security::xss_clean()
* @param array $match
* @return string
protected function _js_link_removal($match)
return str_replace(