配置文件 - config/config.ini.php
sitemap主文件 - SiteMap.class.php
* @version 1.0
namespace Maweibinguo\SiteMap;
/**
 * Command-line crawler that collects the pages of one web site and writes
 * them to a sitemap.xml file following the sitemaps.org 0.9 protocol.
 *
 * All tunables (domain, timeouts, cookie handling, priority and change
 * frequency rules, output path) are read from config/config.ini.php through
 * loadConfig().
 */
class SiteMap
{
    /** XML namespace required on the <urlset> root element. */
    const SCHEMA = 'http://www.sitemaps.org/schemas/sitemap/0.9';

    /**
     * Every URL discovered so far; generateSiteMapList() appends to it.
     *
     * @var array
     * @access public
     */
    public $webUrlList = array();

    /**
     * One entry per successfully fetched page, with the keys
     * 'Url', 'Title', 'Priority' and 'ChangeFreq'.
     *
     * @var array
     * @access public
     */
    public $siteMapList = array();

    /**
     * @var bool
     * @access public
     */
    public $isUseCookie = false;

    /**
     * @var string
     * @access public
     */
    public $cookieFilePath = '';

    /**
     * Writer used by generateSiteMapXml(); created in the constructor.
     *
     * @var \XMLWriter|null
     * @access private
     */
    private $_xmlWriter = null;

    /**
     * Create the XML writer and check the runtime environment.
     *
     * @access public
     */
    public function __construct()
    {
        $this->_xmlWriter = new \XMLWriter();
        $this->_enviromentTest();
    }

    /**
     * Check that the script is running under the CLI SAPI and print a
     * notice when it is not.
     *
     * @return bool true when running from the command line
     * @access private
     */
    private function _enviromentTest()
    {
        $sapiType = \php_sapi_name();
        if (strtolower($sapiType) !== 'cli') {
            echo ' The Script Must Run In Command Lines ', "\r\n";
            return false;
        }

        return true;
    }

    /**
     * Load one configuration value by variable name from
     * config/config.ini.php (located next to this file).
     *
     * @param string $configName name of the variable defined in the config file
     * @return mixed the configured value, or false when the config file or
     *               the requested name does not exist
     * @access public
     */
    public function loadConfig($configName)
    {
        $configPath = __DIR__ . '/config/config.ini.php';
        if (!file_exists($configPath)) {
            echo "Can not find config file", "\r\n";
            return false;
        }

        /* the config file defines plain variables in this local scope */
        require $configPath;

        /* an unknown name must not raise an "undefined variable" warning */
        if (!is_string($configName) || !isset($$configName)) {
            return false;
        }

        return $$configName;
    }

    /**
     * Write the collected pages to the file configured as SITEMAPPATH.
     *
     * @param array $siteMapList entries as produced by generateSiteMapList()
     * @return bool true when the sitemap file was written
     * @access public
     */
    public function generateSiteMapXml($siteMapList)
    {
        /* check the parameter */
        if (!is_array($siteMapList) || count($siteMapList) <= 0) {
            echo 'The SiteMap Cotent Is Empty', "\r\n";
            return false;
        }

        /* make sure the target file exists and can be written */
        $siteMapPath = $this->loadConfig('SITEMAPPATH');
        if (!is_string($siteMapPath) || $siteMapPath === '') {
            return false;
        }
        if (!file_exists($siteMapPath) && is_writable(dirname($siteMapPath))) {
            /* native touch() instead of a shell command built from config */
            touch($siteMapPath);
        }
        if (!is_writable($siteMapPath)) {
            echo 'Is Not Writeable', "\r\n";
            return false;
        }

        /* values allowed for <changefreq> by the sitemap protocol */
        $validChangefreq = array('always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never');

        $this->_xmlWriter->openMemory();
        $this->_xmlWriter->setIndent(true);
        $this->_xmlWriter->startDocument('1.0', 'UTF-8');
        $this->_xmlWriter->startElement('urlset');
        $this->_xmlWriter->writeAttribute('xmlns', self::SCHEMA);

        foreach ($siteMapList as $siteMapItem) {
            if (empty($siteMapItem['Url'])) {
                continue;
            }

            $changefreq = !empty($siteMapItem['ChangeFreq']) ? $siteMapItem['ChangeFreq'] : 'Daily';
            $priority = !empty($siteMapItem['Priority']) ? $siteMapItem['Priority'] : 0.5;

            /* the protocol only accepts the lowercase keywords */
            $changefreq = strtolower($changefreq);
            if (!in_array($changefreq, $validChangefreq, true)) {
                $changefreq = 'daily';
            }

            $this->_xmlWriter->startElement('url');
            $this->_xmlWriter->writeElement('loc', $siteMapItem['Url']);
            $this->_xmlWriter->writeElement('changefreq', $changefreq);
            /* fixed '.' separator keeps the value locale independent */
            $this->_xmlWriter->writeElement('priority', number_format((float) $priority, 1, '.', ''));
            $this->_xmlWriter->endElement();
        }

        $this->_xmlWriter->endElement();
        $this->_xmlWriter->endDocument();

        $result = file_put_contents($siteMapPath, $this->_xmlWriter->outputMemory()) !== false;

        return $result;
    }

    /**
     * Fetch a URL with cURL and return the response body.
     *
     * @param string $url absolute URL to request
     * @return mixed the response body, or false when the URL is invalid,
     *               the timeout settings are missing, the transfer fails or
     *               the final HTTP status is not 200
     * @access public
     */
    public function sendRequest($url)
    {
        /* check the parameter */
        if (!filter_var($url, FILTER_VALIDATE_URL)) {
            return false;
        }

        $connectTimeOut = $this->loadConfig('CURLOPT_CONNECTTIMEOUT');
        if ($connectTimeOut === false) {
            return false;
        }

        $timeOut = $this->loadConfig('CURLOPT_TIMEOUT');
        if ($timeOut === false) {
            return false;
        }

        $handle = curl_init();
        if ($handle === false) {
            return false;
        }

        curl_setopt($handle, CURLOPT_URL, $url);
        curl_setopt($handle, CURLOPT_HEADER, false);
        curl_setopt($handle, CURLOPT_AUTOREFERER, true);
        curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, $connectTimeOut);
        curl_setopt($handle, CURLOPT_TIMEOUT, $timeOut);
        curl_setopt($handle, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; MSIE 5.01; Windows NT 5.0)");
        $headersItem = array(
            'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Connection: Keep-Alive',
        );
        curl_setopt($handle, CURLOPT_HTTPHEADER, $headersItem);
        curl_setopt($handle, CURLOPT_FOLLOWLOCATION, 1);

        /* optional cookie jar; COOKIELIST may be absent from the config */
        $cookieList = $this->loadConfig('COOKIELIST');
        if (is_array($cookieList) && !empty($cookieList['IsUseCookie']) && !empty($cookieList['CookiePath'])) {
            $cookieFilePath = $cookieList['CookiePath'];
            if (!file_exists($cookieFilePath) && is_writable(dirname($cookieFilePath))) {
                /* native touch() instead of a shell command built from config */
                touch($cookieFilePath);
            }
            curl_setopt($handle, CURLOPT_COOKIEFILE, $cookieFilePath);
            curl_setopt($handle, CURLOPT_COOKIEJAR, $cookieFilePath);
        }

        $responseData = curl_exec($handle);
        $httpCode = (int) curl_getinfo($handle, CURLINFO_HTTP_CODE);
        if ($responseData === false || $httpCode !== 200) {
            return false;
        }

        return $responseData;
    }

    /**
     * Fetch one page, record it in $siteMapList (url, title, priority,
     * changefreq) and append every new same-site link found on it to
     * $webUrlList. Links containing a SKIP_URLLIST keyword are ignored.
     *
     * @param string $url page to fetch
     * @access public
     */
    public function generateSiteMapList($url)
    {
        $content = $this->sendRequest($url);
        if ($content === false) {
            return;
        }

        $tagsList = $this->_parseContent($content, $url);
        $urlItem = (isset($tagsList['UrlItem']) && is_array($tagsList['UrlItem'])) ? $tagsList['UrlItem'] : array();
        $title = isset($tagsList['Title']) ? $tagsList['Title'] : '';

        $siteMapItem = array(
            'Url'   => trim($url),
            'Title' => trim($title),
        );
        $siteMapItem['Priority'] = $this->_calculatePriority($siteMapItem['Url']);
        $siteMapItem['ChangeFreq'] = $this->_calculateChangefreq($siteMapItem['Url']);
        $this->siteMapList[] = $siteMapItem;

        /* the skip list does not change per link, so load it once */
        $skipUrlList = $this->loadConfig('SKIP_URLLIST');
        if (!is_array($skipUrlList)) {
            $skipUrlList = array();
        }

        foreach ($urlItem as $nextUrl) {
            if (in_array($nextUrl, $this->webUrlList, true)) {
                continue;
            }

            foreach ($skipUrlList as $keyWords) {
                /* an empty keyword would match every URL */
                if ($keyWords !== '' && stripos($nextUrl, $keyWords) !== false) {
                    continue 2;
                }
            }

            $this->webUrlList[] = $nextUrl;
            echo $nextUrl, "\r\n";
        }
    }

    /**
     * Get the sitemap entries collected so far.
     *
     * @access public
     * @return array $siteMapList
     */
    public function getSiteMapList()
    {
        return $this->siteMapList;
    }

    /**
     * Calculate the priority of a URL from the PRIORITYLIST config
     * (keyword => priority). When several keywords occur in the URL the
     * last matching entry wins; without a match the priority is 0.5.
     *
     * @param string $targetUrl
     * @return mixed $priority 0.5 or the configured value
     * @access private
     */
    private function _calculatePriority($targetUrl)
    {
        $priority = 0.5;

        if (filter_var($targetUrl, FILTER_VALIDATE_URL)) {
            $priorityList = $this->loadConfig('PRIORITYLIST');
            if (is_array($priorityList)) {
                foreach ($priorityList as $priorityKey => $priorityValue) {
                    if (stripos($targetUrl, (string) $priorityKey) !== false) {
                        $priority = $priorityValue;
                    }
                }
            }
        }

        return $priority;
    }

    /**
     * Calculate the change frequency of a URL from the CHANGEFREQLIST
     * config (keyword => frequency). When several keywords occur in the URL
     * the last matching entry wins; without a match it is 'Daily'.
     *
     * @param string $targetUrl
     * @return string $changefreq
     * @access private
     */
    private function _calculateChangefreq($targetUrl)
    {
        $changefreq = 'Daily';

        if (filter_var($targetUrl, FILTER_VALIDATE_URL)) {
            $changefreqList = $this->loadConfig('CHANGEFREQLIST');
            if (is_array($changefreqList)) {
                foreach ($changefreqList as $changefreqKey => $changefreqValue) {
                    if (stripos($targetUrl, (string) $changefreqKey) !== false) {
                        $changefreq = $changefreqValue;
                    }
                }
            }
        }

        return $changefreq;
    }

    /**
     * Turn a raw href value (possibly still quoted) into an absolute URL.
     *
     * - surrounding quotes/whitespace and the #fragment are removed
     * - http(s) URLs are returned unchanged
     * - protocol-relative URLs ("//host/x") take the scheme of $originUrl
     * - other schemes (mailto:, javascript:, tel:, ...) are rejected
     * - root-relative paths are appended to the configured DOMAIN_NAME
     * - any other value is resolved against the directory of $originUrl
     *   ("../" segments are not collapsed)
     *
     * @param string $url raw href value
     * @param string $originUrl URL of the page the link was found on
     * @access private
     * @return string $formatUrl absolute URL, or '' when the link is unusable
     */
    private function _formatUrl($url, $originUrl)
    {
        if (empty($url) || empty($originUrl)) {
            return '';
        }

        /* strip the quotes first, otherwise a leading '#' stays hidden behind them */
        $formatUrl = trim(trim(trim($url), '\'"'));

        /* fragments never reach the server, so they only create duplicates */
        $fragmentPos = strpos($formatUrl, '#');
        if ($fragmentPos !== false) {
            $formatUrl = (string) substr($formatUrl, 0, $fragmentPos);
        }

        if ($formatUrl === '' || $formatUrl === '/' || $formatUrl === '\\') {
            return '';
        }

        /* already absolute: the scheme must be a prefix, not just appear somewhere */
        if (preg_match('#^https?://#i', $formatUrl)) {
            return $formatUrl;
        }

        /* protocol-relative link */
        if (strpos($formatUrl, '//') === 0) {
            $scheme = parse_url($originUrl, PHP_URL_SCHEME);
            return empty($scheme) ? '' : $scheme . ':' . $formatUrl;
        }

        /* any other scheme is not a crawlable page */
        if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $formatUrl)) {
            return '';
        }

        /* root-relative path */
        if ($formatUrl[0] === '/') {
            $domainName = $this->loadConfig('DOMAIN_NAME');
            if (!is_string($domainName) || $domainName === '') {
                return '';
            }
            /* works whether or not DOMAIN_NAME ends with a slash */
            return rtrim($domainName, '/') . '/' . trim($formatUrl, '/');
        }

        /* document-relative path: resolve against the directory of the origin page */
        $originBase = preg_replace('/[?#].*$/s', '', $originUrl);
        $schemePos = strpos($originBase, '://');
        $pathPos = strpos($originBase, '/', $schemePos === false ? 0 : $schemePos + 3);
        if ($pathPos !== false) {
            /* only cut at the last slash when the origin really has a path;
               otherwise strrpos() would hit the "//" of the scheme */
            $originBase = substr($originBase, 0, strrpos($originBase, '/'));
        }

        return $originBase . '/' . $formatUrl;
    }

    /**
     * Check that a URL belongs to the configured DOMAIN_NAME, i.e. that it
     * starts with DOMAIN_NAME and continues with a path, query or fragment.
     *
     * @param string $url absolute URL
     * @return bool true when the URL is on the configured site
     * @access private
     */
    private function _checkDomain($url)
    {
        if (!$url) {
            return false;
        }

        $domainName = $this->loadConfig('DOMAIN_NAME');
        if (!is_string($domainName)) {
            return false;
        }
        $domainName = rtrim($domainName, '/');
        if ($domainName === '') {
            return false;
        }

        /* must be a prefix: a match inside a query string is another site */
        if (stripos($url, $domainName) !== 0) {
            return false;
        }

        /* reject look-alike hosts such as "http://example.com.evil.org" */
        $nextChar = substr($url, strlen($domainName), 1);
        if ($nextChar === false || $nextChar === '') {
            return true;
        }

        return in_array($nextChar, array('/', '?', '#'), true);
    }

    /**
     * Parse a response body and extract the same-site links and the title.
     *
     * @param string $content HTML of the fetched page
     * @param string $originUrl URL the content was fetched from
     * @return array $tagsList 'UrlItem' => list of absolute URLs,
     *                         'Title' => content of the <title> tag ('' if none)
     * @access public
     */
    public function _parseContent($content, $originUrl)
    {
        /* both keys are always present so callers can read them safely */
        $tagsList = array(
            'UrlItem' => array(),
            'Title'   => '',
        );

        if (empty($content) || empty($originUrl)) {
            return $tagsList;
        }

        /* href of <a> tags: any attribute position, any letter case; no /u
           flag so pages that are not valid UTF-8 are still parsed */
        $regStrForTagA = '#<\s*a\s(?:[^>]*?\s)?href\s*=\s*("[^"]*"|\'[^\']*\')#i';
        if (preg_match_all($regStrForTagA, $content, $matches)) {
            $urlItem = array();
            foreach (array_unique($matches[1]) as $url) {
                $formatUrl = $this->_formatUrl($url, $originUrl);
                if ($formatUrl === '' || $this->_checkDomain($formatUrl) === false) {
                    continue;
                }
                $urlItem[] = $formatUrl;
            }
            /* different raw hrefs can normalise to the same URL */
            $tagsList['UrlItem'] = array_values(array_unique($urlItem));
        }

        /* content of the title tag; /s lets it span several lines */
        $regStrForTitle = '#<\s*title[^>]*>(.*?)<\s*/\s*title\s*>#is';
        if (preg_match($regStrForTitle, $content, $matches)) {
            $tagsList['Title'] = $matches[1];
        }

        return $tagsList;
    }
}
/*
 * Example: crawl the site configured as DOMAIN_NAME and write its sitemap.
 *
 * The crawl is breadth-first and unbounded: every same-site link that
 * generateSiteMapList() discovers is appended to $siteMap->webUrlList and
 * visited in turn, so the loop ends once no new URLs are found.
 */
$startTime = microtime(true);
echo "/***********************************************************************/","\r\n";
echo "/*                    start to run {$startTime}                        */","\r\n";
echo "/***********************************************************************/","\r\n\r\n";

$siteMap = new SiteMap();
$domain = $siteMap->loadConfig('DOMAIN_NAME');

if (is_string($domain) && $domain !== '') {
    /* seed the queue with the home page so it is never queued twice */
    $siteMap->webUrlList[] = $domain;

    /* count() is re-evaluated on purpose: the list grows while crawling */
    for ($urlIndex = 0; $urlIndex < count($siteMap->webUrlList); $urlIndex++) {
        $siteMap->generateSiteMapList($siteMap->webUrlList[$urlIndex]);
    }
}

$siteMapList = $siteMap->getSiteMapList();
$siteMap->generateSiteMapXml($siteMapList);

$endTime = microtime(true);
$takeTime = $endTime - $startTime;
echo "/***********************************************************************/","\r\n";
echo "/*               Had Done, \t it total take {$takeTime}               */","\r\n";
echo "/***********************************************************************/","\r\n";
'CookiePath' => '/tmp/sitemapcookie' );
/* Path of the sitemap file that SiteMap::generateSiteMapXml() writes. */
$SITEMAPPATH = './sitemap.xml';

/*
 * Keyword => sitemap priority.
 * A keyword applies when it occurs anywhere in the page URL (case-insensitive).
 */
$PRIORITYLIST = [
    'product'     => '0.8',
    'device'      => '0.6',
    'intelligent' => '0.4',
    'course'      => '0.2',
];

/*
 * Keyword => sitemap change frequency.
 * A keyword applies when it occurs anywhere in the page URL (case-insensitive).
 */
$CHANGEFREQLIST = [
    'product'     => 'Always',
    'device'      => 'Hourly',
    'intelligent' => 'Daily',
    'course'      => 'Weekly',
    'login'       => 'Monthly',
    'about'       => 'Yearly',
];
单击下载源代码 (提取码:fc1c)