1.文本转HTML（转义HTML标记）
/**
 * Convert plain text into HTML-safe markup.
 *
 * Spaces become `&nbsp;`, angle brackets become `&lt;` / `&gt;`, and every
 * run of CR/LF characters collapses into one `<br />` followed by CRLF.
 * Ampersands are not escaped, so entities already present in the input are
 * passed through unchanged.
 *
 * @param string $txt Plain text to convert.
 * @return string The converted text.
 */
function Text2Html($txt){
    // The search and replacement strings were identical before (the entities
    // had been decoded away), which made all three calls no-ops.
    $txt = str_replace(" ", "&nbsp;", $txt);
    $txt = str_replace("<", "&lt;", $txt);
    $txt = str_replace(">", "&gt;", $txt);
    // No "U" modifier: it would turn the quantifier lazy and emit one break
    // per CR/LF character instead of one per run.
    $txt = preg_replace("/[\r\n]+/", "<br />\r\n", $txt);
    return $txt;
}
2.相对路径转绝对路径
/**
 * Rewrite root-relative href/src attributes in $content to absolute URLs.
 *
 * The scheme and host are taken from $feed_url. Only double-quoted
 * attributes whose value starts with a single "/" are rewritten;
 * protocol-relative values ("//host/...") are left untouched because they
 * are already host-qualified.
 *
 * @param string $content  HTML fragment to rewrite.
 * @param string $feed_url URL whose scheme and host form the base.
 * @return string Rewritten HTML, or $content unchanged when no
 *                http/https/ftp scheme or no host can be extracted.
 */
function relative_to_absolute($content, $feed_url) {
    // Strip the scheme, then everything from the first "/" on, leaving the host.
    $server_url = preg_replace('/(http|https|ftp|news):\/\//', '', $feed_url);
    $server_url = preg_replace('/\/.*/', '', $server_url);
    if ($server_url == '') {
        return $content;
    }

    if (!preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol)) {
        return $content;
    }

    // Escape "\" and "$" so the base is never read as a backreference.
    $base = addcslashes($protocol[0] . $server_url, '\\$');

    // (?!\/) skips protocol-relative URLs such as src="//cdn.example.org/x.js".
    return preg_replace('/(href|src)="\/(?!\/)/', '$1="' . $base . '/', $content);
}
3.取得所有链接
/**
 * Extract every anchor from an HTML fragment.
 *
 * Matches `<a href=...>text</a>` where href is the first attribute, the
 * value is optionally quoted, and the link text contains no ">" character.
 *
 * @param string $code HTML to scan.
 * @return array Two parallel lists: 'name' holds the link texts and 'url'
 *               holds the href values. Both are empty when nothing matches.
 */
function get_all_url($code){
    // The opening `<a\s+href=["']?(` part of the pattern had been lost,
    // leaving an unmatched ")" that stopped the regex from compiling.
    $pattern = '/<a\s+href=["\']?([^>"\' ]+)["\']?\s*[^>]*>([^>]+)<\/a>/i';
    $arr = array();
    if (!preg_match_all($pattern, $code, $arr)) {
        // No match (0) or PCRE failure (false): return a well-formed empty result.
        return array('name' => array(), 'url' => array());
    }
    return array('name' => $arr[2], 'url' => $arr[1]);
}
4.获取指定标记中的内容
/**
 * Return the text located between the first $start marker and the next
 * $end marker that follows it.
 *
 * @param string $str   Text to search.
 * @param string $start Opening delimiter.
 * @param string $end   Closing delimiter.
 * @return string|null The enclosed text; '' when $start does not occur in
 *                     $str; null when either delimiter is empty.
 */
function get_tag_data($str, $start, $end){
    if ($start == '' || $end == '') {
        return;
    }
    $pieces = explode($start, $str);
    // Without this guard a missing $start read the undefined index 1 and
    // passed null to explode().
    if (!isset($pieces[1])) {
        return '';
    }
    $pieces = explode($end, $pieces[1]);
    return $pieces[0];
}
5.获取远程文件内容(抓内容)
/**
 * Fetch the contents of a remote file.
 *
 * Strategies, in order: file_get_contents(); a chunked fopen()/fread() loop
 * when file_get_contents() is unavailable; and cURL as a fallback whenever
 * the stream-based attempt failed. The payload is returned byte-for-byte.
 *
 * @param string $url HTTP address of the file.
 * @return string|false The body on success, false when a fetch was attempted
 *                      and failed, '' when no fetch mechanism was usable.
 */
function fopen_url($url)
{
    $file_content = false;
    $attempted = false;

    if (function_exists('file_get_contents')) {
        $attempted = true;
        // Errors are suppressed on purpose: failure is reported via the return value.
        $file_content = @file_get_contents($url);
    } elseif (ini_get('allow_url_fopen') && ($file = @fopen($url, 'rb'))) {
        $attempted = true;
        // Start from an empty string instead of appending to an undefined variable.
        $file_content = '';
        $i = 0;
        // Cap at 1000 reads of 4096 bytes so a never-ending stream cannot hang us.
        while (!feof($file) && $i++ < 1000) {
            $chunk = fread($file, 4096);
            if ($chunk === false) {
                break;
            }
            // Not lower-cased: every strategy must return the same bytes.
            $file_content .= $chunk;
        }
        fclose($file);
    }

    // Fall back to cURL when the stream wrappers are unavailable or failed.
    if ($file_content === false && function_exists('curl_init')) {
        $attempted = true;
        $curl_handle = curl_init();
        curl_setopt($curl_handle, CURLOPT_URL, $url);
        curl_setopt($curl_handle, CURLOPT_CONNECTTIMEOUT, 2);
        curl_setopt($curl_handle, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($curl_handle, CURLOPT_FAILONERROR, 1);
        curl_setopt($curl_handle, CURLOPT_USERAGENT, 'Trackback Spam Check');
        $file_content = curl_exec($curl_handle);
        curl_close($curl_handle);
    }

    return $attempted ? $file_content : '';
}
6.去掉指定的标签函数
/**
 * Remove the opening and closing forms of the given tags from a string,
 * keeping whatever text sits between them.
 *
 * Example:
 *   $str = '<a href="#">text</a><img src="x.png">more';
 *   echo _strip_tags(array("a", "img"), $str);   // prints "textmore"
 *
 * @param array  $tags_a Tag names to remove (matched case-insensitively).
 * @param string $str    Markup to clean.
 * @return string $str without the listed tags; unchanged when $tags_a is empty.
 */
function _strip_tags($tags_a,$str)
{
    $patterns = array();
    foreach ($tags_a as $tag) {
        // The lookahead requires the tag name to end here, so stripping "a"
        // leaves <abbr> or <article> alone. preg_quote() keeps odd tag
        // names from breaking the pattern.
        $patterns[] = '/<\/?' . preg_quote($tag, '/') . '(?=[\s\/>])[^>]*>/i';
    }

    // With nothing to strip, skip preg_replace(): the pattern list used to
    // be undefined in this case.
    if (!$patterns) {
        return $str;
    }

    return preg_replace($patterns, '', $str);
}