PHP 实现:抓取从其他网站拷贝的富文本内容中的图片并保存到本地

 
  

/**
 * Downloads the remote images referenced by <img> tags in a rich-text
 * fragment and rewrites those tags to point at the local copies.
 *
 * Images whose src already contains the configured upload host are not
 * downloaded again; only their file name is kept. Images that cannot be
 * fetched or stored (missing src, non-HTTP(S) scheme, failed download or
 * write) are left untouched in the returned content and are not reported
 * in image_urls.
 *
 * @param string $content Rich-text HTML that may contain <img> tags.
 * @param string $dist    Directory the downloaded files are written to
 *                        (a trailing slash is optional).
 * @param string $url     Prefix prepended to every stored file name to build
 *                        the new src value.
 *
 * @return array array('content' => rewritten HTML, 'image_urls' => list of new image URLs)
 */
public function saveImgFromList($content, $dist, $url)
{
    // NOTE(review): relies on getImgByReg() keying its result by the ordinal
    // position of each <img> match; keep this pattern equivalent to the one
    // used there so that the positions line up.
    $imgPattern = '/<img\b[^>]*>/i';

    $list = $this->getImgByReg($content);
    // Presumably the base URL of our own upload host - TODO confirm.
    $accessUrl = $this->getServiceLocator()->get('access_upload_message');
    $targetDir = rtrim($dist, '/\\') . DIRECTORY_SEPARATOR;

    foreach ($list as $key => $val) {
        $src = isset($val['src']) ? trim($val['src']) : '';
        if ($src === '') {
            // Nothing to fetch; leave the original tag as it is.
            unset($list[$key]);
            continue;
        }

        // Already served from our own host: keep only the last path segment.
        if (is_string($accessUrl) && $accessUrl !== '' && strpos($src, $accessUrl) !== false) {
            $segments = explode('/', $src);
            $list[$key]['src'] = array_pop($segments);
            continue;
        }

        // The src comes from untrusted markup: never hand local paths or
        // stream wrappers (file://, php://, data: ...) to file_get_contents().
        // NOTE(review): this still allows requests to internal HTTP hosts;
        // add a host allow/deny list if that matters for the deployment.
        if (!preg_match('#^https?://#i', $src)) {
            unset($list[$key]);
            continue;
        }

        // Take the extension from the URL path only, so a query string such
        // as "?v=1.2" can not be mistaken for it.
        $path = parse_url($src, PHP_URL_PATH);
        $ext = is_string($path) ? pathinfo($path, PATHINFO_EXTENSION) : '';
        if ($ext === '' || !in_array($ext, self::$imgExt, true)) {
            $ext = 'jpg';
        }

        $file = file_get_contents($src);
        if ($file === false) {
            unset($list[$key]);
            continue;
        }

        $name = md5(uniqid('', true)) . '.' . $ext;
        if (file_put_contents($targetDir . $name, $file) === false) {
            unset($list[$key]);
            continue;
        }

        $list[$key]['src'] = $name;
    }

    $newImgInfo = $this->replaceImg($list, $url);
    $newImgTags = $newImgInfo['newImgTags'];
    $newImgUrls = $newImgInfo['newImgUrls'];

    // Replace the n-th <img> tag with the rebuilt tag stored under key n;
    // tags without a rebuilt counterpart are returned unchanged.
    $index = -1;
    $callback = function ($matches) use (&$index, $newImgTags) {
        $index++;

        return isset($newImgTags[$index]) ? $newImgTags[$index] : $matches[0];
    };

    $replaced = preg_replace_callback($imgPattern, $callback, $content);

    return array(
        // preg_replace_callback() yields null on a PCRE error; fall back to the input.
        'content' => $replaced === null ? $content : $replaced,
        'image_urls' => $newImgUrls,
    );
}


/**
 * Extracts the attributes of every <img> tag found in an HTML fragment.
 *
 * @param string $str HTML fragment to scan.
 *
 * @return array Map of tag ordinal (0-based position of the tag among all
 *               <img> tags in $str) to an array of lower-cased attribute
 *               name => raw attribute value. A tag without any attribute
 *               produces no entry, so the keys may be sparse.
 */
function getImgByReg($str)
{
    $list = array();

    $tagCount = preg_match_all('/<img\b[^>]*>/i', (string) $str, $tags);
    if (!$tagCount) {
        return $list;
    }

    // Group 1: attribute name (hyphens/colons allowed so "data-src" is not
    //          mistaken for "src").
    // Group 3: value of a quoted attribute (either quote style).
    // Group 4: value of an unquoted attribute, ending at whitespace or at
    //          the closing ">" / "/>" of the tag.
    $attrPattern = '/([\w:-]+)\s*=\s*(?:(["\'])(.*?)\2|([^\s>]*?(?=\s|\/?>|$)))/s';

    foreach ($tags[0] as $i => $tag) {
        $attrCount = preg_match_all($attrPattern, $tag, $attrs);
        for ($j = 0; $j < $attrCount; $j++) {
            $attrName = strtolower($attrs[1][$j]);
            // Compare against '' rather than using empty(), which would
            // discard a legitimate unquoted value of "0".
            $list[$i][$attrName] = $attrs[4][$j] !== '' ? $attrs[4][$j] : $attrs[3][$j];
        }
    }

    return $list;
}
 
  
	/**
	 * Rebuilds an <img> tag for every attribute set in $list, prefixing each
	 * src value with $url.
	 *
	 * @param array  $list Map of key => (attribute name => attribute value).
	 * @param string $url  Prefix prepended to every src value.
	 *
	 * @return array array(
	 *                   'newImgTags' => rebuilt tags, keyed like $list,
	 *                   'newImgUrls' => list of the prefixed src values,
	 *               )
	 */
	function replaceImg($list, $url)
	{
		$newImgTags = array();
		$newImgUrls = array();

		foreach ($list as $key => $val) {
			$parts = array();
			foreach ($val as $attr => $v) {
				if ($attr === 'src') {
					$v = $url . $v;
					$newImgUrls[] = $v;
				}
				// The value is wrapped in double quotes, so a literal double
				// quote inside it (possible when the source attribute was
				// single-quoted) must be encoded or it would end the attribute.
				$parts[] = $attr . '="' . str_replace('"', '&quot;', $v) . '"';
			}

			$newImgTags[$key] = '<img ' . implode(' ', $parts) . '>';
		}

		return array('newImgTags' => $newImgTags, 'newImgUrls' => $newImgUrls);
	}

// Example usage.
// Directory the downloaded images are written to, bucketed by date.
// The date is computed once so the directory and the URL below always agree,
// and both end with a separator because the file name is appended directly.
$datePath = date('Y/m/d');
$dist = '/User/www/img/' . $datePath . '/';
// NOTE(review): 0777 is kept from the original sample; tighten it if the
// web server user owns the directory.
if (!is_dir($dist) && !mkdir($dist, 0777, true) && !is_dir($dist)) {
    throw new RuntimeException('Unable to create image directory: ' . $dist);
}
// NOTE(review): '{{urlholer}}' looks like a misspelling of "urlholder", but it
// is a runtime placeholder that other code may search for - confirm before renaming.
define('URLHOLDER', '{{urlholer}}');
// Base URL (image server or directory) under which the saved files are served;
// it mirrors the date-bucketed layout of $dist.
$url = URLHOLDER . '/img/' . $datePath . '/';

// 这是模拟你需要替换的用户提交的富文本内容 里面包含图片地址

$content = '

Push Pop Press致力于创造一个逼真的、充满物理效应的体验。POP就是在这个理念下催生出来的新一代成果。

POP使用Objective-C++编写。Objective-C++是对C++的扩展,就像Objective-C是C的扩展一样。而至于为什么他们用Objective-C++而不是纯粹的Objective-C,原因在于他们更喜欢Objective-C++的语法特性所提供的便利。

POP的架构

POP目前由四个部分组成(如图1所示),即Animations、Engine、Utility、WebCore。

图1  POP架构图

POP动画极为流畅,其秘密就在于这个引擎中的POPAnimator。POP通过CADisplayLink让动画实现了60 FPS的流畅效果,打造了一个游戏级的动画引擎。

CADisplayLink是类似NSTimer的定时器,不同之处在于,NSTimer用于我们定义任务的执

';

// Localise the images of the sample content, then collect the two values
// that are meant to be persisted.
$res = saveImgFromList($content, $dist, $url);
$param = array(
    // Rich-text content with its <img> tags rewritten.
    'content' => $res['content'],
    // URLs of the images referenced by that content.
    'image_urls' => $res['image_urls'],
);

这时你的内容就可以入库了

你可能感兴趣的:(编程语言,防盗链,抓取,ckeditor)