正则匹配div中的a标记的href 和content

 
 

        很久后又一次接触正则,那种似曾相识的感觉,你是不是也有过?这么小 CASE 的东西,我一定要把它彻底掌握了,于是花时间看手册,于是就明白了,于是下次又是似曾相识……


总结下今天用到的地方:

//demo

$str = '<div class="test">
        <div class="wcb"><li>li</li>
        <a href="m.vip.com/shoe/47">跑步鞋</a>
        <a href="m.vip.com/shoe/48">篮球鞋</a>
        </div>
        <div class="wcb">tryagin</div>
        </div>';

        // Defaults, so the variables exist even when nothing matches.
        $target_content = '';
        $href_content = array();
        $detail_content = array();

        // Step 1: isolate the first <div class="wcb"> block.
        // The "s" modifier lets "." match newlines; without it the multi-line
        // div that holds the links is skipped and only the one-line
        // "tryagin" div is found.
        // preg_match_all() always fills $matches with (possibly empty)
        // sub-arrays, so test the first sub-array rather than $matches itself.
        preg_match_all('/<div class=\"wcb\">(.*?)<\/div>/is', $str, $matches);

        if(!empty($matches[0]))
        {
            $target_content = $matches[0][0];
        }

        // Step 2: pull the <a> tags out of that block. Each (...) is a capture
        // group: group 1 is the href (not starting with /topic/show), group 2
        // is the link text.
        if(preg_match_all('/<a href=\"((?!\/topic\/show).[^<]*)\">(.*?)<\/a>/i', $target_content, $matches_detail))
        {
            $href_content = $matches_detail[1];
            $detail_content = $matches_detail[2];
        }

方法:

/**
    * Extract the link list (href + link text) for one topic hall.
    *
    * Finds the page whose 'department' equals $topic, loads its current
    * version data, picks the content of the relevant module row, cuts the
    * container fragment out with $pattern1 and then collects the links
    * inside that fragment with $pattern2.
    *
    * @param string  $topic     Department key used to look up the page
    * @param string  $pattern1  PCRE pattern; its first full match is the container fragment
    * @param string  $pattern2  PCRE pattern run on the fragment; group 1 = href, group 2 = link text
    *
    * @return array  Empty array when any argument is empty. Otherwise
    *                $data['href_content'] (list of hrefs) and
    *                $data['detail_content'] (list of link texts); both are
    *                empty lists when no content or no match is found.
    */
    private function get_topic_data($topic, $pattern1, $pattern2)
    {
        $data = array();

        if (empty($topic) || empty($pattern1) || empty($pattern2)) {
            return $data;
        }

        // From here on the result always carries both keys, so callers can
        // iterate them without checking for their existence.
        $data['href_content'] = array();
        $data['detail_content'] = array();

        // Resolve the page id for this topic. There is deliberately no
        // break: if several pages share the department, the last one wins.
        $modpage_id = 0;
        $modpages = $this->Modpage_Model->getAllPages();
        foreach ($modpages as $one_page) {
            if ($one_page['department'] == $topic) {
                $modpage_id = $one_page['id'];
            }
        }

        // Load the module rows of the page's current version.
        $version_data = $this->Modpage_Model->getCurrentPageVersion($modpage_id);
        $version = isset($version_data['version']) ? $version_data['version'] : null;
        $page_info_raw = $this->Modpage_Model->getVersionData($modpage_id, $version);

        // Pick the module content to parse. $parse_str starts as '' so it is
        // defined even when no row qualifies.
        $parse_str = '';
        if (is_array($page_info_raw) || $page_info_raw instanceof \Traversable) {
            foreach ($page_info_raw as $v) {
                if ($topic == 'children') {
                    // The children hall is handled separately: module 1 with this description.
                    $is_wanted = ($v['module_id'] == 1 && $v['desc'] == '快速找宝贝');
                } else {
                    // All other halls read the back-office sub-navigation module.
                    $is_wanted = ($v['module_id'] == 4);
                }

                if ($is_wanted) {
                    $parse_str = $v['content'];
                    break;
                }
            }
        }

        // Strip line breaks and tabs so "." in the patterns can span the whole fragment.
        $parse_str = $this->clearData($parse_str);
        if ($parse_str === '') {
            return $data;
        }

        // preg_match_all() always fills $matches with sub-arrays, so rely on
        // its return value and the first full match instead of empty($matches).
        if (!preg_match_all($pattern1, $parse_str, $matches) || !isset($matches[0][0])) {
            return $data;
        }

        // Replace the site host name with TOUCH_URL, then clean again.
        $target_content = str_replace($this->website, TOUCH_URL, $matches[0][0]);
        $target_content = $this->clearData($target_content);

        if (preg_match_all($pattern2, $target_content, $matches_detail)) {
            $data['href_content'] = isset($matches_detail[1]) ? $matches_detail[1] : array();
            $data['detail_content'] = isset($matches_detail[2]) ? $matches_detail[2] : array();
        }

        return $data;
    }

   	/**
   	 * Strip line breaks and tab characters from a string and trim the result.
   	 *
   	 * Removes every "\r", "\n" and "\t" (so CRLF, LF and lone CR line
   	 * endings are all handled) and then trims surrounding whitespace.
   	 *
   	 * @param string  $str  Raw input string
   	 *
   	 * @return string  Cleaned string; '' for empty input
   	 */
   	private function clearData( $str )
   	{
   	    // empty() also treats the literal string '0' as empty; keep that
   	    // value instead of silently turning it into ''.
   	    if (empty($str) && $str !== '0') {
   	        return '';
   	    }

   	    // Removing "\r" and "\n" separately covers "\r\n" too, and also
   	    // catches a stray carriage return.
   	    $str = str_replace(array("\r", "\n", "\t"), '', $str);

   	    return trim($str);
   	}

//测试方法

/**
    * Home page entry point.
    *
    * Collects the link data of every topic hall through get_topic_data()
    * and renders the header, index and footer views.
    */
    public function index() {

        // Per department: array(view key, container pattern, link pattern).
        $topic_config = array(
            'sports' => array('sport','/<div class=\"spnavdiv\">(.*?)<\/div>/i','/<a target="_blank" href=\"((?!\/topic\/show).[^<]*)\">(.*?)<\/a>/i'),
            'womenshoes' => array('women','/<div class=\"OB_floatL\">(.*?)<\/div>/i','/<a href=\"((?!\/topic\/show).[^<]*)\" .*?>(.*?)<\/a>/i'),
            'menshoes' => array('man','/<div class=\"OB_floatL\" .*?>(.*?)<\/div>/i','/<a target="_blank" href=\"((?!\/topic\/show).[^<]*)\">(.*?)<\/a>/i'),
            'outdoor' => array('outdoor','/<div class=\"navlidiv\" style=\".*?\">(.*?)<\/div>/i','/<a href=\"((?!\/topic\/show).[^<]*)\"  target="_blank" .*?>(.*?)<\/a>/i'),
            'children' => array('children','/<table class=\"catetable_2\">(.*?)<\/div>/i','/<a href=\"((?!\/topic\/show).[^<]*)\" target="_blank">(.*?)<\/a>/i'),
            );

        // Gather the extracted links of each hall under its view key.
        $data = array();
        foreach ($topic_config as $department => $settings) {
            list($view_key, $container_pattern, $link_pattern) = $settings;
            $data['data'][$view_key] = $this->get_topic_data($department, $container_pattern, $link_pattern);
        }

        // Render the page: header, body, footer.
        $h_data = array('jspath' => 'index');
        $this->load->view("touch/common/header", $h_data);
        $this->load->view("touch/index/index", $data);
        $this->load->view("touch/common/footer");
    }


应用场景:

<div class="navsubmenu" style="width:280px;left:0"> <span class="arr" style="left:30px;"></span>
          <div class="OB_floatL" style="margin-right:30px">
            <h5 class="subh5">全部男鞋</h5>
            <p><a target="_blank" href="http://m.vip.com/shoe/167"class="hot">休闲鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/165">正装鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/166" class="hot">商务鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/170">凉鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/173">户外鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/172">帆布鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/171">板鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/168" class="hot">男靴</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/351">棉鞋</a></p>
          </div>
          <div class="OB_floatL" style="margin-right:30px">
            <h5 class="subh5">热门品类</h5>
            <p><a target="_blank" href="http://m.vip.com/topic/show/8049" class="hot">1月新品</a></p>
            <p><a target="_blank" href="http://m.vip.com/search?top_key=%E5%B7%A5%E8%A3%85%E9%9E%8B&new_cat=164" class="hot">工装鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/search?top_key=%E7%89%9B%E6%B4%A5%E9%9E%8B&new_cat=164">牛津鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/168">男靴</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/167-11v1585">伐木鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/search?top_key=%E5%B8%86%E8%88%B9%E9%9E%8B&new_cat=164">帆船鞋</a></p>
             <p><a target="_blank" href="http://m.vip.com/shoe/166">商务皮鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/166-16v243">增高鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/search?gender=1&top_key=%E5%86%9B%E8%AD%A6%E9%9D%B4">军警靴</a></p>
          </div>
</div>


PS:

      preg快速查看地址:http://msdn.microsoft.com/zh-cn/library/ae5bf541(v=vs.80).aspx

     小结:这是最笨,但是最快解决问题的方法

                 参考递归实现:http://zhidao.baidu.com/link?url=WYgzZnK-_kD_ooBmH3iALiPniS054Ympziofk0nX1B6Nywy1cPjGfnhSp3PaQ95qw_rEOG-E_GX3t4YFvtQeja


    总结:

            () 是捕获分组,可以返回括号内匹配到的内容(依次对应 $matches[1]、$matches[2] ……)


你可能感兴趣的:(正则匹配div中的a标记的href 和content)