php采集网页table表格数据(将HTML表格的每行每列转为数组)

实现代码:

]*?>'si","",$table);
  $table = preg_replace("']*?>'si","",$table);
  $table = preg_replace("']*?>'si","",$table);
  $table = str_replace("","{tr}",$table);
  $table = str_replace("","{td}",$table);
  //去掉 HTML 标记 
  $table = preg_replace("'<[/!]*?[^<>]*?>'si","",$table);
  //去掉空白字符 
  $table = preg_replace("'([rn])[s]+'","",$table);
  $table = str_replace(" ","",$table);
  $table = str_replace(" ","",$table);
  $table = explode('{tr}', $table);
  array_pop($table);
  foreach ($table as $key=>$tr) {
    $td = explode('{td}', $tr);
    array_pop($td);
    $td_array[] = $td;
  }
  return $td_array;
}

?>

 调用实例:

','');
	$arr=get_td_array($table);
	//输出数组
	echo "
";
	var_dump($arr);
/**
 * php截取指定两个字符之间字符串方式一
 * @param string $begin  开始字符串
 * @param string $end    结束字符串
 * @param string $str    需要截取的字符串
 * @return string
 */
function cut_str($begin,$end,$str){
    $b = mb_strpos($str,$begin) + mb_strlen($begin);
    $e = mb_strpos($str,$end) - $b;
    return mb_substr($str,$b,$e);;
}
/**
 * php截取指定两个字符之间字符串方式二
 * @param string $str    需要截取的字符串
 * @param string $start  开始字符串
 * @param string $end    结束字符串
 * @return string
 */
function get_between($str, $start, $end) {
    $substr = substr($str, strlen($start)+strpos($str, $start),(strlen($str) - strpos($str, $end))*(-1));
    return $substr;
}
//php采集table表格数据(将HTML表格的每行每列转为数组)
function get_td_array($table) {
  $table = preg_replace("']*?>'si","",$table);
  $table = preg_replace("']*?>'si","",$table);
  $table = preg_replace("']*?>'si","",$table);
  $table = str_replace("","{tr}",$table);
  $table = str_replace("","{td}",$table);
  //去掉 HTML 标记 
  $table = preg_replace("'<[/!]*?[^<>]*?>'si","",$table);
  //去掉空白字符 
  $table = preg_replace("'([rn])[s]+'","",$table);
  $table = str_replace(" ","",$table);
  $table = str_replace(" ","",$table);
  $table = explode('{tr}', $table);
  array_pop($table);
  foreach ($table as $key=>$tr) {
    $td = explode('{td}', $tr);
    array_pop($td);
    $td_array[] = $td;
  }
  return $td_array;
}
?> 

 打印结果:

php采集网页table表格数据(将HTML表格的每行每列转为数组)_第1张图片

 

你可能感兴趣的:(PHP)