PHP 解析 Word 文档

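一个简单的 Word 文档阅读类:使用 ZipArchive 读取 docx 包中的 word/document.xml,再用正则表达式提取表格、文本框和段落并拼接成 HTML。代码如下(注意:下面的代码在转载过程中似乎丢失了开头的类定义以及正则和字符串中的 XML/HTML 标签,无法直接运行,仅供参考思路):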

open($file);
		$xml = $zip->getFromName("word/document.xml");
		$table = array(); //缓存表格
		$textbox = array(); //缓存文本框
		//处理表格
		preg_match_all('/([\s\S]*?)<\/w:tbl>/s', $xml, $tableHandel);
		for ($i = 0; $i < count($tableHandel[0]); $i++) {
			$table["@TABLECONTENT@" . $i] = $tableHandel[0][$i];
			$xml = str_replace($tableHandel[0][$i], "@TABLECONTENT@" . $i, $xml);
		}
		//处理文本框
		preg_match_all('/([\s\S]*?)<\/w:pict>/s', $xml, $textboxHandel);
		foreach ($textboxHandel[0] as $key => &$value) {
			$temp = $value;
			$temp2 = "";
			preg_match_all('/([\s\S]*?)<\/w:t>/s', $value, $div);
			foreach ($div[0] as $k => &$v) {
				$temp2 .= $v;
			}
			$xml = str_replace($temp, $temp2, $xml);
		}
		for ($i = 0; $i < count($textboxHandel[0]); $i++) {
			$textbox["@TEXTBOXCONTENT@" . $i] = $textboxHandel[0][$i];
			$xml = str_replace($textboxHandel[0][$i], "@TEXTBOXCONTENT@" . $i, $xml);
		}
		preg_match_all('/|@TABLECONTENT@\d|@TEXTBOXCONTENT@\d/s', $xml, $content);
		foreach ($content[0] as $key => &$value) {
			if (strpos($value, "TABLECONTENT")) {
				$value = $table[$value];
			}
			$value = str_replace("w:", "", $value);
		}
		$content = $content[0]; //把段落和表格解析出来
		$docx = <<
	table{
		background-color:#000;
	}
	table td{
		padding:5px 5px 5px 5px;
	}
	table tr{
		background-color:#fff;
	}

HTML_ENTITIES;
		foreach ($content as $a => &$b) {
			$b = json_decode(
				json_encode(
					simplexml_load_string($b)
				),
				true
			);
			if (isset($b['tr'])) {
				//表格
				$docx .= "";
				foreach ($b['tr'] as $key => $value) {
					$docx .= "";
					foreach ($value['tc'] as $k => $v) {
						if (isset($v['p']['r'][0])) {
							$docx .= "";
						} else {
							$docx .= "";
						}

					}
					$docx .= "";
				}
				$docx .= "
"; foreach ($v['p']['r'] as $ke => $va) { $docx .= $va['t']; } $docx .= "" . $v['p']['r']['t'] . "
"; } else { //段落 $docx .= "

"; if (isset($b['r'][0])) { foreach ($b['r'] as $key => &$value) { if (is_string($value['t'])) { $docx .= $value['t']; } } } else { if (is_string($b['r']['t'])) { $docx .= $b['r']['t']; } } $docx .= "

"; } } return $docx; } }

 

你可能感兴趣的:(php开发)