使用phpoffice/phpword读取word内容

一:phpoffice/phpword安装

composer require phpoffice/phpword

phpword的GitHub地址:https://github.com/PHPOffice/PHPWord

phpword文档地址:https://phpword.readthedocs.io/en/latest/

二:加载word文档

$word = \PhpOffice\PhpWord\IOFactory::load("xxx")

三:获取word所有节点

$sections = $word->getSections()

四:获取word所有段落

$section->getElements()

五:判断文本元素类型

if ($element instanceof \PhpOffice\PhpWord\Element\TextRun) {
    //文本元素
} else if ($element instanceof \PhpOffice\PhpWord\Element\Table) {
    //表格元素
}

六:获取word文本内容

$node->getText()

七:获取word图片

//获取图片编码
$imageData = $node->getImageStringData(true);
//添加图片html显示标头
$imageData = 'data:' . $node->getImageType() . ';base64,' . $imageData;

八:读取word内容示例

/**
 * 获取word文档内容
 * @param string $wordPath
 * @return array
 */
public function getWord($wordPath = '')
{
    //加载word文档,使用phpword处理
    $word = \PhpOffice\PhpWord\IOFactory::load($wordPath);
    return $this->getNodeContent($word);
}

/**
 * 根据word主节点获取分节点内容
 * @param $word
 * @return array
 */
public function getNodeContent($word)
{
    $return = [];
    //分解部分
    foreach ($word->getSections() as $section)
    {
        if ($section instanceof \PhpOffice\PhpWord\Element\Section) {
            //分解元素
            foreach ($section->getElements() as $element)
            {
                //文本元素
                if ($element instanceof \PhpOffice\PhpWord\Element\TextRun) {
                    $text = '';
                    foreach ($element->getElements() as $ele) {
                        $text .= $this->getTextNode($ele);
                    }
                    $return[] = $text;
                }
                //表格元素
                else if ($element instanceof \PhpOffice\PhpWord\Element\Table) {
                    foreach ($element->getRows() as $ele)
                    {
                        $return[] = $this->getTableNode($ele);
                    }
                }
            }
        }
    }
    return $return;
}

/**
 * 获取文档节点内容
 * @param $node
 * @return string
 */
public function getTextNode($node)
{
    $return = '';
    //处理文本
    if ($node instanceof \PhpOffice\PhpWord\Element\Text)
    {
        $return .= $node->getText();
    }
    //处理图片
    else if ($node instanceof \PhpOffice\PhpWord\Element\Image)
    {
        $return .= $this->pic2text($node);
    }
    //处理文本元素
    else if ($node instanceof \PhpOffice\PhpWord\Element\TextRun) {
        foreach ($node->getElements() as $ele) {
            $return .= $this->getTextNode($ele);
        }
    }
    return $return;
}

/**
 * 获取表格节点内容
 * @param $node
 * @return string
 */
public function getTableNode($node)
{
    $return = '';
    //处理行
    if ($node instanceof \PhpOffice\PhpWord\Element\Row) {
        foreach ($node->getCells() as $ele)
        {
            $return .= $this->getTableNode($ele);
        }
    }
    //处理列
    else if ($node instanceof \PhpOffice\PhpWord\Element\Cell) {
        foreach ($node->getElements() as $ele)
        {
            $return .= $this->getTextNode($ele);
        }
    }
    return $return;
}

/**
 * 处理word文档中base64格式图片
 * @param $node
 * @return string
 */
public function pic2text($node)
{
    //获取图片编码
    $imageData = $node->getImageStringData(true);
    //添加图片html显示标头
    $imageData = 'data:' . $node->getImageType() . ';base64,' . $imageData;
    $return = '';
    return $return;
}

调用方法:

$docx = 'XXX.docx';
$word  = $this->getWord($docx);

你可能感兴趣的:(word,php)