利用OpenXml SDK获取Word段落样式和文本

该小例子演示了如何根据RelationshipType获取文档格式和段落文本。

1、代码:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using System.IO.Packaging;
using System.IO;
using System.Xml;

namespace PackageRelationshipsGetParagraphs
{
    public static class LocalExtensions
    {
        public static string StringConcatenate(this IEnumerable source)
        {
            StringBuilder sb = new StringBuilder();
            foreach (string s in source)
                sb.Append(s);
            return sb.ToString();
        }

        public static string StringConcatenate(this IEnumerable source,
            Func func)
        {
            StringBuilder sb = new StringBuilder();
            foreach (T item in source)
                sb.Append(func(item));
            return sb.ToString();
        }

        public static string StringConcatenate(this IEnumerable source, string separator)
        {
            StringBuilder sb = new StringBuilder();
            foreach (string s in source)
                sb.Append(s).Append(separator);
            return sb.ToString();
        }

        public static string StringConcatenate(this IEnumerable source,
            Func func, string separator)
        {
            StringBuilder sb = new StringBuilder();
            foreach (T item in source)
                sb.Append(func(item)).Append(separator);
            return sb.ToString();
        }
    }

    class Program
    {
        public static string ParagraphText(XElement e)
        {
            XNamespace w = e.Name.Namespace;
            return e
                    .Elements(w + "r")
                    .Elements(w + "t")
                    .StringConcatenate(element => (string)element);
        }

        static void Main(string[] args)
        {
            const string fileName = "Test.docx";

            const string documentRelationshipType =
                "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
            const string stylesRelationshipType =
                "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
            const string wordmlNamespace =
                "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
            XNamespace w = wordmlNamespace;

            XDocument xDoc = null;
            XDocument styleDoc = null;

            using (Package wdPackage = Package.Open(fileName, FileMode.Open, FileAccess.Read))
            {
                PackageRelationship docPackageRelationship =
                    wdPackage.GetRelationshipsByType(documentRelationshipType).FirstOrDefault();

                if (docPackageRelationship != null)
                {
                    // Resolve the Relationship Target Uri
                    // so the Document Part can be retrieved (expected /word/document.xml)
                    Uri documentUri = PackUriHelper.ResolvePartUri(new Uri("/", UriKind.Relative),
                        docPackageRelationship.TargetUri);
                    PackagePart documentPart = wdPackage.GetPart(documentUri);

                    // Load the style XML to the part into an XDocument instance.
                    xDoc = XDocument.Load(XmlReader.Create(documentPart.GetStream()));

                    // Find the style part. 
                    PackageRelationship styleRelation =
                        documentPart.GetRelationshipsByType(stylesRelationshipType)
                        .FirstOrDefault();
                    if (null != styleRelation)
                    {
                        Uri styleUri = PackUriHelper.ResolvePartUri(documentUri, styleRelation.TargetUri);
                        PackagePart stylePart = wdPackage.GetPart(styleUri);

                        // load the style XML in the part into an XDocument instance.
                        styleDoc = XDocument.Load(XmlReader.Create(stylePart.GetStream()));
                    }
                }
            }

            // 
            //  
            //  
            // 
            // Get styleId, we can get "a" from the below sample xml
            string defaultStyle =
                (string)(from style in styleDoc.Root.Elements(w + "style")
                         where (string)style.Attribute(w + "type") == "paragraph" &&
                               (string)style.Attribute(w + "default") == "1"
                         select style)
                 .First().Attribute(w + "styleId");


            // Find all paragraphs in the document.
            var paragraphs =
                from para in xDoc
                             .Root
                             .Element(w + "body")
                             .Descendants(w + "p")
                let styleNode = para
                                .Elements(w + "pPr")
                                .Elements(w + "pStyle")
                                .FirstOrDefault()
                select new
                {
                    ParagraphNode = para,
                    StyleName = styleNode != null ? (string)styleNode.Attribute(w + "val") : defaultStyle
                };

            // Retrieve the text of each paragraph
            var paraWithText =
                from para in paragraphs
                select new
                {
                    ParagraphNode = para.ParagraphNode,
                    StyleName = para.StyleName,
                    Text = ParagraphText(para.ParagraphNode)
                };
            foreach (var p in paraWithText)
                Console.WriteLine("StyleName:{0}, Text:{1}", p.StyleName, p.Text);

            Console.ReadKey();
        }
    }
}

2、运行结果:


你可能感兴趣的:(OpenXml)