将Word文档转化为HTML格式的文档

利用Word.Application提供的方法,可以很轻易地将Word文档转化为HTML等其它格式,下面就是实现的全部代码。注意,必须先添加引用:Microsoft Word 9.0 Object Library。

说明:本文代码为Office 2000环境下的代码,如果是Office XP或者Office 2003,您必须引用对应版本的Microsoft Word Object Library;同时,SaveAs调用 docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod, null, doc, new object[]{saveFileName, Word.WdSaveFormat.wdFormatHTML}); 的参数个数也可能不同,具体请参照:
http://msdn.microsoft.com/library/en-us/dnanchor/html/odc_ancoffice.ASP

Visual C#

WordToHtml.aspx

<%@ Page language="c#" Codebehind="WordToHtml.aspx.cs" AutoEventWireup="false" Inherits="aspxWebcs.WordToHtml" %>
<%-- Empty host page: it only declares the server-side form. The Inherits
     attribute binds the page to the aspxWebcs.WordToHtml code-behind class,
     and AutoEventWireup="false" means that class must attach its own
     Load handler. --%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" >
<HTML>
    <HEAD>
        <title>WordToHtml</title>
        <meta name="GENERATOR" Content="Microsoft Visual Studio .NET 7.1">
        <meta name="CODE_LANGUAGE" Content="C#">
        <meta name="vs_defaultClientScript" content="JavaScript">
        <meta name="vs_targetSchema" content="http://schemas.microsoft.com/intellisense/ie5">
    </HEAD>
    <body MS_POSITIONING="GridLayout">
        <form id="Form1" method="post" runat="server">
        </form>
    </body>
</HTML>

WordToHtml.aspx.cs

using System;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Web;
using System.Web.SessionState;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.HtmlControls;
using Office;

namespace aspxWebcs
{
    /// <summary>
    /// Code-behind for WordToHtml.aspx. When the page loads, it opens a Word
    /// document through Word automation and saves a copy in HTML format.
    /// Requires a project reference to the Microsoft Word 9.0 Object Library.
    /// </summary>
    public class WordToHtml : System.Web.UI.Page
    {
        /// <summary>
        /// Converts d://tmp//aaa.doc to d://tmp//aaa.html by driving a Word
        /// instance, then shuts that instance down.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void Page_Load(object sender, System.EventArgs e)
        {
            Word.ApplicationClass word = new Word.ApplicationClass();
            Word.Documents docs = null;
            Word.Document doc = null;

            try
            {
                docs = word.Documents;

                // Open the source file. The call is made late-bound through
                // reflection so that only the leading arguments have to be
                // supplied; per the Word object model these are FileName,
                // ConfirmConversions and ReadOnly.
                Type docsType = docs.GetType();
                object fileName = "d://tmp//aaa.doc";
                doc = (Word.Document)docsType.InvokeMember(
                    "Open",
                    System.Reflection.BindingFlags.InvokeMethod,
                    null,
                    docs,
                    new Object[] {fileName, true, true});

                // Convert the format by saving under a new name.
                Type docType = doc.GetType();
                object saveFileName = "d://tmp//aaa.html";

                // The call below is written for the Microsoft Word 9 Object
                // Library. With version 10 it might instead be written as:
                //
                //   docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod,
                //       null, doc, new object[]{saveFileName, Word.WdSaveFormat.wdFormatFilteredHTML});
                //
                // Other formats listed by the original author:
                //   wdFormatHTML
                //   wdFormatDocument
                //   wdFormatDOSText
                //   wdFormatDOSTextLineBreaks
                //   wdFormatEncodedText
                //   wdFormatRTF
                //   wdFormatTemplate
                //   wdFormatText
                //   wdFormatTextLineBreaks
                //   wdFormatUnicodeText
                docType.InvokeMember(
                    "SaveAs",
                    System.Reflection.BindingFlags.InvokeMethod,
                    null,
                    doc,
                    new object[]{saveFileName, Word.WdSaveFormat.wdFormatHTML});
            }
            finally
            {
                // Always quit Word, even when Open or SaveAs throws; otherwise
                // every failed request leaves a Word process running on the
                // server.
                try
                {
                    Type wordType = word.GetType();
                    wordType.InvokeMember(
                        "Quit",
                        System.Reflection.BindingFlags.InvokeMethod,
                        null,
                        word,
                        null);
                }
                finally
                {
                    // Drop the runtime callable wrappers right away instead of
                    // waiting for the garbage collector to finalize them.
                    ReleaseComObject(doc);
                    ReleaseComObject(docs);
                    ReleaseComObject(word);
                }
            }
        }

        /// <summary>
        /// Releases the COM reference held by a runtime callable wrapper;
        /// does nothing when <paramref name="comObject"/> is null.
        /// </summary>
        /// <param name="comObject">The COM wrapper to release, or null.</param>
        private static void ReleaseComObject(object comObject)
        {
            if (comObject != null)
            {
                System.Runtime.InteropServices.Marshal.ReleaseComObject(comObject);
            }
        }

        #region Web Form Designer generated code
        override protected void OnInit(EventArgs e)
        {
            //
            // CODEGEN: This call is required by the ASP.NET Web Form Designer.
            //
            InitializeComponent();
            base.OnInit(e);
        }

        /// <summary>
        /// Required method for Designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InitializeComponent()
        {
            // AutoEventWireup is off in the page directive, so the Load
            // handler is attached explicitly here.
            this.Load += new System.EventHandler(this.Page_Load);
        }
        #endregion
    }
}

你可能感兴趣的:(将Word文档转化为HTML格式的文档)