C#操作Word读取表格内容

1、注意Office版本,一般office2016环境下写的东西,office2013环境下是运行不了的。(据说可以有第三方库支持,以后慢慢研究)

2、注意中断程序后一定要注销计算机(或者在任务管理器里结束残留的WINWORD.EXE进程),否则docx文件老是提示被占用。

3、注意try catch finally一定得加上。

4、多余符号用正则表达式排除,我始终觉得 Regex Workbench 是极好用的正则工具,不知道为什么现在网上找不到了,需要可以联系我。

贴段代码吧:

        /// <summary>
        /// Opens a Word document through COM interop and extracts selected
        /// fields from every table in it as comma-separated text.
        /// </summary>
        /// <remarks>
        /// For each table row, the text of column 1 is tested against a fixed,
        /// ordered list of label patterns. When a label matches, the leading run
        /// of non-whitespace characters from column 2 of the same row is appended
        /// to the result followed by a comma. After the last label (floor count)
        /// the document file name and a CRLF are appended, ending the record.
        /// Any exception raised while reading is shown in a message box and the
        /// text collected so far is returned.
        /// </remarks>
        /// <param name="docFileName">Path of the Word document to read.</param>
        /// <returns>
        /// The extracted values; an empty string when nothing matched or the
        /// document could not be opened.
        /// </returns>
        private string DocToExcel2(string docFileName)
        {
            object missing = System.Reflection.Missing.Value;
            object fileobj = docFileName;
            // The document is only read, so it is always closed without saving.
            object doNotSave = Word.WdSaveOptions.wdDoNotSaveChanges;

            // Label patterns for column 1, in output order. They are built once
            // here instead of once per row. The last entry terminates a record.
            Regex[] labelPatterns = new Regex[]
            {
                new Regex(@"^图上编号.+"),   // map number
                new Regex(@"^门牌号.+"),     // house number
                new Regex(@"^户主.+"),       // householder
                new Regex(@"^电话.+"),       // telephone
                new Regex(@"^楼层数.+")      // floor count
            };
            int recordEndIndex = labelPatterns.Length - 1;

            // Group 1 keeps the leading non-whitespace run of the value cell;
            // the trailing ".+" requires at least one further character.
            Regex valuePattern = new Regex(@"^([^\t\v\s]+).+");

            System.Text.StringBuilder context = new System.Text.StringBuilder();

            Word.ApplicationClass app = null;
            Word.Document wd = null;

            try
            {
                app = new Word.ApplicationClass();

                wd = app.Documents.Open(ref fileobj, ref missing, ref missing, ref missing,
                                        ref missing, ref missing, ref missing, ref missing, ref missing,
                                        ref missing, ref missing, ref missing, ref missing, ref missing,
                                        ref missing, ref missing);

                foreach (Word.Table table in wd.Tables)
                {
                    // Word table rows and columns are 1-based, so iterate
                    // 1..Count inclusive to cover every row, including the last.
                    int rowCount = table.Rows.Count;
                    for (int k = 1; k <= rowCount; k++)
                    {
                        string firstContent = table.Cell(k, 1).Range.Text;

                        for (int i = 0; i < labelPatterns.Length; i++)
                        {
                            if (!labelPatterns[i].IsMatch(firstContent))
                            {
                                continue;
                            }

                            Match value = valuePattern.Match(table.Cell(k, 2).Range.Text.Trim());

                            // A failed match yields an empty group, so the
                            // column position is still kept by the comma.
                            context.Append(value.Groups[1].Value.Trim()).Append(",");

                            if (i == recordEndIndex)
                            {
                                // File name closes the record; useful for tracing bad rows.
                                context.Append(docFileName).Append("\r\n");
                            }
                        }
                    }
                }
            }
            catch (Exception error)
            {
                MessageBox.Show("Error:" + error.Message);
            }
            finally
            {
                // Guard against null: construction or Open may have failed.
                // The nested try/finally makes sure Word is asked to quit even
                // if closing the document throws.
                try
                {
                    if (wd != null)
                    {
                        wd.Close(ref doNotSave, ref missing, ref missing);
                        System.Runtime.InteropServices.Marshal.ReleaseComObject(wd);
                    }
                }
                finally
                {
                    if (app != null)
                    {
                        app.Quit(ref doNotSave, ref missing, ref missing);
                        System.Runtime.InteropServices.Marshal.ReleaseComObject(app);
                    }
                }
            }

            return context.ToString();
        }

 

你可能感兴趣的:(其它)