C#读取HTML文件内容写入记事本

try
            {
                int totalFile = 0;
                //string dirPath = @"E:/chfuMetarnet/BSC6810 alarm/";
                if (this.textBox1.Text.Trim() == "")
                {
                    MessageBox.Show("请输入HTML文件路径!");
                }
                else
                {
                    string dirPath = this.textBox1.Text.Trim();
                    if (!dirPath.Substring(dirPath.Length - 1).Contains("//"))
                    {
                        dirPath = dirPath+"//";
                    }
                    StreamWriter sw;

                    DirectoryInfo dirInfo = new DirectoryInfo(dirPath);
                    FileInfo[] files = dirInfo.GetFiles();
                    string filename = dirPath + "告警经验库信息.txt";
                    if (File.Exists(filename))
                    {
                        sw = File.AppendText(filename);
                    }
                    else
                    {
                        sw = File.CreateText(filename);
                    }
                    foreach (FileInfo fileinfo in files)
                    {
                        if (fileinfo.Extension.Equals(".htm"))//遍历所有htm文件
                        {
                            totalFile = totalFile + 1;
                            WebRequest myWebRequest = WebRequest.Create(dirPath + fileinfo.Name);
                            WebResponse myWebResponse = myWebRequest.GetResponse();
                            Stream myStream = myWebResponse.GetResponseStream();
                            Encoding encode = System.Text.Encoding.GetEncoding("gb2312");
                            StreamReader myStreamReader = new StreamReader(myStream, encode);
                            string strhtml = myStreamReader.ReadToEnd();
                            myWebResponse.Close();
                            string stroutput = strhtml;
                            Regex regex = new Regex(@"<[^>]+>|]+>");//去掉HTML标记的正则表达式
                            string tmpStr = "

([^<]*)

";        //获取

之间内容的表达式
                            Match TitleMatch = Regex.Match(strhtml, tmpStr, RegexOptions.IgnoreCase | RegexOptions.Multiline);
                            string causename = TitleMatch.Value.ToString();//包含

标记
                            causename = Regex.Replace(causename, "[/n|/r|/t]", " ");//去掉换行和TAB键符号
                            causename = causename.Trim();
                            string cause = causename.Substring(4, causename.Length - 9);//得到告警原因
                            string titleStr = "([^<]*)";
                            TitleMatch = Regex.Match(strhtml, titleStr, RegexOptions.IgnoreCase | RegexOptions.Multiline);
                            string titlename = TitleMatch.Value.ToString();
                            titlename = Regex.Replace(titlename, "[/n|/r|/t]", "");//去掉换行和TAB键符号
                            titlename = titlename.Trim();
                            string regexStr = "
  • (?.*?)
";//获取
  • 后边的内容,直到
结尾
                            Regex r = new Regex(regexStr, RegexOptions.None);
                            strhtml = Regex.Replace(strhtml, "[/n|/r|/t]", "");//去掉换行和TAB键符号
                            Match mc = r.Match(strhtml);
                            string dataStr = mc.Groups["key"].Value;
                            dataStr = "
  • " + dataStr + "
";//得到完整的
    之间的源码
                                strhtml = strhtml.Replace(dataStr, "");//将去掉换行符和tab键的源码中去除
      部分源码
                                  strhtml = strhtml.Replace(titlename, "");//去掉
                                  strhtml = regex.Replace(strhtml, " ");//过滤掉HTML标记
                                  strhtml = strhtml.Replace(" ", "");//去掉空格字符
                                  string[] arr = cause.Split(' ');
                                  string zhCause = arr[arr.Length - 1];//获取数组最后一个元素:告警原因
                                  sw.WriteLine("第" + totalFile + "个文件:" + fileinfo.Name);
                                  sw.WriteLine("-----告警原因------:");
                                  //sw.WriteLine(cause);// ALM-1 网元启动
                                  zhCause=this.chinaString(zhCause);
                                  sw.WriteLine(zhCause);//网元启动
                                  sw.WriteLine("-----处理经验------:");
                                  sw.WriteLine(strhtml);
                                  sw.WriteLine();
                                  sw.Flush();

                              }
                          }
                          sw.Close();
                          MessageBox.Show("操作成功!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
                      }
                  }
                  catch (Exception ee)
                  {
                      MessageBox

      你可能感兴趣的:(C#读取HTML文件内容写入记事本)