C# 正则表达式,去除所有HTML标签

// Sample input text that Page_Load runs the tag-stripping regex over.
// A verbatim literal (@"...") is required because a regular string literal
// cannot span multiple lines in C#.
// NOTE(review): the sample presumably contained HTML tags that were lost when
// this snippet was published; none are present now - restore them before
// using this as a demo.
protected string str = @"
sdasasdsdd

sds

aaassss
说是道 ";

    protected void Page_Load(object sender, EventArgs e)
    {

        //string regexstr = @"<[^>]*>";    //去除所有的标签

        //@"]*?>.*?" //去除所有脚本,中间部分也删除
         
        // string regexstr = @"]*>";   //去除图片的正则

       // string regexstr = @"<(?!br).*?>";   //去除所有标签,只剩br

        // string regexstr = @"]*?>.*?";   //去除table里面的所有内容

        string regexstr = @"<(?!img|br|p|/p).*?>";   //去除所有标签,只剩img,br,p
   
        str = Regex.Replace(str, regexstr, string.Empty, RegexOptions.IgnoreCase);

    }

 

 

 

 

 

ASP.NET 去除所有HTML标记 < type="text/javascript">function StorePage(){d=document;t=d.selection?(d.selection.type!='None'?d.selection.createRange().text:''):(d.getSelection?d.getSelection():'');void(keyit=window.open('http://www.365key.com/storeit.aspx?t='+escape(d.title)+'&u='+escape(d.location.href)+'&c='+escape(t),'keyit','scrollbars=no,width=475,height=575,left=75,top=20,status=no,resizable=yes'));keyit.focus();}
注意:需要先using  System.Text.RegularExpressions;  

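  /// <summary>
  /// Removes HTML markup.
  /// </summary>
  /// <param name="Htmlstring">Source text that contains HTML.</param>
  /// <returns>The text with the markup removed.</returns>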
  public   static   string   NoHTML(string   Htmlstring)   
  {   
  //删除脚本   
  Htmlstring   =   Regex.Replace(Htmlstring,@"]*?>.*?","",RegexOptions.IgnoreCase);   
  //删除HTML   
  Htmlstring   =   Regex.Replace(Htmlstring,@"<(.[^>]*)>","",RegexOptions.IgnoreCase);   
  Htmlstring   =   Regex.Replace(Htmlstring,@"([\r\n])[\s]+","",RegexOptions.IgnoreCase);   
  Htmlstring   =   Regex.Replace(Htmlstring,@"-->","",RegexOptions.IgnoreCase);   
  Htmlstring   =   Regex.Replace(Htmlstring,@"",   
                      @"

你可能感兴趣的:(.net)