// Sample HTML fragment used to demonstrate the tag-stripping regex; Page_Load overwrites it with the filtered result.
protected string str = "<table><tr><td>sdasasdsdd</td></tr></table><br><p>sds</p><img id='img1' src='http://www.baidu.com/img/baidu_logo.gif' width='100' height='50' alt=''>aaassss<br><img src='http://www.baidu.com/img/baidu_logo.gif' width='100' height='50' alt=''> 说是道 ";
/// <summary>
/// Filters the <c>str</c> field in place, removing every HTML tag except
/// <c>img</c>, <c>br</c> and <c>p</c> (opening and closing <c>p</c>).
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
protected void Page_Load(object sender, EventArgs e)
{
    // Alternative patterns kept for reference:
    //   @"<[^>]*>"                       strips every tag
    //   @"<script[^>]*?>.*?</script >"   strips scripts, including the text between the tags
    //   @"<img[^>]*>"                    strips images
    //   @"<(?!br).*?>"                   strips every tag except br
    //   @"<table[^>]*?>.*?</table>"      strips tables, including everything inside them

    // The \b after the alternation makes the lookahead compare whole tag names,
    // so tags that merely start with an allowed name (<pre>, <param>, ...) are
    // stripped instead of being kept. [^>]* is used rather than .*? so a tag
    // whose attributes wrap onto another line is still matched.
    string regexstr = @"<(?!(?:img|br|p|/p)\b)[^>]*>";
    str = Regex.Replace(str, regexstr, string.Empty, RegexOptions.IgnoreCase);
}
asp中正则表达式去除HTML标记(窃自eWebEditor)
2009年12月31日 星期四 下午 12:40
' Deletes every match of a regular expression from a string.
'   re      - pattern text in VBScript RegExp syntax
'   content - text to search
' Returns content with each match replaced by the empty string.
' Matching is global; IgnoreCase is not set here.
Function ExecReg(re, content)
    Dim matcher
    Set matcher = New RegExp
    matcher.Pattern = re
    matcher.Global = True
    ExecReg = matcher.Replace(content, "")
End Function
' Global regex substitution used by DecodeFilter where part of the match has
' to be kept (ExecReg can only delete whole matches).
'   pattern     - VBScript RegExp pattern
'   replacement - replacement text; may reference groups as $1, $2, ...
'   content     - text to process
Function DecodeFilterSubst(pattern, replacement, content)
    Dim rx
    Set rx = New RegExp
    rx.Global = True
    rx.Pattern = pattern
    DecodeFilterSubst = rx.Replace(content, replacement)
End Function

' Lower-cases an HTML fragment and strips script-related text, a fixed list of
' tags, and class/style attributes from it.
'   html - the markup to filter. It is assigned to inside the function, so a
'          variable passed by reference is modified as well.
' Returns the filtered, lower-cased markup.
'
' NOTE(review): LCase is applied to the whole input, so visible text is
' lower-cased too, not only tag names.
' NOTE(review): tag patterns match by prefix, e.g. "</?b[^>]*>" also removes
' <br> and <body>, and "</?a[^>]*>" also removes <abbr>. Left unchanged because
' callers may rely on it - confirm whether that is intended.
' NOTE(review): this is a pattern blacklist (only some on* handlers are
' covered); it should not be relied on as the only defence against script
' injection.
Function DecodeFilter(html)
    Dim tagName

    html = LCase(html)

    ' Client-side script: <script> tags, script URL schemes, event-handler
    ' attribute prefixes and the "&#" that starts numeric character references.
    html = ExecReg("</?script[^>]*>", html)
    html = ExecReg("(javascript|jscript|vbscript|vbs):", html)
    ' \b keeps ordinary words that merely contain "onkey", "onclick", ...
    ' (such as "monkey") from being cut apart.
    html = ExecReg("\bon(mouse|exit|error|click|key)", html)
    html = ExecReg("&#", html)

    ' Layout and formatting tags (opening and closing forms).
    For Each tagName In Array("table", "tr", "th", "td", "a", "p", "img", _
                              "div", "ul", "li", "tbody", _
                              "h1", "h2", "h3", "h4", "h5", "h6", _
                              "b", "strong")
        html = ExecReg("</?" & tagName & "[^>]*>", html)
    Next

    ' Remove class=... and style="..." attributes. Groups 1 and 2 hold the
    ' rest of the tag and are written back, so only the attribute is removed
    ' and the tag itself stays in place.
    html = DecodeFilterSubst("(<[^>]+) class=[^ |^>]*([^>]*>)", "$1$2", html)
    html = DecodeFilterSubst("(<[^>]+) style=""[^""]*""([^>]*>)", "$1$2", html)

    ' <?xml ...> / <xml> tags and namespaced tags such as <o:p></o:p>.
    html = ExecReg("</?xml[^>]*>", html)
    html = ExecReg("</?[a-z]+:[^>]*>", html)

    ' Fonts, marquees and embedded objects.
    For Each tagName In Array("font", "marquee", "object", "param", "embed")
        html = ExecReg("</?" & tagName & "[^>]*>", html)
    Next

    DecodeFilter = html
End Function
' Removes every "<...>" span from strText and returns the remaining text.
'   strText - text that may contain HTML tags
' A span runs from "<" to the next ">"; all occurrences are removed.
Function RemoveHTML(strText)
    Dim tagMatcher
    Set tagMatcher = New RegExp
    With tagMatcher
        .Global = True
        .Pattern = "<[^>]*>"
        RemoveHTML = .Replace(strText, "")
    End With
End Function
' Strips HTML tags from str and removes spaces from the result.
'   str - text that may contain HTML tags. It is assigned to inside the
'         function, so a variable passed by reference is modified as well.
' Returns the stripped text.
Function nohtml(str)
    Dim re
    Set re = New RegExp
    re.IgnoreCase = True
    re.Global = True

    ' Match "<" ... ">" literally. The patterns previously required "/<" and
    ' "/>" around each tag, which ordinary markup never contains, so nothing
    ' was stripped.
    re.Pattern = "(<.[^<]*>)"
    str = re.Replace(str, " ")
    re.Pattern = "(</[^<]*>)"
    str = re.Replace(str, " ")

    ' NOTE(review): both calls below remove the same literal as it appears in
    ' this file, which makes the second one a no-op. Presumably the two
    ' literals were different whitespace forms originally - confirm.
    str = Replace(str, " ", "")
    str = Replace(str, " ", "")

    nohtml = str
    Set re = Nothing
End Function