using
System;
using
System.Data;
using
System.Configuration;
using
System.Web;
using
System.Web.Security;
using
System.Web.UI;
using
System.Web.UI.WebControls;
using
System.Web.UI.WebControls.WebParts;
using
System.Web.UI.HtmlControls;
using
System.Text.RegularExpressions;
/// <summary>
/// Utility methods for stripping HTML markup from strings and for extracting
/// the first image URL from an HTML fragment.
/// </summary>
public class FiterHtml
{
    // <script> elements together with their content. Singleline lets '.' span
    // line breaks so that multi-line script bodies are removed as well.
    private static readonly Regex ScriptBlock = new Regex(
        @"<script[^>]*?>.*?</script>",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Anything that looks like a tag: '<', at least one character, up to the next '>'.
    private static readonly Regex SimpleTag = new Regex(@"<(.[^>]*)>", RegexOptions.IgnoreCase);

    // Attribute-aware tag pattern used by StripHTML. This is ONE pattern: inside the
    // character class "\7" is an octal escape, outside it is a back-reference to the
    // opening quote captured by group 7.
    // NOTE(review): the nested lazy quantifiers may backtrack heavily on hostile
    // input - consider a match timeout if this runs on untrusted HTML.
    private static readonly Regex AttributeAwareTag = new Regex(
        @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
        RegexOptions.IgnoreCase);

    // Any tag, as used by ParseTags.
    private static readonly Regex AnyTag = new Regex("<[^>]*>");

    // A CR or LF followed by at least one more whitespace character.
    private static readonly Regex LineBreakRun = new Regex(@"([\r\n])[\s]+", RegexOptions.IgnoreCase);

    private static readonly Regex CommentClose = new Regex(@"-->", RegexOptions.IgnoreCase);
    private static readonly Regex CommentOpenToEndOfLine = new Regex(@"<!--.*", RegexOptions.IgnoreCase);
    private static readonly Regex CommentOpenThroughNewline = new Regex(@"<!--.*\n", RegexOptions.IgnoreCase);

    // Character entities that are decoded, applied in this order.
    private static readonly Regex[] EntityPatterns =
    {
        new Regex(@"&(quot|#34);", RegexOptions.IgnoreCase),
        new Regex(@"&(amp|#38);", RegexOptions.IgnoreCase),
        new Regex(@"&(lt|#60);", RegexOptions.IgnoreCase),
        new Regex(@"&(gt|#62);", RegexOptions.IgnoreCase),
        new Regex(@"&(nbsp|#160);", RegexOptions.IgnoreCase),
        new Regex(@"&(iexcl|#161);", RegexOptions.IgnoreCase),
        new Regex(@"&(cent|#162);", RegexOptions.IgnoreCase),
        new Regex(@"&(pound|#163);", RegexOptions.IgnoreCase),
        new Regex(@"&(copy|#169);", RegexOptions.IgnoreCase)
    };

    // Replacement text for EntityPatterns (same index).
    private static readonly string[] EntityReplacements =
    {
        "\"", "&", "<", ">", " ", "\u00A1", "\u00A2", "\u00A3", "\u00A9"
    };

    // Any remaining numeric character reference; these are dropped, not decoded.
    private static readonly Regex NumericEntity = new Regex(@"&#(\d+);", RegexOptions.IgnoreCase);

    // src attribute of an <img> tag: double-quoted, single-quoted or unquoted value.
    // The value is captured without its quotes in the "url" group.
    private static readonly Regex ImgSrc = new Regex(
        @"<img\b[^>]*?\ssrc\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s>""']+))",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    public FiterHtml()
    {
    }

    /// <summary>
    /// Removes HTML markup from a string and returns the remaining text,
    /// HTML-encoded and trimmed.
    /// </summary>
    /// <param name="Htmlstring">The HTML to clean; may be null.</param>
    /// <returns>
    /// The HTML-encoded plain text, or an empty string when
    /// <paramref name="Htmlstring"/> is null or empty.
    /// </returns>
    public static string NoHTML(string Htmlstring)
    {
        if (string.IsNullOrEmpty(Htmlstring))
        {
            return string.Empty;
        }

        // Remove scripts first so their content never reaches the output.
        string text = ScriptBlock.Replace(Htmlstring, "");

        // Remove tags, collapse line-break runs and drop comment delimiters.
        text = SimpleTag.Replace(text, "");
        text = LineBreakRun.Replace(text, "");
        text = CommentClose.Replace(text, "");
        text = CommentOpenToEndOfLine.Replace(text, "");

        text = DecodeEntities(text);
        text = RemoveResidualMarkup(text);

        // HttpUtility.HtmlEncode is static, so no current HttpContext is required.
        return HttpUtility.HtmlEncode(text).Trim();
    }

    /// <summary>
    /// Removes scripts, tags and comment delimiters from a string and decodes
    /// common character entities. The result is not HTML-encoded.
    /// </summary>
    /// <param name="strHtml">The HTML to clean; may be null.</param>
    /// <returns>
    /// The plain text, or an empty string when <paramref name="strHtml"/>
    /// is null or empty.
    /// </returns>
    public static string StripHTML(string strHtml)
    {
        if (string.IsNullOrEmpty(strHtml))
        {
            return string.Empty;
        }

        string text = ScriptBlock.Replace(strHtml, "");
        text = AttributeAwareTag.Replace(text, "");
        text = LineBreakRun.Replace(text, "");
        text = DecodeEntities(text);

        // A comment terminator becomes a line break, which is removed again below
        // together with any other CR/LF pair.
        text = CommentClose.Replace(text, "\r\n");
        text = CommentOpenThroughNewline.Replace(text, "");

        return RemoveResidualMarkup(text);
    }

    /// <summary>
    /// Removes HTML tags from a string.
    /// </summary>
    /// <param name="HTMLStr">The HTML to clean; may be null.</param>
    /// <returns>
    /// The input without any <c>&lt;...&gt;</c> sequences, or an empty string
    /// when <paramref name="HTMLStr"/> is null or empty.
    /// </returns>
    public static string ParseTags(string HTMLStr)
    {
        if (string.IsNullOrEmpty(HTMLStr))
        {
            return string.Empty;
        }

        return AnyTag.Replace(HTMLStr, "");
    }

    /// <summary>
    /// Extracts the <c>src</c> value of the first <c>&lt;img&gt;</c> tag in the text.
    /// </summary>
    /// <param name="HTMLStr">The HTML to search; may be null.</param>
    /// <returns>
    /// The image URL with its original casing and without surrounding quotes,
    /// or an empty string when no image is found.
    /// </returns>
    public static string GetImgUrl(string HTMLStr)
    {
        if (string.IsNullOrEmpty(HTMLStr))
        {
            return string.Empty;
        }

        // Match case-insensitively on the original text instead of lower-casing
        // the input, so that case-sensitive URLs are returned unchanged.
        Match match = ImgSrc.Match(HTMLStr);
        return match.Success ? match.Groups["url"].Value : string.Empty;
    }

    // Decodes the supported named/numeric entities in order, then drops any
    // remaining numeric character reference.
    private static string DecodeEntities(string text)
    {
        for (int i = 0; i < EntityPatterns.Length; i++)
        {
            text = EntityPatterns[i].Replace(text, EntityReplacements[i]);
        }

        return NumericEntity.Replace(text, "");
    }

    // Removes stray angle brackets and CR/LF pairs. String.Replace returns a new
    // string, so the result has to be kept rather than discarded.
    private static string RemoveResidualMarkup(string text)
    {
        return text.Replace("<", "").Replace(">", "").Replace("\r\n", "");
    }
}