本来标题应该加上“输出过滤”的,想想还是不加好了。看了老赵关于整站关键字过滤的文章,感觉还是有点晕乎,以前也没怎么用过httpModule这东西。在网上看了下相关的文章,讲的还是比较多的,其中看到一篇关于输出过滤的文章,原文地址是 http://www.cnblogs.com/zgqys1980/archive/2008/09/02/1281895.html ,基本还算能看明白,呵呵,我这个算是转载吧。
原文的输出过滤主要是通过ReleaseRequestState这个事件来处理的,即在执行完所有请求事件处理后发生,也就是请求处理全部结束后再进行过滤。这么一来截获的就是整个page的html源文件了,然后对源文件内容进行特殊字符替换过滤处理(当然在替换前可以将原内容进行相关写入操作保存起来)。捕获完成是以"</html>"为标志,因为最终在浏览器中呈现的html文件内容一般都是以"</html>"结尾的。把输出过滤的处理类贴在下面(保留原作者注释的同时也加了几句注释,另外注释掉的部分代码是将替换前的原html内容保存处理):
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Web;
using System.Web.UI;
using System.Reflection;
using System.Text.RegularExpressions;
using System.IO;
namespace httpModuleOutputFilter
{
/// <summary>
/// HTTP module that wraps the response stream of every request whose executing
/// file path ends in ".aspx" with a <see cref="RawFilter"/>, so the rendered
/// output passes through that filter before it is sent.
/// </summary>
public class OutputFilter : IHttpModule
{
    private HttpApplication _contextApplication;

    /// <summary>
    /// Remembers the application and subscribes to its ReleaseRequestState event,
    /// which is where the output filter is installed.
    /// </summary>
    /// <param name="context">The application this module is attached to.</param>
    public void Init(HttpApplication context)
    {
        _contextApplication = context;
        // Hook the event so the filter is installed once request handling has finished.
        context.ReleaseRequestState += new EventHandler(context_ReleaseRequestState);
    }

    /// <summary>
    /// Releases the reference to the application.
    /// </summary>
    public void Dispose()
    {
        // The previous implementation nulled the field and then dereferenced it,
        // which always threw NullReferenceException. The module does not own the
        // HttpApplication, so it must not dispose it; dropping the reference is enough.
        _contextApplication = null;
    }

    /// <summary>
    /// Installs the output filter for requests that target an .aspx page.
    /// </summary>
    /// <param name="sender">The <see cref="HttpApplication"/> raising the event.</param>
    /// <param name="e">Unused.</param>
    void context_ReleaseRequestState(object sender, EventArgs e)
    {
        HttpApplication application = (HttpApplication)sender;

        // Only ASPX pages are intercepted. Ordinal, case-insensitive comparison
        // avoids culture-dependent lower-casing of the path.
        string path = application.Request.CurrentExecutionFilePath;
        if (path != null && path.EndsWith(".aspx", StringComparison.OrdinalIgnoreCase))
        {
            // Chain the filter in front of whatever filter is already installed.
            application.Response.Filter = new RawFilter(application.Response.Filter);

            // Binding the filter event is left disabled, as in the original sample:
            //RawFilter filter = (RawFilter)application.Response.Filter;
            //filter.OnRawDataRecordedEvent += new EventHandler<RawDataEventArgs>(filter_OnRawDataRecordedEvent);
        }
    }

    /// <summary>
    /// Handler intended for storing the captured raw page once it is available.
    /// It is not subscribed anywhere in this class (see the disabled lines above).
    /// </summary>
    /// <param name="sender">The filter that captured the data.</param>
    /// <param name="e">Carries the captured page source.</param>
    void filter_OnRawDataRecordedEvent(object sender, RawDataEventArgs e)
    {
        string allcode = e.SourceCode; // the whole page HTML as captured before replacement
        //WapSite.SiteDataClass wapdata = new WapSite.SiteDataClass();
        //wapdata.WriteRawDataLog(allcode);
    }
}
/// <summary>
/// Event data carrying a captured page source, so the complete text can be
/// handed to event subscribers.
/// </summary>
public class RawDataEventArgs : EventArgs
{
    /// <summary>
    /// Creates the event data for the given page source.
    /// </summary>
    /// <param name="SourceCode">The captured source text.</param>
    public RawDataEventArgs(string SourceCode)
    {
        this.SourceCode = SourceCode;
    }

    /// <summary>
    /// The captured source text.
    /// </summary>
    public string SourceCode { get; set; }
}
/// <summary>
/// Custom response filter. Bytes written to it are decoded as UTF-8 and buffered;
/// once the end-of-page marker "&lt;/html&gt;" has been seen (or the filter is closed)
/// the buffered text has its keyword replaced and is written, UTF-8 encoded, to the
/// wrapped stream.
/// </summary>
public class RawFilter : Stream
{
    // Marker that signals the page has been completely written.
    private const string EndOfPageMarker = "</html>";

    Stream responseStream;
    long position;
    StringBuilder responseHtml;
    // Stateful decoder: keeps the leading bytes of a multi-byte character that is
    // split across two Write calls, instead of decoding each chunk independently.
    Decoder decoder;
    bool closed;

    /// <summary>
    /// Raised with the unmodified buffered text each time buffered content is
    /// emitted: when the end-of-page marker is seen, and again on Close if more
    /// content was written after that.
    /// </summary>
    public event EventHandler<RawDataEventArgs> OnRawDataRecordedEvent;

    /// <summary>
    /// Creates a filter that forwards its processed output to <paramref name="inputStream"/>.
    /// </summary>
    /// <param name="inputStream">The stream that receives the filtered output.</param>
    public RawFilter(Stream inputStream)
    {
        responseStream = inputStream;
        responseHtml = new StringBuilder();
        decoder = Encoding.UTF8.GetDecoder();
    }

    // Stream overrides. The capability flags are reported as in the original sample.
    public override bool CanRead
    {
        get { return true; }
    }

    public override bool CanSeek
    {
        get { return true; }
    }

    public override bool CanWrite
    {
        get { return true; }
    }

    /// <summary>
    /// Emits any text that is still buffered, then closes the wrapped stream.
    /// </summary>
    public override void Close()
    {
        try
        {
            if (!closed)
            {
                closed = true;
                // Flush the decoder so a trailing incomplete byte sequence is not
                // silently kept, then send whatever never reached the marker
                // (pages without "</html>", or content written after it).
                AppendDecoded(new byte[0], 0, 0, true);
                EmitBufferedHtml();
            }
        }
        finally
        {
            responseStream.Close();
        }
    }

    /// <summary>
    /// Flushes the wrapped stream. Text still waiting for the end-of-page marker
    /// stays buffered; it is emitted by Write or Close.
    /// </summary>
    public override void Flush()
    {
        responseStream.Flush();
    }

    /// <summary>Always reports 0.</summary>
    public override long Length
    {
        get { return 0; }
    }

    /// <summary>Stores and returns a value locally; it is not applied to the wrapped stream.</summary>
    public override long Position
    {
        get { return position; }
        set { position = value; }
    }

    /// <summary>Delegates to the wrapped stream.</summary>
    public override int Read(byte[] buffer, int offset, int count)
    {
        return responseStream.Read(buffer, offset, count);
    }

    /// <summary>Delegates to the wrapped stream.</summary>
    public override long Seek(long offset, SeekOrigin origin)
    {
        return responseStream.Seek(offset, origin);
    }

    /// <summary>Delegates to the wrapped stream.</summary>
    public override void SetLength(long length)
    {
        responseStream.SetLength(length);
    }

    /// <summary>
    /// Key interception point: buffers the decoded chunk and, once the end-of-page
    /// marker is present, emits the whole buffered page.
    /// </summary>
    /// <param name="buffer">Bytes being written to the response.</param>
    /// <param name="offset">Index of the first byte to use.</param>
    /// <param name="count">Number of bytes to use.</param>
    public override void Write(byte[] buffer, int offset, int count)
    {
        // Re-scan only the newly appended text plus enough preceding characters
        // to catch a marker that straddles the previous chunk boundary.
        int scanFrom = Math.Max(0, responseHtml.Length - (EndOfPageMarker.Length - 1));

        AppendDecoded(buffer, offset, count, false);

        string tail = responseHtml.ToString(scanFrom, responseHtml.Length - scanFrom);
        if (tail.IndexOf(EndOfPageMarker, StringComparison.OrdinalIgnoreCase) >= 0)
        {
            // The page is complete: hand it on.
            EmitBufferedHtml();
        }
        // Otherwise the page is not finished yet; keep accumulating.
    }

    // Decodes the given bytes as UTF-8 (carrying partial characters over between
    // calls) and appends the resulting text to the buffer.
    private void AppendDecoded(byte[] buffer, int offset, int count, bool flush)
    {
        char[] chars = new char[decoder.GetCharCount(buffer, offset, count, flush)];
        int written = decoder.GetChars(buffer, offset, count, chars, 0, flush);
        responseHtml.Append(chars, 0, written);
    }

    // Raises the raw-data event, writes the keyword-replaced text to the wrapped
    // stream and clears the buffer so nothing is sent twice.
    private void EmitBufferedHtml()
    {
        if (responseHtml.Length == 0)
        {
            return;
        }

        string finalHtml = responseHtml.ToString();
        responseHtml.Length = 0;

        // finalHtml is the page source before replacement.
        EventHandler<RawDataEventArgs> handler = OnRawDataRecordedEvent;
        if (handler != null)
        {
            handler(this, new RawDataEventArgs(finalHtml));
        }

        // Demo replacement: mask the keyword, then pass the result downstream.
        byte[] data = Encoding.UTF8.GetBytes(finalHtml.Replace("城管", "**"));
        responseStream.Write(data, 0, data.Length);
    }
}
}
上面就是一个httpModule的简单应用了,至于这个方案的其它方面我就不说了,也不好说,主要是大家能看得明白这个简单的httpModule应用就好。另外要在web项目里添加编译好的httpModule.dll引用,还要在web.config的<system.web>节点里加上下面的配置:<httpModules>
<add name="OutputFilter" type="httpModuleOutputFilter.OutputFilter, httpModule" />
</httpModules>