基于 node-http-proxy 的代理脚本(功能更新):现在支持对被 GFW 屏蔽的 URL 直接快速返回 404,避免等待连接超时;同时支持把墙外的 CDN URL 映射为本地主机(localhost)上的资源。

"use strict";
// Per the original note: strict mode (the directive above) is enabled so the
// `for (let ... of ...)` syntax used below is accepted.

// TCP port the proxy server listens on.
var ProxyPort = 8888;

var http = require('http'),
    net = require('net'),
    url = require('url'),
    httpProxy = require('http-proxy'); //需要安装Node 5.0, 然后执行npm install http-proxy --save

// Shared http-proxy instance; plain-HTTP requests that need no special handling
// are forwarded through it via proxy.web().
var proxy = httpProxy.createProxyServer({
    autoRewrite: true, // NOTE(review): presumably rewrites the Location host/port on redirect responses - confirm against the http-proxy docs
});

/**
 * Handle failures reported by http-proxy while forwarding a request.
 *
 * Without a reply here the client response is never finished and the browser
 * waits until its own timeout, so the error is logged and answered with a
 * 502 whenever the response has not been started yet.
 *
 * @param {Error} err - the I/O error raised while proxying.
 * @param {http.IncomingMessage} req - the client request being proxied.
 * @param {http.ServerResponse} res - the client response (may already be partially sent).
 */
proxy.on('error', function (err, req, res) {
    var requestedUrl = req && req.url;
    console.log("httpProxy error: URL=" + requestedUrl + " " + err);

    // Defensive: only an HTTP response object can carry a status line.
    if (!res || typeof res.writeHead !== 'function') {
        if (res && typeof res.destroy === 'function') {
            res.destroy();
        }
        return;
    }

    if (res.headersSent) {
        // Headers (and maybe part of the body) already went out: calling
        // writeHead again would throw, so just terminate the response.
        res.end();
        return;
    }

    res.writeHead(502, {
        'Content-Type': 'text/plain; charset=utf-8'
    });
    res.end('httpProxy代理请求发生IO错误:URL=' + requestedUrl);
});

// Hostnames treated as blocked only when they match exactly.
const GFW_EXACT_BLOCKED_HOSTS = new Set([
    "ajax.googleapis.com",
    "fonts.googleapis.com",
    "cdn.datatables.net",
    "www.google.com",
    "www.slideshare.net",
    "twitter.com",
    "botanwang.com",
    "facebook.com",
]);

// Domain suffixes: any hostname ending with one of these is treated as blocked.
// The leading dot means the bare domain itself is NOT matched by the suffix rule.
const GFW_SUFFIX_BLOCKED_HOSTS = [
    ".facebook.com",
    ".google.com",
    ".googleapis.com",
];

/**
 * Tell whether a hostname is on the built-in list of hosts blocked by the GFW.
 *
 * @param {?string} host - hostname to test (no port). Non-string or empty
 *   values (e.g. the `null` hostname url.parse() yields for a relative request
 *   target) are reported as not blocked instead of throwing.
 * @returns {boolean} true when the host matches the exact list or a blocked suffix.
 */
function isBlockedByGFW(host){
    if (typeof host !== 'string' || host.length === 0) {
        return false;
    }
    // Hostnames are case-insensitive; normalize before comparing.
    const normalized = host.toLowerCase();
    if (GFW_EXACT_BLOCKED_HOSTS.has(normalized)) {
        return true;
    }
    //TODO: consider regular expressions for matching blocked sites
    return GFW_SUFFIX_BLOCKED_HOSTS.some((suffix) => normalized.endsWith(suffix));
}

// Blocked CDN URL -> locally served replacement URL.
// A Map is used (rather than a plain object) so that request targets such as
// "toString" or "constructor" cannot resolve to inherited prototype members.
const GFW_URL_REMAPPING = new Map([
    ["http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js", "http://127.0.0.1/jquery-1.12.3.min.js"],
    // needed by http://www.menscyzo.com/
    ["http://ajax.googleapis.com/ajax/libs/jquery/1.8/jquery.min.js", "http://127.0.0.1/jquery-1.12.3.min.js"],
]);

/**
 * Look up the local replacement for a blocked CDN URL.
 *
 * @param {string} url - the full request URL, matched exactly (including scheme and path).
 * @returns {string|undefined} the local URL to fetch instead, or undefined when
 *   the URL has no remapping.
 */
function check_url_remapping_needed(url){
    return GFW_URL_REMAPPING.get(url);
}

/**
 * Handle an HTTP CONNECT request, i.e. act as a blind TCP tunnel for HTTPS.
 * The tunnelled bytes are relayed as-is; their content cannot be modified.
 *
 * Targets blocked by the GFW are answered immediately with a 404 so the client
 * does not wait for a connection timeout.
 *
 * @param {http.IncomingMessage} cReq - the CONNECT request; `cReq.url` is "host:port".
 * @param {net.Socket} cSock - the raw client socket.
 * @param {Buffer} [head] - bytes the client already sent after the request
 *   headers (third argument of the http.Server 'connect' event); forwarded upstream.
 */
function connect(cReq, cSock, head) {
    //console.log("CONNECT: cReq.url="+cReq.url);
    let pSock = null;
    let established = false;

    // An unhandled 'error' on the client socket (e.g. ECONNRESET) would be
    // thrown as an uncaught exception and take the whole proxy down.
    cSock.on('error', function(e) {
        console.log("CONNECT: client socket error! "+e);
        if (pSock) {
            pSock.destroy();
        }
    });

    const u = url.parse('http://' + cReq.url);
    if (!u.hostname) {
        cSock.end('HTTP/1.1 400 Bad Request\r\n\r\n');
        return;
    }
    if (isBlockedByGFW(u.hostname)){
        //console.log("CONNECT to host="+u.hostname+" is blocked by GFW, fast-return 404 instead");
        cSock.write('HTTP/1.1 404 CONNECT request blocked to avoid GFW timeout\r\n\r\n');
        cSock.end();
        return;
    }

    // CONNECT targets normally carry an explicit port; fall back to the HTTPS
    // port because tunnels are used for HTTPS traffic.
    pSock = net.connect(u.port||443, u.hostname, function() {
        established = true;
        cSock.write('HTTP/1.1 200 Connection Established\r\n\r\n');
        pSock.pipe(cSock);
    });
    pSock.on('error', function(e) {
        console.log("CONNECT: error! "+e);
        if (!established) {
            // Tell the client the tunnel could not be opened instead of
            // closing the connection without any status line.
            cSock.end('HTTP/1.1 502 Bad Gateway\r\n\r\n');
            return;
        }
        cSock.end();
    });

    // Bytes that arrived together with the CONNECT headers belong to the tunnel.
    if (head && head.length > 0) {
        pSock.write(head);
    }
    // Writes issued before the upstream connection is ready are queued by net.Socket.
    cSock.pipe(pSock);
}

/**
 * Answer a request for a blocked CDN URL by fetching its local replacement
 * and relaying status, headers and body to the client.
 *
 * @param {http.IncomingMessage} req - the original client request.
 * @param {http.ServerResponse} res - the client response to fill.
 * @param {string} mappedUrl - absolute URL of the local replacement resource.
 */
function serveRemappedUrl(req, res, mappedUrl) {
    console.log("remap "+req.url+" to "+mappedUrl);
    const mapped = url.parse(mappedUrl);
    // Forward the client's headers but address the local server, not the
    // blocked CDN host the browser originally asked for.
    const headers = Object.assign({}, req.headers, { host: mapped.host });
    const upstream = http.request({
        hostname: mapped.hostname,
        port: mapped.port || 80,
        path: mapped.path,
        method: 'GET',
        headers: headers,
    }, (upstreamRes) => {
        // Relay status and headers too, so the browser sees the real
        // Content-Type / caching headers / error status of the local file.
        res.writeHead(upstreamRes.statusCode, upstreamRes.headers);
        upstreamRes.pipe(res);
    });
    // Without this handler a refused local connection is an uncaught exception.
    upstream.on('error', (err) => {
        console.log("remap request to "+mappedUrl+" failed: "+err);
        if (!res.headersSent) {
            res.writeHead(502, { 'Content-Type': 'text/plain' });
        }
        res.end();
    });
    upstream.end();
}

/**
 * Dispatch one plain-HTTP proxy request:
 *   1. remap known blocked CDN URLs to a local resource,
 *   2. suppress the Baidu analytics script,
 *   3. fail fast (404) for hosts blocked by the GFW,
 *   4. otherwise forward the request through http-proxy.
 *
 * @param {http.IncomingMessage} req - client request; `req.url` is expected to be an absolute URL.
 * @param {http.ServerResponse} res - client response.
 */
function handleRequest(req, res) {
    console.log("REQUEST req.url=" + req.url);
    const u = url.parse(req.url);
    // Note: u.path includes the query string, u.pathname does not.

    const mapped_local_url = check_url_remapping_needed(req.url);
    if (typeof mapped_local_url === 'string' && mapped_local_url.length > 0) {
        // Map a blocked CDN resource URL to a path on the local host.
        serveRemappedUrl(req, res, mapped_local_url);
        return;
    }

    if (!u.hostname) {
        // Not a proxy-style request (no absolute URL): there is no target to forward to.
        res.writeHead(400, {
            'Connection': 'close',
            'Content-Type': 'text/plain'
        });
        res.end('Bad Request: absolute URL required');
        return;
    }

    if (u.hostname === "hm.baidu.com" && /h\.js$/.test(u.pathname || '')) {
        // Block Baidu's analytics script.
        res.writeHead(500, {
            'Connection': 'close',
            'Content-Type': 'application/x-javascript'
        });
        res.end('');
        return;
    }

    if (isBlockedByGFW(u.hostname)) {
        console.log("REQUEST host="+u.hostname+" is blocked by GFW, fast-return 404 instead");
        res.writeHead(404, {
            'Connection': 'close',
        });
        res.end('');
        return;
    }

    proxy.web(req, res, {
        target: req.url, //needs apply patch https://github.com/gagern/node-http-proxy/commit/35000fc0d7dc0a6073ac37db097b73575a861d34
        prependPath: false,
        secure: false
    });
}

var server = http.createServer();
server.on('request', handleRequest)
.on('connect', connect)
.on('error', function(err){
    // NOTE(review): the original author assumed this reports errors on the
    // browser-to-proxy connection; on http.Server, 'error' covers server-level
    // failures (e.g. listen errors) while per-client socket errors are emitted
    // as 'clientError' - confirm which one is intended.
    console.log("客户端连接错误: "+(err && err.stack ? err.stack : err));
});

console.log("node-proxy-server: listening on port "+ProxyPort);
server.listen(ProxyPort);

遗留问题:尽管现在的代码在 CONNECT 的目标地址被 GFW 屏蔽时会直接返回 404 并断开连接,但 Chrome 浏览器似乎仍然会不停地重新发送 CONNECT 请求。

感觉 Node 的性能还是挺不错的,至少比我用 Python 写的版本要好;而且我还尝试用上了 for...of(配合 let)和箭头函数(=>)语法,哈哈。

你可能感兴趣的:(JavaScript,node,ES6,URL重新映射,http代理服务器)