给aria2打了个补丁,支持国内多数下载网站防盗链链接下载

这个补丁的作用是:修改重定向时对引用(Referer)及cookie的处理,使aria2能正确下载国内多数下载网站的防盗链链接。

补丁修改的机制说明:
在30x重定向时,RFC的HTTP标准规定不能发送Cookie,也不能使用引用(Referer)。但国内多数下载站点同时使用这两者,而且设置Cookie的域名比较复杂;这些做法被广泛用作防盗链手段,使链接只能在浏览器中下载,而遵守标准的普通下载工具无法正常下载这些链接。
针对这种情况,在aria2中修改了这一机制:记录cookie时先对域名进行处理,只保留二级域名(对com.cn这类后缀则保留到三级域名);查找cookie时也只使用处理后的域名进行查询。
在做redirect的时候,仍旧按照服务器的响应记录cookie, 并在redirect请求中使用这些cookie。
在做redirect的时候,使用本次地址作为引用发送给服务器。
用户在--header参数中传递的Cookie会与本次服务器设置的Cookie合并,一起用于下一次转向链接的请求。

测试网站列表:
非凡软件下载
sina下载 down.tech.sina.com.cn
www.greendown.cn
...
如果您测试了其他站点,请email我。

补丁如下:
aria2-1.9.2_redirect_no_cookie_no_refer.patch
diff --git a/src/HttpRequest.cc b/src/HttpRequest.cc
index c6c0afb..dfc981f 100644
--- a/src/HttpRequest.cc
+++ b/src/HttpRequest.cc
@@ -218,9 +218,10 @@ std::string HttpRequest::createRequest()
     builtinHds.push_back(std::make_pair("Referer:", getPreviousURI()));
   }
   if(!_cookieStorage.isNull()) {
+      std::string baseDomain = util::getBaseDomainName(getHost());
     std::string cookiesValue;
     std::vector<Cookie> cookies =
-      _cookieStorage->criteriaFind(getHost(),
+        _cookieStorage->criteriaFind(baseDomain, // getHost(),
                                    getDir(),
                                    Time().getTime(),
                                    getProtocol() == Request::PROTO_HTTPS ?
@@ -235,15 +236,25 @@ std::string HttpRequest::createRequest()
   }
   for(std::vector<std::pair<std::string, std::string> >::const_iterator i =
         builtinHds.begin(), eoi = builtinHds.end(); i != eoi; ++i) {
-    std::vector<std::string>::const_iterator j = _headers.begin();
-    std::vector<std::string>::const_iterator jend = _headers.end();
+    // std::vector<std::string>::const_iterator j = _headers.begin();
+    // std::vector<std::string>::const_iterator jend = _headers.end();
+    std::vector<std::string>::iterator j = _headers.begin();
+    std::vector<std::string>::iterator jend = _headers.end();
     for(; j != jend; ++j) {
       if(util::startsWith(*j, (*i).first)) {
         break;
       }
     }
+    // hacked by [email protected]
+    // If the user supplied a header with the same name, the built-in header is not emitted,
+    // except for Cookie: there the built-in cookies are appended to the user's Cookie: header line.
     if(j == jend) {
       strappend(requestLine, (*i).first, " ", (*i).second, A2STR::CRLF);
+    } else {
+        // combine the new cookie to header line
+        if (util::startsWith((*i).first, "Cookie") && !util::endsWith(*j, (*i).second)) {
+            (*j) += ";" + (*i).second;
+        }
     }
   }
   // append additional headers given by user.
diff --git a/src/HttpResponse.cc b/src/HttpResponse.cc
index 6957fe9..79de4bd 100644
--- a/src/HttpResponse.cc
+++ b/src/HttpResponse.cc
@@ -116,11 +116,13 @@ std::string HttpResponse::determinFilename() const

void HttpResponse::retrieveCookie()
{
+    // hacked by [email protected]: store cookies under the base domain instead of the full host name
+    std::string baseDomain = util::getBaseDomainName(httpRequest->getHost());
   std::vector<std::string> v = httpHeader->get(HttpHeader::SET_COOKIE);
   for(std::vector<std::string>::const_iterator itr = v.begin(), eoi = v.end();
       itr != eoi; ++itr) {
     httpRequest->getCookieStorage()->parseAndStore(*itr,
-                                                   httpRequest->getHost(),
+                                                   baseDomain, // httpRequest->getHost(),
                                                    httpRequest->getDir());
   }
}
diff --git a/src/Request.cc b/src/Request.cc
index 9fbba5a..4204c0a 100644
--- a/src/Request.cc
+++ b/src/Request.cc
@@ -125,7 +125,8 @@ void Request::setReferer(const std::string& uri)
}

bool Request::redirectUri(const std::string& uri) {
-  _previousUri = A2STR::NIL;
+    // _previousUri = A2STR::NIL;
+    _previousUri = _uri;  // hacked by [email protected]
   _supportsPersistentConnection = true;
   ++_redirectCount;
   std::string redirectedUri;
diff --git a/src/util.cc b/src/util.cc
index 58a37d4..bb10bdc 100644
--- a/src/util.cc
+++ b/src/util.cc
@@ -1305,6 +1305,48 @@ void removeMetalinkContentTypes(const SharedHandle<RequestGroup>& group)
   }
}

+// hacked by [email protected]
+/*
+ * Reduce host name s to its base domain: the last two dot-separated labels,
+ * or the last three when it ends in {org,com,net,info}.{cn,cc,us}.
+ * Hosts accepted by util::isNumericHost are returned unchanged. Anything from
+ * the first ':' on is dropped. Hosts with fewer than two labels are returned
+ * as they are (minus that ':' suffix) instead of raising std::out_of_range.
+ */
+    std::string getBaseDomainName(const std::string &s)
+    {
+        if (util::isNumericHost(s)) {
+            return s;
+        }
+        // Drop anything from the first ':' on (e.g. a port number).
+        std::string ts = s;
+        size_t colonPos = ts.find(':');
+        if (colonPos != std::string::npos) {
+            ts.resize(colonPos);
+        }
+
+        std::vector<std::string> domParts;
+        std::string delims(".");
+        util::split(ts, std::back_inserter(domParts), delims);
+        // A single-label ("localhost") or empty host has no parent domain;
+        // return it as is rather than index before the start of domParts.
+        const size_t domLevel = domParts.size();
+        if (domLevel < 2) {
+            return ts;
+        }
+        const std::string &tld = domParts[domLevel - 1];
+        const std::string &sld = domParts[domLevel - 2];
+        std::string baseDomain = sld + "." + tld;
+        // Below a two-label suffix such as "com.cn" keep one more label,
+        // provided the host actually has a third label to keep.
+        if (domLevel >= 3
+            && (tld == "cn" || tld == "cc" || tld == "us")
+            && (sld == "org" || sld == "com" || sld == "net" || sld == "info")) {
+            baseDomain = domParts[domLevel - 3] + "." + baseDomain;
+        }
+        return baseDomain;
+    }
+
+
} // namespace util

} // namespace aria2
diff --git a/src/util.h b/src/util.h
index 9e1c2a5..01b6a1c 100644
--- a/src/util.h
+++ b/src/util.h
@@ -395,6 +395,9 @@ bool inSameCidrBlock(const std::string& ip1, const std::string& ip2, int bits);

void removeMetalinkContentTypes(const SharedHandle<RequestGroup>& group);

+    // hacked by [email protected]: reduce host name s to its last two (or three, e.g. under "com.cn") labels
+    std::string getBaseDomainName(const std::string &s);
+
} // namespace util

} // namespace aria2

你可能感兴趣的:(.net,浏览器,git,软件测试,J#)