这几天发现项目的微博抓取没数据展示了,查了一下数据库,原来上个月26号就停了
奇了个怪的,然后就追溯了一下日志
找到了错误日志:
[ERROR][2019-09-16 06:02:46,943][com.spider.controler.weibo.WeiBoParser.parse(WeiBoParser.java:40)]get weibo user home error!url=http://weibo.com/u/3886607933?refer_flag=1001030101_&is_all=1
javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException: PKIX path building failed: sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested target
at sun.security.ssl.Alerts.getSSLException(Alerts.java:192)
at sun.security.ssl.SSLSocketImpl.fatal(SSLSocketImpl.java:1904)
at sun.security.ssl.Handshaker.fatalSE(Handshaker.java:279)
at sun.security.ssl.Handshaker.fatalSE(Handshaker.java:273)
at sun.security.ssl.ClientHandshaker.serverCertificate(ClientHandshaker.java:1446)
at sun.security.ssl.ClientHandshaker.processMessage(ClientHandshaker.java:209)
at sun.security.ssl.Handshaker.processLoop(Handshaker.java:901)
at sun.security.ssl.Handshaker.process_record(Handshaker.java:837)
at sun.security.ssl.SSLSocketImpl.readRecord(SSLSocketImpl.java:1023)
at sun.security.ssl.SSLSocketImpl.performInitialHandshake(SSLSocketImpl.java:1332)
at sun.security.ssl.SSLSocketImpl.startHandshake(SSLSocketImpl.java:1359)
at sun.security.ssl.SSLSocketImpl.startHandshake(SSLSocketImpl.java:1343)
at org.apache.http.conn.ssl.SSLConnectionSocketFactory.createLayeredSocket(SSLConnectionSocketFactory.java:394)
at org.apache.http.conn.ssl.SSLConnectionSocketFactory.connectSocket(SSLConnectionSocketFactory.java:353)
at org.apache.http.impl.conn.DefaultHttpClientConnectionOperator.connect(DefaultHttpClientConnectionOperator.java:141)
at org.apache.http.impl.conn.PoolingHttpClientConnectionManager.connect(PoolingHttpClientConnectionManager.java:353)
at org.apache.http.impl.execchain.MainClientExec.establishRoute(MainClientExec.java:380)
at org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:236)
at org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:184)
at org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:88)
at org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:110)
at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:184)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:71)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:220)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:164)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:139)
at org.bigdatacn.sfgk.wlzbs.controler.weibo.WeiBoUserHomeParser.parse(WeiBoUserHomeParser.java:38)
at org.bigdatacn.sfgk.wlzbs.controler.weibo.task.WeiBoUrlTask.crawl(WeiBoUrlTask.java:127)
at org.bigdatacn.sfgk.wlzbs.controler.weibo.task.WeiBoUrlTask.crawlJob(WeiBoUrlTask.java:101)
at org.bigdatacn.sfgk.wlzbs.controler.weibo.task.WeiBoUrlTask.grab(WeiBoUrlTask.java:71)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.springframework.scheduling.support.ScheduledMethodRunnable.run(ScheduledMethodRunnable.java:65)
at org.springframework.scheduling.support.DelegatingErrorHandlingRunnable.run(DelegatingErrorHandlingRunnable.java:54)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:304)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:178)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Caused by: sun.security.validator.ValidatorException: PKIX path building failed: sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested target
at sun.security.validator.PKIXValidator.doBuild(PKIXValidator.java:385)
at sun.security.validator.PKIXValidator.engineValidate(PKIXValidator.java:292)
at sun.security.validator.Validator.validate(Validator.java:260)
at sun.security.ssl.X509TrustManagerImpl.validate(X509TrustManagerImpl.java:326)
at sun.security.ssl.X509TrustManagerImpl.checkTrusted(X509TrustManagerImpl.java:231)
at sun.security.ssl.X509TrustManagerImpl.checkServerTrusted(X509TrustManagerImpl.java:126)
at sun.security.ssl.ClientHandshaker.serverCertificate(ClientHandshaker.java:1428)
... 38 more
Caused by: sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested target
at sun.security.provider.certpath.SunCertPathBuilder.engineBuild(SunCertPathBuilder.java:196)
at java.security.cert.CertPathBuilder.build(CertPathBuilder.java:268)
at sun.security.validator.PKIXValidator.doBuild(PKIXValidator.java:380)
... 44 more
然后就在本地项目测试发现竟然是正常的
观察了一下,本地用的jdk8测试,没有出现问题,然后服务器上是jdk7,所以出现了这个问题,这是由于本地与服务器所使用的SSL/TLS版本不一致。服务器使用的TLS版本高,而客户端支持的TLS版本低。Java 8默认支持TLSv1.2版本。
微博网页都变https了
具体的就不写了,随便百度就能找到
因为有人是windows,有人linux,方法也不一样
/*
* Copyright 2006 Sun Microsystems, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of Sun Microsystems nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.security.KeyStore;
import java.security.MessageDigest;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLException;
import javax.net.ssl.SSLSocket;
import javax.net.ssl.SSLSocketFactory;
import javax.net.ssl.TrustManager;
import javax.net.ssl.TrustManagerFactory;
import javax.net.ssl.X509TrustManager;
public class InstallCert {
public static void main(String[] args) throws Exception {
String host;
int port;
char[] passphrase;
if ((args.length == 1) || (args.length == 2)) {
String[] c = args[0].split(":");
host = c[0];
port = (c.length == 1) ? 443 : Integer.parseInt(c[1]);
String p = (args.length == 1) ? "changeit" : args[1];
passphrase = p.toCharArray();
} else {
System.out
.println("Usage: java InstallCert [:port] [passphrase]");
return;
}
File file = new File("jssecacerts");
if (file.isFile() == false) {
char SEP = File.separatorChar;
File dir = new File(System.getProperty("java.home") + SEP + "lib"
+ SEP + "security");
file = new File(dir, "jssecacerts");
if (file.isFile() == false) {
file = new File(dir, "cacerts");
}
}
System.out.println("Loading KeyStore " + file + "...");
InputStream in = new FileInputStream(file);
KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType());
ks.load(in, passphrase);
in.close();
SSLContext context = SSLContext.getInstance("TLS");
TrustManagerFactory tmf = TrustManagerFactory
.getInstance(TrustManagerFactory.getDefaultAlgorithm());
tmf.init(ks);
X509TrustManager defaultTrustManager = (X509TrustManager) tmf
.getTrustManagers()[0];
SavingTrustManager tm = new SavingTrustManager(defaultTrustManager);
context.init(null, new TrustManager[] { tm }, null);
SSLSocketFactory factory = context.getSocketFactory();
System.out
.println("Opening connection to " + host + ":" + port + "...");
SSLSocket socket = (SSLSocket) factory.createSocket(host, port);
socket.setSoTimeout(10000);
try {
System.out.println("Starting SSL handshake...");
socket.startHandshake();
socket.close();
System.out.println();
System.out.println("No errors, certificate is already trusted");
} catch (SSLException e) {
System.out.println();
e.printStackTrace(System.out);
}
X509Certificate[] chain = tm.chain;
if (chain == null) {
System.out.println("Could not obtain server certificate chain");
return;
}
BufferedReader reader = new BufferedReader(new InputStreamReader(
System.in));
System.out.println();
System.out.println("Server sent " + chain.length + " certificate(s):");
System.out.println();
MessageDigest sha1 = MessageDigest.getInstance("SHA1");
MessageDigest md5 = MessageDigest.getInstance("MD5");
for (int i = 0; i < chain.length; i++) {
X509Certificate cert = chain[i];
System.out.println(" " + (i + 1) + " Subject "
+ cert.getSubjectDN());
System.out.println(" Issuer " + cert.getIssuerDN());
sha1.update(cert.getEncoded());
System.out.println(" sha1 " + toHexString(sha1.digest()));
md5.update(cert.getEncoded());
System.out.println(" md5 " + toHexString(md5.digest()));
System.out.println();
}
System.out
.println("Enter certificate to add to trusted keystore or 'q' to quit: [1]");
String line = reader.readLine().trim();
int k;
try {
k = (line.length() == 0) ? 0 : Integer.parseInt(line) - 1;
} catch (NumberFormatException e) {
System.out.println("KeyStore not changed");
return;
}
X509Certificate cert = chain[k];
String alias = host + "-" + (k + 1);
ks.setCertificateEntry(alias, cert);
OutputStream out = new FileOutputStream("jssecacerts");
ks.store(out, passphrase);
out.close();
System.out.println();
System.out.println(cert);
System.out.println();
System.out
.println("Added certificate to keystore 'jssecacerts' using alias '"
+ alias + "'");
}
private static final char[] HEXDIGITS = "0123456789abcdef".toCharArray();
private static String toHexString(byte[] bytes) {
StringBuilder sb = new StringBuilder(bytes.length * 3);
for (int b : bytes) {
b &= 0xff;
sb.append(HEXDIGITS[b >> 4]);
sb.append(HEXDIGITS[b & 15]);
sb.append(' ');
}
return sb.toString();
}
private static class SavingTrustManager implements X509TrustManager {
private final X509TrustManager tm;
private X509Certificate[] chain;
SavingTrustManager(X509TrustManager tm) {
this.tm = tm;
}
public X509Certificate[] getAcceptedIssuers() {
throw new UnsupportedOperationException();
}
public void checkClientTrusted(X509Certificate[] chain, String authType)
throws CertificateException {
throw new UnsupportedOperationException();
}
public void checkServerTrusted(X509Certificate[] chain, String authType)
throws CertificateException {
this.chain = chain;
tm.checkServerTrusted(chain, authType);
}
}
}
在电脑某个你熟悉的地方新建一个txt
把上面的代码放到txt中,然后保存,重命名文件为
InstallCert.java
然后编译一下java文件
在文件夹内按住shift然后右键
选择在此处打开命令窗口
编译:
javac InstallCert.java
运行:
java InstallCert www.weibo.com
注:我是因为微博抓不到生成微博证书
所以 java InstallCert (要生成证书的网站)
输完命令之后回车出现如图信息
根据提示输入 1 然后回车
[
[
Version: V3
Subject: CN=*.weibo.com, O="Sina.com Technology (China) Co., Ltd.", L=Beijin
ST=Beijing, C=CN
Signature Algorithm: SHA384withECDSA, OID = 1.2.840.10045.4.3.3
Key: Sun EC public key, 256 bits
public x coord: 123315875432783152221904536036809075583890703163686656991850
536779801316622
public y coord: 833422678118437284553618318631202115878810846320782411162031
483753735996056
parameters: secp256r1 [NIST P-256, X9.62 prime256v1] (1.2.840.10045.3.1.7)
Validity: [From: Fri Aug 16 10:12:01 GMT+08:00 2019,
To: Thu Oct 07 20:59:24 GMT+08:00 2021]
Issuer: CN=GlobalSign ECC OV SSL CA 2018, O=GlobalSign nv-sa, C=BE
SerialNumber: [ 33aac3b1 aea0f9c3 8a6a1f7a]
Certificate Extensions: 10
[1]: ObjectId: 1.3.6.1.4.1.11129.2.4.2 Criticality=false
Extension unknown: DER encoded OCTET string =
0000: 04 82 01 6E 04 82 01 6A 01 68 00 76 00 5C DC 43 ...n...j.h.v.\.C
0010: 92 FE E6 AB 45 44 B1 5E 9A D4 56 E6 10 37 FB D5 ....ED.^..V..7..
0020: FA 47 DC A1 73 94 B2 5E E6 F6 C7 0E CA 00 00 01 .G..s..^........
0030: 6C 98 32 3A E2 00 00 04 03 00 47 30 45 02 21 00 l.2:......G0E.!.
0040: 99 03 0C 54 C6 34 07 FE DA E5 C1 D1 85 45 4F 2A ...T.4.......EO*
0050: 50 94 52 94 56 9B 2A E8 CF F1 0D 90 A8 5E 10 98 P.R.V.*......^..
0060: 02 20 2F 40 4B C1 BE 4D A5 11 45 49 F5 0C 73 66 . /@K..M..EI..sf
0070: DE DC 63 43 26 35 83 3B B7 BF F7 A9 4C 5D F2 74 ..cC&5.;....L].t
0080: CD CE 00 76 00 EE 4B BD B7 75 CE 60 BA E1 42 69 ...v..K..u.`..Bi
0090: 1F AB E1 9E 66 A3 0F 7E 5F B0 72 D8 83 00 C4 7B ....f..._.r.....
00A0: 89 7A A8 FD CB 00 00 01 6C 98 32 37 BB 00 00 04 .z......l.27....
00B0: 03 00 47 30 45 02 20 5E E3 90 81 5B 9E A9 BB B6 ..G0E. ^...[....
00C0: 19 11 86 86 61 FA 9F 06 84 39 FC 5D B7 86 DC 19 ....a....9.]....
00D0: B8 D9 3A 81 04 92 7C 02 21 00 F5 3A B9 D2 17 29 ..:.....!..:...)
00E0: C9 80 80 56 FB 2A 80 9B 89 BD 04 DC 47 B7 84 74 ...V.*......G..t
00F0: 1C 57 86 31 1C 74 38 5E 4D 14 00 76 00 55 81 D4 .W.1.t8^M..v.U..
0100: C2 16 90 36 01 4A EA 0B 9B 57 3C 53 F0 C0 E4 38 ...6.J...W
在当前目录会有一个名为jssecacerts的文件
复制证书文件到jdk安装路径\jre\lib\security文件夹下
windows/Linux都是复制到jdk安装路径\jre\lib\security
复制证书到这个目录下
如果不想生成证书的话可以尝试java抓取时忽略掉证书。jsoup在调用前先执行下以下忽略证书请求就可以了。
try {
//先调用下忽略https证书的再请求才可以
HttpsUrlValidator.retrieveResponseFromServer(url);
doc = Jsoup
.connect(url)
.header("User-Agent",rand_agents)
.timeout(10000).get();
body = doc.getElementsByTag("body").html();
} catch (Exception e) {
log.info(e.getMessage());
}
HttpsUrlValidator 类:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLSession;
public class HttpsUrlValidator {
static HostnameVerifier hv = new HostnameVerifier() {
public boolean verify(String urlHostName, SSLSession session) {
System.out.println("Warning: URL Host: " + urlHostName + " vs. "
+ session.getPeerHost());
return true;
}
};
public final static String retrieveResponseFromServer(final String url) {
HttpURLConnection connection = null;
try {
URL validationUrl = new URL(url);
trustAllHttpsCertificates();
HttpsURLConnection.setDefaultHostnameVerifier(hv);
connection = (HttpURLConnection) validationUrl.openConnection();
final BufferedReader in = new BufferedReader(new InputStreamReader(
connection.getInputStream()));
String line;
final StringBuffer stringBuffer = new StringBuffer(255);
synchronized (stringBuffer) {
while ((line = in.readLine()) != null) {
stringBuffer.append(line);
stringBuffer.append("\n");
}
return stringBuffer.toString();
}
} catch (final IOException e) {
System.out.println(e.getMessage());
return null;
} catch (final Exception e1){
System.out.println(e1.getMessage());
return null;
}finally {
if (connection != null) {
connection.disconnect();
}
}
}
public static void trustAllHttpsCertificates() throws Exception {
javax.net.ssl.TrustManager[] trustAllCerts = new javax.net.ssl.TrustManager[1];
javax.net.ssl.TrustManager tm = new miTM();
trustAllCerts[0] = tm;
javax.net.ssl.SSLContext sc = javax.net.ssl.SSLContext
.getInstance("SSL");
sc.init(null, trustAllCerts, null);
javax.net.ssl.HttpsURLConnection.setDefaultSSLSocketFactory(sc
.getSocketFactory());
}
static class miTM implements javax.net.ssl.TrustManager,
javax.net.ssl.X509TrustManager {
public java.security.cert.X509Certificate[] getAcceptedIssuers() {
return null;
}
public boolean isServerTrusted(
java.security.cert.X509Certificate[] certs) {
return true;
}
public boolean isClientTrusted(
java.security.cert.X509Certificate[] certs) {
return true;
}
public void checkServerTrusted(
java.security.cert.X509Certificate[] certs, String authType)
throws java.security.cert.CertificateException {
return;
}
public void checkClientTrusted(
java.security.cert.X509Certificate[] certs, String authType)
throws java.security.cert.CertificateException {
return;
}
}
}