Java 抓取 HTTPS 协议 URL 页面源码的方法

阅读本文之前,请先阅读下面这篇文章,按照其中的步骤可以获取到 keystore 文件:

http://www.oschina.net/code/snippet_273576_18919

在此感谢 stevenliu。不过经过我的测试,发现 stevenliu 文章中的源码并没有抓取到页面源码,可能是因为我项目里的 HttpClient 版本较高。下面这个方法经测试可以正常抓取:

package org.phoenix.cases.webservice;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.security.KeyStore;
import java.security.SecureRandom;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.List;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSocketFactory;
import javax.net.ssl.TrustManager;
import javax.net.ssl.TrustManagerFactory;
import javax.net.ssl.X509TrustManager;
import org.apache.commons.io.IOUtils;
/**
 * An {@link X509TrustManager} that validates certificate chains against the
 * certificates stored in a caller-supplied JKS keystore, together with a small
 * demo ({@link #main(String[])}) that fetches the body of an HTTPS URL with it.
 *
 * <p>Every trust decision is delegated to the SunJSSE "SunX509" trust manager
 * initialised from that keystore. A validation failure is propagated to the
 * caller as a {@link CertificateException}; it is never swallowed, because
 * returning normally from a {@code check*Trusted} method tells JSSE that the
 * peer is trusted.
 *
 * @author mengfeiyang
 */
public class MyX509TrustManager implements X509TrustManager {
    /** Delegate built from the keystore; performs the actual chain validation. */
    private final X509TrustManager sunJSSEX509TrustManager;

    /**
     * Builds a trust manager backed by the given JKS keystore.
     *
     * @param keystoreFile path of the JKS keystore holding the trusted certificates
     * @param pass password of the keystore
     * @throws Exception if the keystore cannot be read or loaded, the requested
     *         provider/algorithm is unavailable, or the factory yields no
     *         {@link X509TrustManager}
     */
    public MyX509TrustManager(String keystoreFile, String pass) throws Exception {
        KeyStore ks = KeyStore.getInstance("JKS");
        // Close the keystore file as soon as it has been loaded.
        try (FileInputStream in = new FileInputStream(keystoreFile)) {
            ks.load(in, pass.toCharArray());
        }

        TrustManagerFactory tmf = TrustManagerFactory.getInstance("SunX509", "SunJSSE");
        tmf.init(ks);

        X509TrustManager found = null;
        for (TrustManager candidate : tmf.getTrustManagers()) {
            if (candidate instanceof X509TrustManager) {
                found = (X509TrustManager) candidate;
                break;
            }
        }
        if (found == null) {
            throw new IllegalStateException("Couldn't initialize");
        }
        sunJSSEX509TrustManager = found;
    }

    /**
     * Validates a client certificate chain against the keystore.
     *
     * @param chain the peer certificate chain
     * @param authType the authentication type
     * @throws CertificateException if the delegate does not trust the chain
     */
    @Override
    public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
        sunJSSEX509TrustManager.checkClientTrusted(chain, authType);
    }

    /**
     * Validates a server certificate chain against the keystore.
     *
     * @param chain the peer certificate chain
     * @param authType the key exchange algorithm used
     * @throws CertificateException if the delegate does not trust the chain
     */
    @Override
    public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
        sunJSSEX509TrustManager.checkServerTrusted(chain, authType);
    }

    /**
     * Returns the issuers accepted by the keystore-backed delegate.
     *
     * @return the delegate's accepted issuer certificates
     */
    @Override
    public X509Certificate[] getAcceptedIssuers() {
        return sunJSSEX509TrustManager.getAcceptedIssuers();
    }

    /**
     * Picks the charset announced in a {@code Content-Type} header value.
     *
     * @param contentType header value such as {@code text/html; charset=GBK}; may be {@code null}
     * @return the announced charset, or UTF-8 when none is announced or it is not usable
     */
    private static Charset responseCharset(String contentType) {
        final String key = "charset=";
        if (contentType != null) {
            for (String part : contentType.split(";")) {
                String param = part.trim();
                if (param.regionMatches(true, 0, key, 0, key.length())) {
                    String name = param.substring(key.length()).replace("\"", "").trim();
                    try {
                        return Charset.forName(name);
                    } catch (IllegalArgumentException ignored) {
                        // Illegal or unsupported charset name: fall back to UTF-8 below.
                    }
                }
            }
        }
        return StandardCharsets.UTF_8;
    }

    /**
     * Demo: fetches a search page over HTTPS, trusting only the certificates in
     * the keystore, and prints the page source to standard output. The HTTP
     * status code is printed to standard error.
     *
     * @param args unused
     * @throws Exception on keystore, TLS or I/O failure
     */
    public static void main(String[] args) throws Exception {
        TrustManager[] tm = { new MyX509TrustManager("E:\\mycert.keystore", "123456") };
        // "TLS" instead of the legacy "SSL" context name.
        SSLContext sslContext = SSLContext.getInstance("TLS", "SunJSSE");
        sslContext.init(null, tm, new SecureRandom());
        SSLSocketFactory ssf = sslContext.getSocketFactory();

        // Non-ASCII query values must be percent-encoded before they go into a URL.
        String keyword = URLEncoder.encode("途牛", StandardCharsets.UTF_8.name());
        URL myURL = new URL("https://beta.tf.360.cn/search/mapi?keyword=" + keyword);
        HttpsURLConnection httpsConn = (HttpsURLConnection) myURL.openConnection();
        httpsConn.setSSLSocketFactory(ssf);
        try {
            // Read the status first: getInputStream() throws for error responses.
            int status = httpsConn.getResponseCode();
            System.err.println("HTTP status: " + status);

            Charset charset = responseCharset(httpsConn.getContentType());
            try (BufferedReader reader =
                    new BufferedReader(new InputStreamReader(httpsConn.getInputStream(), charset))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.println(line); // print the page source
                }
            }
        } finally {
            httpsConn.disconnect();
        }
    }
}

在我的phoenixframework自动化平台的接口测试模块中,下个版本将会增加https的操作。因为发现现在越来越多的网站是https了。phoenixframework平台网站:http://www.cewan.la

你可能感兴趣的:(自动化测试,网页爬虫,https抓取)