package test.jsoup;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.Proxy;
import java.net.URL;

import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.Credentials;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.AuthCache;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.client.BasicAuthCache;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Demonstrates three ways of fetching a web page through an HTTP proxy and
 * parsing it into a jsoup {@link Document}: jsoup's own connection, plain
 * {@code java.net}, and Apache HttpClient.
 *
 * <pre>
 * Written:  2016-02-02
 * Author:   liuyx
 * </pre>
 */
public class TestJsoup {

    /** Proxy server host name. */
    private static final String PROXY_HOST = "proxy.william.com";

    /** Proxy server port (kept as text because it is also used as a system property value). */
    private static final String PROXY_PORT = "8080";

    /** Proxy account name; empty because proxy authentication is not enabled in this demo. */
    private static final String USERNAME = "";

    /** Proxy account password; empty because proxy authentication is not enabled in this demo. */
    private static final String PASSWORD = "";

    /**
     * Fetches a fixed URL with the HttpClient variant and prints the parsed document.
     *
     * @param args unused
     * @throws Exception if the page cannot be fetched or parsed
     */
    public static void main(String[] args) throws Exception {
        String url = "http://www.baidu.com";
        Document doc = getHTMLByHttpClient(url);
        System.out.println(doc);
    }

    /**
     * Fetches a page with jsoup itself (written against jsoup 1.8.3).
     *
     * <p>The proxy is configured through JVM-wide system properties, so calling this
     * method changes the proxy settings seen by the rest of the process. Proxy
     * credentials are not configured; the original author left the
     * {@code http.proxyUser} / {@code http.proxyPassword} properties disabled.
     *
     * @param url absolute URL of the page to fetch
     * @return the parsed document
     * @throws IOException if the request fails
     */
    private static Document getHTMLByJsoup(String url) throws IOException {
        System.setProperty("proxySet", "true");
        System.setProperty("http.proxyHost", PROXY_HOST);
        System.setProperty("http.proxyPort", PROXY_PORT);
        System.setProperty("http.nonProxyHosts", "localhost|127.0.0.1");
        return Jsoup.connect(url).get();
    }

    /**
     * Fetches a page with {@link HttpURLConnection} through the configured proxy.
     *
     * <p>The response stream is handed to jsoup directly, so line breaks in the body
     * are preserved and the bytes are not decoded with the platform default charset.
     * The charset argument is {@code null}, leaving charset selection to jsoup.
     * The connection and its stream are always released, and I/O failures are
     * reported to the caller instead of being printed and turned into an empty
     * document.
     *
     * <p>Proxy authentication is not sent. To enable it, set a
     * {@code Proxy-Authorization} request header of the form
     * {@code "Basic " + base64(username + ":" + password)} before connecting.
     *
     * @param url absolute URL of the page to fetch
     * @return the parsed document, with {@code url} as its base URI
     * @throws MalformedURLException if {@code url} is not a valid URL
     * @throws IOException if connecting or reading the response fails
     */
    private static Document getHTMLByNet(String url) throws IOException {
        Proxy proxy = new Proxy(Proxy.Type.HTTP,
                new InetSocketAddress(PROXY_HOST, Integer.parseInt(PROXY_PORT)));
        HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection(proxy);
        try {
            connection.connect();
            try (InputStream body = connection.getInputStream()) {
                return Jsoup.parse(body, null, url);
            }
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Fetches a page with Apache HttpClient (written against HttpClient 4.4)
     * through the configured proxy.
     *
     * <p>The proxy is registered on the client builder rather than being passed as
     * the request target, so the client routes the request to {@code url} via the
     * proxy. The client, the response and the entity stream are closed on every
     * path.
     *
     * <p>Proxy authentication is not sent. To enable it, pass
     * {@code createBasicAuthContext(USERNAME, PASSWORD)} as the context argument of
     * {@code execute}.
     *
     * @param url absolute URL of the page to fetch
     * @return the parsed document, with {@code url} as its base URI; an empty
     *         document if the response carries no entity
     * @throws ClientProtocolException if HttpClient reports an HTTP protocol error
     * @throws IOException if connecting or reading the response fails
     */
    private static Document getHTMLByHttpClient(String url) throws IOException {
        HttpHost proxy = new HttpHost(PROXY_HOST, Integer.parseInt(PROXY_PORT));
        try (CloseableHttpClient client = HttpClients.custom().setProxy(proxy).build();
                CloseableHttpResponse response = client.execute(new HttpGet(url))) {
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                // No body at all: keep returning an empty document, as before.
                return Jsoup.parse("", url);
            }
            try (InputStream body = entity.getContent()) {
                return Jsoup.parse(body, null, url);
            }
        }
    }

    /**
     * Builds an HttpClient context carrying basic-auth credentials for the proxy
     * (written against HttpClient 4.4).
     *
     * <p>The credentials are scoped to the proxy host and port, and a
     * {@link BasicScheme} is placed in the auth cache for that same host.
     *
     * <p>NOTE(review): the cached {@code BasicScheme} is created with its default
     * settings; confirm that it produces a {@code Proxy-Authorization} header (not
     * {@code Authorization}) before relying on it for proxy authentication.
     *
     * @param username proxy account name
     * @param password proxy account password
     * @return a context holding the credentials provider and the auth cache
     */
    private static HttpClientContext createBasicAuthContext(String username, String password) {
        int proxyPort = Integer.parseInt(PROXY_PORT);

        CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
        Credentials credentials = new UsernamePasswordCredentials(username, password);
        credentialsProvider.setCredentials(new AuthScope(PROXY_HOST, proxyPort), credentials);

        AuthCache authCache = new BasicAuthCache();
        authCache.put(new HttpHost(PROXY_HOST, proxyPort), new BasicScheme());

        HttpClientContext context = HttpClientContext.create();
        context.setCredentialsProvider(credentialsProvider);
        context.setAuthCache(authCache);
        return context;
    }
}
/*
pom.xml - Maven dependencies required by the class above:

<dependency>
    <!-- jsoup HTML parser library @ http://jsoup.org/ -->
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.8.3</version>
</dependency>
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.4</version>
</dependency>
*/