三种通过配置代理获取http页面信息的方法

package test.jsoup;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.Proxy;
import java.net.URL;
import java.nio.charset.Charset;

import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.Credentials;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.AuthCache;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.client.BasicAuthCache;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * <pre>
 * 业务名:三种获取页面信息的方法
 * 功能说明: 
 * 编写日期:	2016年2月2日
 * 作者:	liuyx
 * 
 * 历史记录
 * 1、修改日期:
 *    修改人:
 *    修改内容:
 * </pre>
 */
public class TestJsoup
{

	/**
	 * 代理服务器地址
	 */
	private static String host = "proxy.william.com";
	/**
	 * 代理服务器断开
	 */
	private static String port = "8080";
	/**
	 * 账号
	 */
	private static String username = "";
	/**
	 * 密码
	 */
	private static String password = "";

	public static void main(String[] args) throws Exception
	{
		String url = "http://www.baidu.com";
		Document doc = getHTMLByHttpClient(url);
		System.out.println(doc);
	}

	/**
	 * 方法说明:jsoup 获取页面信息 since jsoup1.8.3
	 *
	 * @param url
	 * @return
	 * @throws IOException
	 */
	private static Document getHTMLByJsoup(String url) throws IOException
	{
		System.getProperties().setProperty("proxySet", "true");
		System.getProperties().setProperty("http.proxyHost", host);
		System.getProperties().setProperty("http.proxyPort", port);
		// System.getProperties().setProperty("http.proxyUser", username);
		// System.getProperties().setProperty("http.proxyPassword", password);
		System.getProperties().setProperty("http.nonProxyHosts", "localhost|127.0.0.1");
		return Jsoup.connect(url).get();
	}

	/**
	 * 方法说明:java.net 获取页面信息
	 *
	 * @param url
	 * @return
	 */
	private static Document getHTMLByNet(String url)
	{
		StringBuffer tmp = new StringBuffer();
		URL _url;
		HttpURLConnection uc;
		try {
			_url = new URL(url);
			Proxy proxy = new Proxy(Proxy.Type.HTTP,
					new InetSocketAddress(host, Integer.valueOf(port)));
			uc = (HttpURLConnection) _url.openConnection(proxy);
			// String encoded = new String(
			// Base64.encodeBase64(new String(username + ":" + password).getBytes()));
			// uc.setRequestProperty("Proxy-Authorization", "Basic " + encoded);
			uc.connect();
			String line = null;
			BufferedReader in = new BufferedReader(new InputStreamReader(uc.getInputStream()));
			while ((line = in.readLine()) != null) {
				tmp.append(line);
			}
		} catch (MalformedURLException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (IOException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
		return Jsoup.parse(tmp.toString());
	}

	/**
	 * 方法说明:httpClient 获取页面信息 since httpClient4.4
	 *
	 * @param url
	 * @return
	 */
	private static Document getHTMLByHttpClient(String url)
	{
		CloseableHttpClient httpclient = HttpClients.createDefault();
		StringBuffer sb = new StringBuffer();
		try {
			HttpResponse response = httpclient.execute(new HttpHost(host, Integer.valueOf(port)),
					new HttpGet(url));
			// HttpResponse response = httpclient.execute(new HttpHost(host,
			// Integer.valueOf(port)),new HttpGet(url),createBasicAuthContext(username, password));

			HttpEntity entry = response.getEntity();

			if (entry != null) {
				InputStreamReader is = new InputStreamReader(entry.getContent());
				BufferedReader br = new BufferedReader(is);
				String str = null;
				while ((str = br.readLine()) != null) {
					sb.append(str.trim());
				}
				br.close();
			}

		} catch (ClientProtocolException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (IOException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
		return Jsoup.parse(sb.toString());
	}

	/**
	 * 方法说明:httpClient 封装认证信息 since httpClient4.4
	 *
	 * @param username
	 * @param password
	 * @return
	 */
	private static HttpClientContext createBasicAuthContext(String username, String password)
	{
		CredentialsProvider credsProvider = new BasicCredentialsProvider();
		Credentials defaultCreds = new UsernamePasswordCredentials(username, password);
		credsProvider.setCredentials(new AuthScope(host, Integer.valueOf(port)), defaultCreds);

		AuthCache authCache = new BasicAuthCache();
		BasicScheme basicAuth = new BasicScheme();
		authCache.put(new HttpHost(host, Integer.valueOf(port)), basicAuth);

		HttpClientContext context = HttpClientContext.create();
		context.setCredentialsProvider(credsProvider);
		context.setAuthCache(authCache);
		return context;
	}

}

pom.xml

<dependency>
			<!-- jsoup HTML parser library @ http://jsoup.org/ -->
			<groupId>org.jsoup</groupId>
			<artifactId>jsoup</artifactId>
			<version>1.8.3</version>
		</dependency>
		<dependency>
			<!-- Apache HttpClient, used by TestJsoup.getHTMLByHttpClient -->
			<groupId>org.apache.httpcomponents</groupId>
			<artifactId>httpclient</artifactId>
			<version>4.4</version>
		</dependency>


你可能感兴趣的:(三种通过配置代理获取http页面信息的方法)