java爬虫学习之HttpClient

一、创建java maven项目,引入包文件

<dependency>
    <groupId>org.apache.httpcomponentsgroupId>
    <artifactId>httpclientartifactId>
    <version>4.5.2version>
dependency>

<dependency>
    <groupId>commons-iogroupId>
    <artifactId>commons-ioartifactId>
    <version>2.5version>
dependency>

二、编写主要代码

import org.apache.http.HttpEntity;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

public class Demo1 {

    public static void main(String[] args)throws Exception {
        CloseableHttpClient httpClient=HttpClients.createDefault(); // 创建httpClient实例
        HttpGet httpGet=new HttpGet("http:www.baidu.com"); // 创建httpget实例
        RequestConfig config=RequestConfig.custom()
                .setConnectTimeout(10000) // 设置连接超时时间 10秒钟
                .setSocketTimeout(20000) // 设置读取超时时间10秒钟
                .build();
        httpGet.setConfig(config);
        httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:50.0) Gecko/20100101 Firefox/50.0");
        CloseableHttpResponse response=httpClient.execute(httpGet); // 执行http get请求
        HttpEntity entity=response.getEntity(); // 获取返回实体
        System.out.println("网页内容:"+EntityUtils.toString(entity, "utf-8")); // 获取网页内容
        response.close(); // response关闭
        httpClient.close(); // httpClient关闭
    }
}
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

public class Demo2 {

    public static void main(String[] args)throws Exception {
        CloseableHttpClient httpClient=HttpClients.createDefault(); // 创建httpClient实例
        HttpGet httpGet=new HttpGet("http://www.tuicool.com/"); // 创建httpget实例
        HttpHost proxy=new HttpHost("175.155.213.235", 9999); // 代理ip,百度代理ip,选择高匿代理ip
        RequestConfig config=RequestConfig.custom().setProxy(proxy).build();
        httpGet.setConfig(config);
        httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:50.0) Gecko/20100101 Firefox/50.0");
        CloseableHttpResponse response=httpClient.execute(httpGet); // 执行http get请求
        HttpEntity entity=response.getEntity(); // 获取返回实体
        System.out.println("网页内容:"+EntityUtils.toString(entity, "utf-8")); // 获取网页内容
        response.close(); // response关闭
        httpClient.close(); // httpClient关闭
    }
}
import java.io.File;
import java.io.InputStream;

import org.apache.commons.io.FileUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;

public class Demo3 {

    public static void main(String[] args)throws Exception {
        CloseableHttpClient httpClient=HttpClients.createDefault(); // 创建httpClient实例
        HttpGet httpGet=new HttpGet("http://www.java1234.com/gg/dljd4.gif"); // 创建httpget实例
        httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:50.0) Gecko/20100101 Firefox/50.0");
        CloseableHttpResponse response=httpClient.execute(httpGet); // 执行http get请求
        HttpEntity entity=response.getEntity(); // 获取返回实体
        if(entity!=null){
            System.out.println("ContentType:"+entity.getContentType().getValue());
            InputStream inputStream=entity.getContent();
            FileUtils.copyToFile(inputStream, new File("C://dljd4.gif"));
        }
        response.close(); // response关闭
        httpClient.close(); // httpClient关闭
    }
}

以上为相关案例,另外,代理ip可以使用一个FIFO的QUEUE保存,ip被封时,自动更换代理ip。

你可能感兴趣的:(httpClient,httpClient,java)