使用HTTPclient做post请求的爬虫技术

 

 

 

package com.itheima.spider.httpclient;

 

 

import org.apache.http.Header;

import org.apache.http.HttpEntity;

import org.apache.http.client.entity.UrlEncodedFormEntity;

import org.apache.http.client.methods.CloseableHttpResponse;

import org.apache.http.client.methods.HttpGet;

import org.apache.http.client.methods.HttpPost;

import org.apache.http.impl.client.CloseableHttpClient;

import org.apache.http.impl.client.HttpClients;

import org.apache.http.message.BasicNameValuePair;

import org.apache.http.util.EntityUtils;

 

 

import java.io.IOException;

import java.util.ArrayList;

 

 

public class HttpClientPost {

    public static void main(String[] args) throws IOException {

        //1.确定URL

        String indexUrl = "http://www.itcast.cn";

        //2 发送请求,获得数据

        //2.1 创建httpclient对象

        CloseableHttpClient httpClient = HttpClients.createDefault();

        //2.2创建httppost对象--通过URL得到

        HttpPost httpPost = new HttpPost(indexUrl);

        //2.2.1 设置请求头

        httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.4.3505.400");

        //2.2.2 设置请求参数

        //先获得请求参数的键值对 list集合

        //一,先建立泛型为 键值对的 list集合

        ArrayList basicNameValuePairs = new ArrayList;

        //二,给集合中增加数据

        basicNameValuePairs.add(new BasicNameValuePair("txtUser","黑马"));

        basicNameValuePairs.add(new BasicNameValuePair("txtPass","123456"));

        basicNameValuePairs.add(new BasicNameValuePair("city","北京"));

        basicNameValuePairs.add(new BasicNameValuePair("birthday","1980-01-01"));

        basicNameValuePairs.add(new BasicNameValuePair("sex","1"));

        //三.把上面的装有表单数据的list集合给封装到请求体中entity

        UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(basicNameValuePairs);

        //四.把entity给装到请求体中

        httpPost.setEntity(formEntity);

        //2.3发送请求,获得响应

        CloseableHttpResponse response = httpClient.execute(httpPost);

        //2.4 把response中数据给解析出来

        //2.4.1 获得响应头,并且判断是否成功访问

        int statusCode = response.getStatusLine().getStatusCode();

        if(statusCode == 200){

            Header[] headers = response.getHeaders();

            for (Header header : headers) {

                System.out.println("响应头:name:"+header.getName()+"value:"+header.getValue());

            }

            //2.4.2 获得响应体

            HttpEntity entity = response.getEntity();

            //从响应体中获得网页内容并且打印

            String html = EntityUtils.toString(entity, "utf-8");

            System.out.println(html);

        }

        //2.5关闭资源

        httpClient.close();

 

 

    }

}

 

使用HTTPclient做post请求的爬虫技术_第1张图片

 

1.确定一个爬取的URL

4faf39132985d7dc8e300c6e1b8366ace23.jpg

//1.确定URL

String indexUrl = "http://www.itcast.cn";

 

2发送请求,获取数据

 

2.1创建httpclient对象

1e9b68c1af36d94d8634f721e12d6bf962d.jpg

//2.1 创建httpclient对象

CloseableHttpClient httpClient = HttpClients.createDefault();

 

2.2创建HTTPPost对象

7f528b0c54e7dad5ff2242963935df7f344.jpg

//2.2创建httppost对象

HttpPost httpPost = new HttpPost(indexUrl);

        

        设置请求头,请求体()

a382ed556f617461fda386808ee22d58b54.jpg

//2.2.1 设置请求头

httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.4.3505.400");

 

        设置请求参数:请求体(表单-数据和最下面的)

用于登录的登录名,密码 还有用户信息--键值对

使用HTTPclient做post请求的爬虫技术_第2张图片

 

//先获得请求参数的键值对 list集合

把上面的装有表单数据的list集合给封装到请求体中entity,再到请求体中

 

使用HTTPclient做post请求的爬虫技术_第3张图片

//先获得请求参数的键值对 list集合

//一,先建立泛型为 键值对的 list集合

ArrayList&lt;BasicNameValuePair&gt; basicNameValuePairs = new ArrayList&lt;&gt;();

//二,给集合中增加数据--表单数据来源于 网页源码中的form data中的数据

basicNameValuePairs.add(new BasicNameValuePair("txtUser","黑马"));

basicNameValuePairs.add(new BasicNameValuePair("txtPass","123456"));

basicNameValuePairs.add(new BasicNameValuePair("city","北京"));

basicNameValuePairs.add(new BasicNameValuePair("birthday","1980-01-01"));

basicNameValuePairs.add(new BasicNameValuePair("sex","1"));

//三.把上面的装有表单数据的list集合给封装到formentity

UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(basicNameValuePairs);

//四.把entity给装到请求体中

httpPost.setEntity(formEntity);

 

2.3发送请求,获取响应 -- response

e7e38b51f7baf021b860e66c21803d750ce.jpg

//2.3发送请求,获得响应

CloseableHttpResponse response = httpClient.execute(httpPost);

 

 

2.4获取响应头  响应状态码  响应体

使用HTTPclient做post请求的爬虫技术_第4张图片

//2.4 把response中数据给解析出来

//2.4.1 获得响应头,并且判断是否成功访问

int statusCode = response.getStatusLine().getStatusCode();

if(statusCode == 200){

Header[] headers = response.getAllHeaders();

for (Header header : headers) {

System.out.println("响应头:name:"+header.getName()+"value:"+header.getValue());

}

 

 

        获得响应体

从响应体中能获得 网页,并且把html页面给打印出来; 如果是json格式,需要别的ajax处理

使用HTTPclient做post请求的爬虫技术_第5张图片

//2.4.2 获得响应体

HttpEntity entity = response.getEntity();

//从响应体中获得网页内容并且打印

String html = EntityUtils.toString(entity, "utf-8");

System.out.println(html);

}

 

 

2.5关闭资源

e07db677e2333c0869a657a10cade74763b.jpg

//2.5关闭资源

httpClient.close();

 

打印结果:

使用HTTPclient做post请求的爬虫技术_第6张图片

 

请求参数的样子

使用HTTPclient做post请求的爬虫技术_第7张图片

 

 

 

 

package com.itheima.spider.httpclient;

 

 

import org.apache.http.Header;

import org.apache.http.HttpEntity;

import org.apache.http.client.entity.UrlEncodedFormEntity;

import org.apache.http.client.methods.CloseableHttpResponse;

import org.apache.http.client.methods.HttpGet;

import org.apache.http.client.methods.HttpPost;

import org.apache.http.impl.client.CloseableHttpClient;

import org.apache.http.impl.client.HttpClients;

import org.apache.http.message.BasicNameValuePair;

import org.apache.http.util.EntityUtils;

 

 

import java.io.IOException;

import java.util.ArrayList;

 

 

public class HttpClientPost {

    public static void main(String[] args) throws IOException {

        //1.确定URL

        String indexUrl = "http://www.itcast.cn";

        //2 发送请求,获得数据

        //2.1 创建httpclient对象

        CloseableHttpClient httpClient = HttpClients.createDefault();

        //2.2创建httppost对象--通过URL得到

        HttpPost httpPost = new HttpPost(indexUrl);

        //2.2.1 设置请求头

        httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.4.3505.400");

        //2.2.2 设置请求参数

        //先获得请求参数的键值对 list集合

        //一,先建立泛型为 键值对的 list集合

        ArrayList basicNameValuePairs = new ArrayList;

        //二,给集合中增加数据

        basicNameValuePairs.add(new BasicNameValuePair("txtUser","黑马"));

        basicNameValuePairs.add(new BasicNameValuePair("txtPass","123456"));

        basicNameValuePairs.add(new BasicNameValuePair("city","北京"));

        basicNameValuePairs.add(new BasicNameValuePair("birthday","1980-01-01"));

        basicNameValuePairs.add(new BasicNameValuePair("sex","1"));

        //三.把上面的装有表单数据的list集合给封装到请求体中entity

        UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(basicNameValuePairs);

        //四.把entity给装到请求体中

        httpPost.setEntity(formEntity);

        //2.3发送请求,获得响应

        CloseableHttpResponse response = httpClient.execute(httpPost);

        //2.4 把response中数据给解析出来

        //2.4.1 获得响应头,并且判断是否成功访问

        int statusCode = response.getStatusLine().getStatusCode();

        if(statusCode == 200){

            Header[] headers = response.getHeaders();

            for (Header header : headers) {

                System.out.println("响应头:name:"+header.getName()+"value:"+header.getValue());

            }

            //2.4.2 获得响应体

            HttpEntity entity = response.getEntity();

            //从响应体中获得网页内容并且打印

            String html = EntityUtils.toString(entity, "utf-8");

            System.out.println(html);

        }

        //2.5关闭资源

        httpClient.close();

 

 

    }

}

 

使用HTTPclient做post请求的爬虫技术_第8张图片

 

1.确定一个爬取的URL

616eca6b25e9647ebb680faee0d8276b8da.jpg

//1.确定URL

String indexUrl = "http://www.itcast.cn";

 

2发送请求,获取数据

 

2.1创建httpclient对象

613eaf740b97aeacdbf8e379ecb19e6b0f6.jpg

//2.1 创建httpclient对象

CloseableHttpClient httpClient = HttpClients.createDefault();

 

2.2创建HTTPPost对象

a06067fc447fcda61a146ee696f1c92074c.jpg

//2.2创建httppost对象

HttpPost httpPost = new HttpPost(indexUrl);

        

        设置请求头,请求体()

90b90e10d93ddccf12ca9eae8047e6cdc0e.jpg

//2.2.1 设置请求头

httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.4.3505.400");

 

        设置请求参数:请求体(表单-数据和最下面的)

用于登录的登录名,密码 还有用户信息--键值对

使用HTTPclient做post请求的爬虫技术_第9张图片

 

//先获得请求参数的键值对 list集合

把上面的装有表单数据的list集合给封装到请求体中entity,再到请求体中

 

使用HTTPclient做post请求的爬虫技术_第10张图片

//先获得请求参数的键值对 list集合

//一,先建立泛型为 键值对的 list集合

ArrayList&lt;BasicNameValuePair&gt; basicNameValuePairs = new ArrayList&lt;&gt;();

//二,给集合中增加数据--表单数据来源于 网页源码中的form data中的数据

basicNameValuePairs.add(new BasicNameValuePair("txtUser","黑马"));

basicNameValuePairs.add(new BasicNameValuePair("txtPass","123456"));

basicNameValuePairs.add(new BasicNameValuePair("city","北京"));

basicNameValuePairs.add(new BasicNameValuePair("birthday","1980-01-01"));

basicNameValuePairs.add(new BasicNameValuePair("sex","1"));

//三.把上面的装有表单数据的list集合给封装到formentity

UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(basicNameValuePairs);

//四.把entity给装到请求体中

httpPost.setEntity(formEntity);

 

2.3发送请求,获取响应 -- response

02e34d8d1c2cdd94ec7eee8665446f20ca0.jpg

//2.3发送请求,获得响应

CloseableHttpResponse response = httpClient.execute(httpPost);

 

 

2.4获取响应头  响应状态码  响应体

使用HTTPclient做post请求的爬虫技术_第11张图片

//2.4 把response中数据给解析出来

//2.4.1 获得响应头,并且判断是否成功访问

int statusCode = response.getStatusLine().getStatusCode();

if(statusCode == 200){

Header[] headers = response.getAllHeaders();

for (Header header : headers) {

System.out.println("响应头:name:"+header.getName()+"value:"+header.getValue());

}

 

 

        获得响应体

从响应体中能获得 网页,并且把html页面给打印出来; 如果是json格式,需要别的ajax处理

使用HTTPclient做post请求的爬虫技术_第12张图片

//2.4.2 获得响应体

HttpEntity entity = response.getEntity();

//从响应体中获得网页内容并且打印

String html = EntityUtils.toString(entity, "utf-8");

System.out.println(html);

}

 

 

2.5关闭资源

34bbcbe65de230d01660011b105d442c347.jpg

//2.5关闭资源

httpClient.close();

 

打印结果:

使用HTTPclient做post请求的爬虫技术_第13张图片

 

请求参数的样子

使用HTTPclient做post请求的爬虫技术_第14张图片

 

 

 

 

package com.itheima.spider.httpclient;

 

 

import org.apache.http.Header;

import org.apache.http.HttpEntity;

import org.apache.http.client.entity.UrlEncodedFormEntity;

import org.apache.http.client.methods.CloseableHttpResponse;

import org.apache.http.client.methods.HttpGet;

import org.apache.http.client.methods.HttpPost;

import org.apache.http.impl.client.CloseableHttpClient;

import org.apache.http.impl.client.HttpClients;

import org.apache.http.message.BasicNameValuePair;

import org.apache.http.util.EntityUtils;

 

 

import java.io.IOException;

import java.util.ArrayList;

 

 

public class HttpClientPost {

    public static void main(String[] args) throws IOException {

        //1.确定URL

        String indexUrl = "http://www.itcast.cn";

        //2 发送请求,获得数据

        //2.1 创建httpclient对象

        CloseableHttpClient httpClient = HttpClients.createDefault();

        //2.2创建httppost对象--通过URL得到

        HttpPost httpPost = new HttpPost(indexUrl);

        //2.2.1 设置请求头

        httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.4.3505.400");

        //2.2.2 设置请求参数

        //先获得请求参数的键值对 list集合

        //一,先建立泛型为 键值对的 list集合

        ArrayList basicNameValuePairs = new ArrayList;

        //二,给集合中增加数据

        basicNameValuePairs.add(new BasicNameValuePair("txtUser","黑马"));

        basicNameValuePairs.add(new BasicNameValuePair("txtPass","123456"));

        basicNameValuePairs.add(new BasicNameValuePair("city","北京"));

        basicNameValuePairs.add(new BasicNameValuePair("birthday","1980-01-01"));

        basicNameValuePairs.add(new BasicNameValuePair("sex","1"));

        //三.把上面的装有表单数据的list集合给封装到请求体中entity

        UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(basicNameValuePairs);

        //四.把entity给装到请求体中

        httpPost.setEntity(formEntity);

        //2.3发送请求,获得响应

        CloseableHttpResponse response = httpClient.execute(httpPost);

        //2.4 把response中数据给解析出来

        //2.4.1 获得响应头,并且判断是否成功访问

        int statusCode = response.getStatusLine().getStatusCode();

        if(statusCode == 200){

            Header[] headers = response.getHeaders();

            for (Header header : headers) {

                System.out.println("响应头:name:"+header.getName()+"value:"+header.getValue());

            }

            //2.4.2 获得响应体

            HttpEntity entity = response.getEntity();

            //从响应体中获得网页内容并且打印

            String html = EntityUtils.toString(entity, "utf-8");

            System.out.println(html);

        }

        //2.5关闭资源

        httpClient.close();

 

 

    }

}

 

使用HTTPclient做post请求的爬虫技术_第15张图片

 

1.确定一个爬取的URL

1148b226e18452649384cb18a311d892550.jpg

//1.确定URL

String indexUrl = "http://www.itcast.cn";

 

2发送请求,获取数据

 

2.1创建httpclient对象

33c4a8e82a4165895cfcd992e849cc124b3.jpg

//2.1 创建httpclient对象

CloseableHttpClient httpClient = HttpClients.createDefault();

 

2.2创建HTTPPost对象

15e7c8669734c3856b8d719509bda1b9d64.jpg

//2.2创建httppost对象

HttpPost httpPost = new HttpPost(indexUrl);

        

        设置请求头,请求体()

316c2cd98bb143574b911533811ca3a74cd.jpg

//2.2.1 设置请求头

httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.4.3505.400");

 

        设置请求参数:请求体(表单-数据和最下面的)

用于登录的登录名,密码 还有用户信息--键值对

使用HTTPclient做post请求的爬虫技术_第16张图片

 

//先获得请求参数的键值对 list集合

把上面的装有表单数据的list集合给封装到请求体中entity,再到请求体中

 

使用HTTPclient做post请求的爬虫技术_第17张图片

//先获得请求参数的键值对 list集合

//一,先建立泛型为 键值对的 list集合

ArrayList&lt;BasicNameValuePair&gt; basicNameValuePairs = new ArrayList&lt;&gt;();

//二,给集合中增加数据--表单数据来源于 网页源码中的form data中的数据

basicNameValuePairs.add(new BasicNameValuePair("txtUser","黑马"));

basicNameValuePairs.add(new BasicNameValuePair("txtPass","123456"));

basicNameValuePairs.add(new BasicNameValuePair("city","北京"));

basicNameValuePairs.add(new BasicNameValuePair("birthday","1980-01-01"));

basicNameValuePairs.add(new BasicNameValuePair("sex","1"));

//三.把上面的装有表单数据的list集合给封装到formentity

UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(basicNameValuePairs);

//四.把entity给装到请求体中

httpPost.setEntity(formEntity);

 

2.3发送请求,获取响应 -- response

ee3334657c693a17bfd03424c6f895be42a.jpg

//2.3发送请求,获得响应

CloseableHttpResponse response = httpClient.execute(httpPost);

 

 

2.4获取响应头  响应状态码  响应体

使用HTTPclient做post请求的爬虫技术_第18张图片

//2.4 把response中数据给解析出来

//2.4.1 获得响应头,并且判断是否成功访问

int statusCode = response.getStatusLine().getStatusCode();

if(statusCode == 200){

Header[] headers = response.getAllHeaders();

for (Header header : headers) {

System.out.println("响应头:name:"+header.getName()+"value:"+header.getValue());

}

 

 

        获得响应体

从响应体中能获得 网页,并且把html页面给打印出来; 如果是json格式,需要别的ajax处理

使用HTTPclient做post请求的爬虫技术_第19张图片

//2.4.2 获得响应体

HttpEntity entity = response.getEntity();

//从响应体中获得网页内容并且打印

String html = EntityUtils.toString(entity, "utf-8");

System.out.println(html);

}

 

 

2.5关闭资源

d5feaab0c01c2da5226c3ab2ef950982c65.jpg

//2.5关闭资源

httpClient.close();

 

打印结果:

使用HTTPclient做post请求的爬虫技术_第20张图片

 

请求参数的样子

使用HTTPclient做post请求的爬虫技术_第21张图片

 

 

转载于:https://my.oschina.net/u/4140608/blog/3059758

你可能感兴趣的:(使用HTTPclient做post请求的爬虫技术)