使用httpclient4登录百度

阅读更多

使用httpclient4.0.1登录百度,具体步骤如下:

  1. 访问百度首页获取首页HTML内容
  2. 使用百度帐号登录百度
  3. 重复第一步,这时可以比较两次获取的HTML的差别

如果感兴趣想测试的同学,可以去网站上下载需要的Jar包,然后执行一下看看!!

下面附上具体测试的代码:


import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.params.CookiePolicy;
import org.apache.http.client.params.HttpClientParams;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;

/**
 * 使用httpclient4登陆百度
 * @author gohands
 *
 */
public class BaiduLogin {
   
    private static String URL_CHARACTER = "gb2312"; // 统一字符集
    /**
     * @param args
     */
    public static void main(String[] args) throws Exception {
        //初始化
        DefaultHttpClient httpclient = new DefaultHttpClient();
        // 这一行必须要加,否则服务器无法获取登陆状态
        HttpClientParams.setCookiePolicy(httpclient.getParams(),CookiePolicy.BROWSER_COMPATIBILITY);

        // 第一次访问
        String url = "http://www.baidu.com";
        HttpGet httpget = new HttpGet(url);
        HttpResponse response = httpclient.execute(httpget);
        System.out.println("Length1::" + response.getEntity().getContentLength());
        HttpEntity entity = response.getEntity();
        BaiduLogin.printEntity(entity);

        // 登陆【使用POST方式登录】

        // 如果要直接执行,麻烦去申请个百度的帐号

        // 不好意思,给百度做广告了
        HttpPost httpost = new HttpPost("http://passport.baidu.com/?login");
        List nvps = new ArrayList();
        nvps.add(new BasicNameValuePair("username", "gohands"));
        nvps.add(new BasicNameValuePair("password", "*******"));
        httpost.setEntity(new UrlEncodedFormEntity(nvps, BaiduLogin.URL_CHARACTER));
        response = httpclient.execute(httpost);

        // 第二次访问
        System.out.println("\n----------------------------------------");
        System.out.println(response.getStatusLine());
        List cookies = httpclient.getCookieStore().getCookies();
        entity = response.getEntity();
        BaiduLogin.printEntity(entity);

        System.out.println("\n----------------------------------------");

        cookies = httpclient.getCookieStore().getCookies();

        System.out.println("cookies" + cookies.size());
        httpget = new HttpGet(url);
        // httpget.setr
        // httpget.setHeader(name, value)
        response = httpclient.execute(httpget);
        System.out.println("Length2::"
                + response.getEntity().getContentLength());
        entity = response.getEntity();
        BaiduLogin.printEntity(entity);
    }

    /**
     * 输出entity内容,获取和输出返回的HTML文
     * @param entity
     * @throws IllegalStateException
     * @throws IOException
     */
    private static void printEntity(HttpEntity entity)
            throws IllegalStateException, IOException {

        if (entity == null) {
            return;
        }
        System.out.println("HttpEntity start >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
        System.out.println("Response content length: " + entity.getContentLength());
        InputStream is = entity.getContent();
        BufferedReader in = new BufferedReader(new InputStreamReader(is));
        List li = new ArrayList();
        int i;

        //之所以写的如此复杂是因为了解决中文问题
        while ((i = is.read()) != -1) {
            li.add((byte) i);
        }
        byte a[] = new byte[li.size()];
        for (i = 0; i < a.length; i++) {
            a[i] = (byte) li.get(i);
        }
        System.out.println(new String(a, BaiduLogin.URL_CHARACTER)); // 打印HTML内容
        entity.consumeContent(); // entity销毁
        System.out.println("HttpEntity END >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
    }
}

如果有疑问或好的想法随时可以跟我沟通,谢谢阅读!

依赖的Jar包

apache-mime4j-0.6.jar
commons-codec-1.3.jar
commons-logging-1.1.1.jar
httpclient-4.0.1.jar
httpcore-4.0.1.jar
httpmime-4.0.1.jar

 

-----------------------------------------------------------

2015/02/02:

好几年前写的代码了,

现在前端渲染开发比较多,画面上全是JS封装的,

这段代码已经不能正常使用了。

 

 

你可能感兴趣的:(java,HttpClient)