网络爬虫入门(三)-Cookie登录

一.Cookie的概念

学过javaweb应该对cookie不陌生

Cookie是浏览器存储用户信息的一小段文本,它保存了用户的ID等信息,这些信息可以被服务器端识别,并作为标识用户的手段,以此来判定用户是不是第一次访问
Cookie是当你浏览某网站时,网站存储在你机器上的一个小文本文件,它记录了你的用户ID、密码、浏览过的网页、停留的时间等信息,当你再次来到该网站时,网站通过读取Cookie,得知你的相关信息,就可以做出相应的动作,如在页面显示欢迎你的标语,或者让你不用输入ID、密码就直接登录等等

二.保留Cookie继续模拟登录

这里我们还是以登录csdn为例子

  • 我们先用commons包下的HttpClient尝试一下
 private String Url = "https://passport.csdn.net/account/verify";
    private String lt,execution;
    private HttpClient httpClient;
    
    /**
     * Logs in with commons-httpclient and then replays the session cookies on a second,
     * independent client.
     *
     * <p>Steps: fetch the login page to read the hidden {@code lt}/{@code execution}
     * tokens, POST the login form and, only when the POST answers 200, copy every cookie
     * held by the first client into a {@code cookie} request header on a fresh client,
     * GET the same URL and print the response body.
     *
     * <p>{@code 你的用户名}, {@code 你的密码} and {@code 自己抓包获取吧} are placeholders that
     * must be replaced with the real user name, password and {@code fkid} value (the last
     * one is meant to be captured from a real login request).
     *
     * @throws Exception if a request fails or the sleep is interrupted
     */
    @Test
    public void getCookieLogin() throws  Exception{
        httpClient = new HttpClient();
        PostMethod postMethod = new PostMethod(Url);
        String cookieHeader = null;
        try {
            // Populates lt and execution from the login page before the form is filled in.
            GetUrl();

            postMethod.addParameter("gps","");
            postMethod.addParameter("username",你的用户名);
            postMethod.addParameter("password",你的密码);
            postMethod.addParameter("rememberMe","true");
            postMethod.addParameter("lt",lt);
            postMethod.addParameter("execution",execution);
            postMethod.addParameter("fkid",自己抓包获取吧);
            postMethod.addParameter("_eventId","submit");
            postMethod.addParameter("iframe","false");

            // Pause kept from the original; presumably to avoid submitting too quickly -- confirm.
            Thread.sleep(5000);

            int statsCode = httpClient.executeMethod(postMethod);
            if(statsCode == 200){
                // Flatten every cookie the first client collected into one header value.
                StringBuilder cookieText = new StringBuilder();
                for(Cookie cookie:httpClient.getState().getCookies()){
                    cookieText.append(cookie.toString()).append(';');
                }
                cookieHeader = cookieText.toString();
            }
        } finally {
            // Release the connection even when the request or token lookup throws.
            postMethod.releaseConnection();
        }

        if(cookieHeader == null){
            // The login POST did not answer 200, so there is nothing to replay.
            return;
        }

        // A brand-new client has no state of its own, so the cookies must be set by hand.
        HttpClient httpClient1 = new HttpClient();
        GetMethod getMethod = new GetMethod(Url);
        try {
            getMethod.setRequestHeader("cookie",cookieHeader);
            httpClient1.executeMethod(getMethod);
            System.out.println(getMethod.getResponseBodyAsString());
        } finally {
            getMethod.releaseConnection();
        }

    }


    /**
     * Fetches the page at {@code Url} with the shared client and stores the values of its
     * {@code lt} and {@code execution} inputs in the fields of the same name.
     *
     * @throws Exception if the request or reading the response body fails
     */
    private void GetUrl() throws  Exception{
        GetMethod getMethod = new GetMethod(Url);
        try {
            httpClient.executeMethod(getMethod);
            String html = getMethod.getResponseBodyAsString();
            lt = GetToken("lt",html);
            execution = GetToken("execution",html);
        } finally {
            // Release the connection even when the request or parsing throws.
            getMethod.releaseConnection();
        }

    }

    /**
     * Reads the {@code value} attribute of the first {@code input} element whose
     * {@code name} attribute equals {@code reg}.
     *
     * @param reg  name of the input element to look for
     * @param html markup to search
     * @return the attribute value of the first match, or an empty string when nothing matches
     */
    private String GetToken(String reg,String html) {
        Elements matches = Jsoup.parse(html).select("input[name="+reg+"]");
        for(Element input:matches){
            if(input==null){
                continue;
            }
            // Only the first non-null match is of interest.
            return input.attr("value");
        }
        return "";
    }
  • 如果打印出的响应体如下,就说明登录成功了

网络爬虫入门(三)-Cookie登录_第1张图片

  • 我们再尝试下apache包下的HttpClient
 private String Url = "https://passport.csdn.net/account/verify";
    private String lt, execution;
    private CloseableHttpClient httpClient;

    /**
     * Logs in with Apache HttpClient and then reuses the captured cookies on a second,
     * independent client through a shared {@code HttpClientContext}.
     *
     * <p>Steps: build a client backed by a {@code BasicCookieStore}, fetch the login page to
     * read the hidden {@code lt}/{@code execution} tokens, POST the login form, print the
     * response and the stored cookies, close that client, then GET the same URL with a
     * default client that is handed the context holding the cookie store.
     *
     * <p>{@code 你的用户名}, {@code 你的密码} and {@code 自己抓包获取} are placeholders that
     * must be replaced with the real user name, password and {@code fkid} value (the last
     * one is meant to be captured from a real login request).
     *
     * @throws Exception if a request fails or the sleep is interrupted
     */
    @Test
    public void getCookieLogin() throws Exception {
        // Cookie store that receives the cookies set during login.
        CookieStore cookieStore = new BasicCookieStore();
        // Execution context that carries the cookie store to the second client.
        HttpClientContext context = HttpClientContext.create();
        context.setCookieStore(cookieStore);

        httpClient = HttpClients.custom().setDefaultCookieStore(cookieStore).build();
        try {
            HttpPost httpPost = new HttpPost(Url);

            // Populates lt and execution from the login page before the form is built.
            GetUrl();

            List<BasicNameValuePair> params = new ArrayList<BasicNameValuePair>();
            params.add(new BasicNameValuePair("gps",""));
            params.add(new BasicNameValuePair("username",你的用户名));
            params.add(new BasicNameValuePair("password",你的密码));
            params.add(new BasicNameValuePair("rememberMe","true"));
            params.add(new BasicNameValuePair("lt",lt));
            params.add(new BasicNameValuePair("execution",execution));
            params.add(new BasicNameValuePair("fkid",自己抓包获取));
            params.add(new BasicNameValuePair("_eventId","submit"));
            params.add(new BasicNameValuePair("iframe","false"));

            httpPost.setEntity(new UrlEncodedFormEntity(params,"UTF-8"));

            // Pause kept from the original; presumably to avoid submitting too quickly -- confirm.
            Thread.sleep(3000);

            CloseableHttpResponse response = httpClient.execute(httpPost);
            try {
                System.out.println(EntityUtils.toString(response.getEntity()));
                for (Cookie cookie : cookieStore.getCookies()) {
                    System.out.println(cookie.getName()+":"+cookie.getValue());
                }
            } finally {
                response.close();
            }
        } finally {
            // The login cookies are captured by now, so this client is closed and a new
            // one is used for the experiment below.
            httpClient.close();
        }

        HttpGet httpGet = new HttpGet(Url);
        CloseableHttpClient httpClient1 = HttpClients.createDefault();
        try {
            // Without the context no cookies are sent and the login does not carry over.
            CloseableHttpResponse response1 = httpClient1.execute(httpGet,context);
            try {
                System.out.println(EntityUtils.toString(response1.getEntity()));
            } finally {
                response1.close();
            }
        } finally {
            httpClient1.close();
        }
    }

    /**
     * Fetches the page at {@code Url} with the shared client and stores the values of its
     * {@code lt} and {@code execution} inputs in the fields of the same name.
     *
     * @throws Exception if the request or reading the response entity fails
     */
    private void GetUrl() throws Exception {

        HttpGet httpGet = new HttpGet(Url);
        CloseableHttpResponse response = httpClient.execute(httpGet);
        try {
            String html = EntityUtils.toString(response.getEntity());
            lt = GetToken("lt", html);
            execution = GetToken("execution", html);
        } finally {
            // Close the response even when reading or parsing the body throws.
            response.close();
        }

    }

    /**
     * Reads the {@code value} attribute of the first {@code input} element whose
     * {@code name} attribute equals {@code reg}.
     *
     * @param reg  name of the input element to look for
     * @param html markup to search
     * @return the attribute value of the first match, or an empty string when nothing matches
     */
    private String GetToken(String reg, String html) {
        String selector = "input[name=" + reg + "]";
        Document page = Jsoup.parse(html);
        Elements candidates = page.select(selector);
        for (int i = 0; i < candidates.size(); i++) {
            Element input = candidates.get(i);
            if (input != null) {
                // Stop at the first non-null match.
                return input.attr("value");
            }
        }
        return "";
    }

你可能感兴趣的:(网络爬虫)