用java模拟curl利用cookie登录抓取一个受密码保护的新页面 ( by quqi99 )

              用java模拟curl利用cookie登录抓取一个受密码保护的新页面 ( by quqi99 )



作者:张华 发表于:2011-01-11

版权声明:可以任意转载,转载时请务必以超链接形式标明文章原始出处和作者信息及本版权声明


        本文主要介绍如何灵活应用HttpURLConnection,代码内容包括:

        1)以GET或POST方式获取网页正文及HTTP头信息

        2)调用登录接口进行登录,然后获取到登录后的cookie

        3) 用获取到的cookie去访问一个受密码保护的新页面。

        代码如下:



import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

import com.TripResearch.util.StringUtils;
import com.TripResearch.util.Utils;

/**
 *
 *
 * @author: huazhang
 * @since: 2011-4-15
 */

public class AutoTop
{

    public final static String CONTENT_TYPE = "Content-Type";

    public static Content curl(String method, String sUrl, Map<String, String> paramMap,
            Map<String, String> requestHeaderMap, boolean isOnlyReturnHeader)
    {
        Content content = null;
        HttpURLConnection httpUrlConnection = null;
        InputStream in = null;
        try
        {
            URL url = new URL(sUrl);
            boolean isPost = "POST".equals(method);
            if (Utils.isEmptyString(method) || (!"GET".equalsIgnoreCase(method) && !"POST".equalsIgnoreCase(method)))
            {
                method = "POST";
            }
            URL resolvedURL = url;
            if ("GET".equals(method) && !Utils.isEmptySafe(paramMap))
            {
                boolean firstParam = true;
                StringBuffer newUrlBuffer = new StringBuffer(url.toExternalForm());
                if (url.getQuery() == null)
                {
                    newUrlBuffer.append("?");
                }
                else
                {
                    newUrlBuffer.append("&");
                }
                for (Map.Entry<String, String> entry : paramMap.entrySet())
                {
                    String encName = URLEncoder.encode(entry.getKey(), StringUtils.ENC_DESC_UTF8);
                    if (firstParam)
                    {
                        firstParam = false;
                    }
                    else
                    {
                        newUrlBuffer.append("&");
                    }
                    String encValue = URLEncoder.encode(entry.getValue(), StringUtils.ENC_DESC_UTF8);
                    newUrlBuffer.append(encName);
                    newUrlBuffer.append("=");
                    newUrlBuffer.append(encValue);
                }
                resolvedURL = new java.net.URL(newUrlBuffer.toString());
            }

            URLConnection urlConnection = resolvedURL.openConnection();
            httpUrlConnection = (HttpURLConnection) urlConnection;
            httpUrlConnection.setRequestMethod(method);
            // Do not follow redirects, We will handle redirects ourself
            httpUrlConnection.setInstanceFollowRedirects(false);
            urlConnection.setDoOutput(true);
            urlConnection.setDoInput(true);
            urlConnection.setConnectTimeout(5000);
            urlConnection.setReadTimeout(5000);
            urlConnection.setUseCaches(false);
            urlConnection.setDefaultUseCaches(false);
            // set request header
            if (!Utils.isEmptySafe(requestHeaderMap))
            {
                for (Map.Entry<String, String> entry : requestHeaderMap.entrySet())
                {
                    String key = entry.getKey();
                    String val = entry.getValue();
                    if (key != null && val != null)
                    {
                        urlConnection.setRequestProperty(key, val);
                    }
                }
            }
            if (isPost)
            {
                urlConnection.setDoOutput(true);
                ByteArrayOutputStream bufOut = new ByteArrayOutputStream();
                boolean firstParam = true;
                for (Map.Entry<String, String> entry : paramMap.entrySet())
                {
                    String encName = URLEncoder.encode(entry.getKey(), StringUtils.ENC_DESC_UTF8);
                    if (firstParam)
                    {
                        firstParam = false;
                    }
                    else
                    {
                        bufOut.write((byte) '&');
                    }
                    String encValue = URLEncoder.encode(entry.getValue(), StringUtils.ENC_DESC_UTF8);
                    bufOut.write(encName.getBytes(StringUtils.ENC_DESC_UTF8));
                    bufOut.write((byte) '=');
                    bufOut.write(encValue.getBytes(StringUtils.ENC_DESC_UTF8));
                }
                byte[] postContent = bufOut.toByteArray();
                if (urlConnection instanceof HttpURLConnection)
                {
                    ((HttpURLConnection) urlConnection).setFixedLengthStreamingMode(postContent.length);
                }
                OutputStream postOut = urlConnection.getOutputStream();
                postOut.write(postContent);
                postOut.flush();
                postOut.close();
            }
            int responseCode = httpUrlConnection.getResponseCode();
            // We handle redirects ourself
            if (responseCode == HttpURLConnection.HTTP_MOVED_PERM || responseCode == HttpURLConnection.HTTP_MOVED_TEMP)
            {
                String location = httpUrlConnection.getHeaderField("Location");
                URL newAction = new URL(url, location);
                // Recurse
                StringBuffer newUrlSb = new StringBuffer(newAction.getProtocol() + "://" + newAction.getHost());
                if (newAction.getPort() != -1)
                {
                    newUrlSb.append(":" + newAction.getPort());
                }
                if (newAction.getPath() != null)
                {
                    newUrlSb.append(newAction.getPath());
                }
                if (newAction.getQuery() != null)
                {
                    newUrlSb.append("?" + newAction.getQuery());
                }
                if (newAction.getRef() != null)
                {
                    newUrlSb.append("#" + newAction.getRef());
                }
                return curl("GET", newUrlSb.toString(), null, requestHeaderMap, isOnlyReturnHeader);
            }
            else if (responseCode == HttpURLConnection.HTTP_OK || responseCode == HttpURLConnection.HTTP_CREATED)
            {
                byte[] bytes = new byte[0];
                if (!isOnlyReturnHeader)
                {
                    in = httpUrlConnection.getInputStream();
                    ByteArrayOutputStream bout = new ByteArrayOutputStream();
                    byte[] buf = new byte[1024];
                    while (true)
                    {
                        int rc = in.read(buf);
                        if (rc <= 0)
                        {
                            break;
                        }
                        else
                        {
                            bout.write(buf, 0, rc);
                        }
                    }
                    bytes = bout.toByteArray();
                    in.close();
                }
                // only fetch Content-Length and Last-Modified header
                String encoding = null;
                if (Utils.isEmptyString(encoding))
                {
                    encoding = getEncodingFromContentType(httpUrlConnection.getHeaderField(CONTENT_TYPE));
                }
                content = new Content(sUrl, new String(bytes, encoding), httpUrlConnection.getHeaderFields());
            }
        }
        catch (Exception e)
        {
            Utils.ERR(e);
            return null;
        }
        finally
        {
            if (httpUrlConnection != null)
            {
                httpUrlConnection.disconnect();
            }
        }
        return content;
    }

    public static String getEncodingFromContentType(String contentType)
    {
        String encoding = null;
        if (Utils.isEmptyString(contentType))
        {
            return null;
        }
        StringTokenizer tok = new StringTokenizer(contentType, ";");
        if (tok.hasMoreTokens())
        {
            tok.nextToken();
            while (tok.hasMoreTokens())
            {
                String assignment = tok.nextToken().trim();
                int eqIdx = assignment.indexOf('=');
                if (eqIdx != -1)
                {
                    String varName = assignment.substring(0, eqIdx).trim();
                    if ("charset".equalsIgnoreCase(varName))
                    {
                        String varValue = assignment.substring(eqIdx + 1).trim();
                        if (varValue.startsWith("/"") && varValue.endsWith("/""))
                        {
                            // substring works on indices
                            varValue = varValue.substring(1, varValue.length() - 1);
                        }
                        if (Charset.isSupported(varValue))
                        {
                            encoding = varValue;
                        }
                    }
                }
            }
        }
        if (Utils.isEmptyString(encoding))
        {
            return StringUtils.ENC_DESC_UTF8;
        }
        return encoding;
    }

    public static void main(String[] args)
    {
        // login
        String email = "";
        String pass = "";
        String loginUrl = "http://www.quqi.com/Login";
        String rateReviewUrl = "http://www.quqi.com/RateUserReview";
        Map<String, String> paramMap = new HashMap<String, String>();
        paramMap.put("email", email);
        paramMap.put("pass", pass);
        Content content = curl("POST", loginUrl, paramMap, null, false);

        // build request headers & do rate of user review
        List<String> cookieList = content.getHeaders().get("Set-Cookie");
        Map<String, String> requestHeaders = new HashMap<String, String>();  
        if (!Utils.isEmptySafe(cookieList))
        {
            StringBuffer sb = new StringBuffer();
            boolean isLast = false;
            int i = 0;
            for (String val : cookieList)
            {
                i++;
                if(i == cookieList.size())
                {
                    isLast = true;
                }
                int pos = val.indexOf("=");
                if (pos != -1)
                {
                    String cookieName = val.substring(0, pos);
                    String cookieVal = val.substring(pos + 1);
                    cookieVal = cookieVal.split(";")[0];
                    if(isLast)
                    {
                        sb.append(cookieName + "=" + cookieVal);
                    }else
                    {
                        sb.append(cookieName + "=" + cookieVal + ";");
                    }
                }
            }
            requestHeaders.put("Cookie", sb.toString());
        }
        paramMap = new HashMap<String, String>();
        paramMap.put("rateValue", "1");
        content = curl("POST", rateReviewUrl, paramMap, requestHeaders, false);

        System.out.println(content.getBody());
    }

}

/**
 * Holder for one fetched HTTP response: the URL that was requested, the
 * decoded response body and the response headers.
 */
class Content
{
    private final String                    url;
    private final String                    body;
    private final Map<String, List<String>> headers;

    /**
     * @param url the URL the content was requested from
     * @param body the response body as text
     * @param headers the response headers, header name to list of values; the
     *            map is stored as given, not copied; <code>null</code> is
     *            replaced by an empty map
     */
    public Content(String url, String body, Map<String, List<String>> headers)
    {
        this.url = url;
        this.body = body;
        // Callers iterate or look up headers right away, so never hand out null
        this.headers = (headers != null) ? headers : new HashMap<String, List<String>>();
    }

    /** @return the URL passed to the constructor */
    public String getUrl()
    {
        return url;
    }

    /** @return the body passed to the constructor */
    public String getBody()
    {
        return body;
    }

    /** @return the response headers; never <code>null</code> */
    public Map<String, List<String>> getHeaders()
    {
        return headers;
    }

}

你可能感兴趣的:(java,String,null,url,byte,encoding)