【Xmind】Java数据抓取

https://blog.csdn.net/sam372648886gz/article/details/79374094

1.抓取登录页面后的页面

https://www.cnblogs.com/huihui123/p/7770533.html
思路:1,向登录接口发送POST请求完成登录。2,登录成功后,从响应头的Set-Cookie中获得cookie。3,将cookie放到请求头(Cookie)中,向登录后才能访问的页面发送请求。

1.获得URL对象

 URL url = new URL(urlString); 

2.打开连接

HttpURLConnection urlConn = (HttpURLConnection) url.openConnection(); 

3.向request中注入cookie

 urlConn.setRequestProperty("Cookie", cookie); 

4.创建字符缓冲输入流(BufferedReader)

BufferedReader br = new BufferedReader(new InputStreamReader(urlConn.getInputStream(),charset)); 

 

/**
 * Minimal demo of fetching a page that is only reachable after logging in:
 * POST the credentials, collect the cookies from the response, then replay
 * them in the {@code Cookie} header of a second request.
 */
public class CrawTest {

    /**
     * Downloads the body of {@code urlString} and returns it as text.
     *
     * @param urlString address of the page to fetch
     * @param charset   name of the charset used to decode the response body
     * @param cookie    value for the {@code Cookie} request header; when
     *                  {@code null} or empty no cookie header is sent
     * @return the response body exactly as received (line breaks included);
     *         on an I/O failure, whatever was read before the failure
     *         (possibly the empty string)
     */
    private static String getHtml(String urlString, String charset, String cookie) {
        StringBuilder html = new StringBuilder();
        try {
            URL url = new URL(urlString);
            HttpURLConnection urlConn = (HttpURLConnection) url.openConnection();
            // A failed login yields no cookie; skip the header instead of sending a null value.
            if (cookie != null && !cookie.isEmpty()) {
                urlConn.setRequestProperty("Cookie", cookie);
            }
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(urlConn.getInputStream(), charset))) {
                // Copy raw characters rather than readLine() so that line
                // terminators in the page source are preserved.
                char[] buffer = new char[4096];
                int read;
                while ((read = br.read(buffer)) != -1) {
                    html.append(buffer, 0, read);
                }
            } finally {
                urlConn.disconnect();
            }
        } catch (IOException e) {
            // Best effort: report the failure and return what has been read so far.
            // MalformedURLException is an IOException, so it is covered here too.
            e.printStackTrace();
        }
        return html.toString();
    }

    /**
     * Sends {@code params} as the body of a POST request and returns the
     * cookies the server set in its response.
     *
     * <p>NOTE(review): HttpURLConnection follows redirects by default; if the
     * login endpoint answers with a redirect, cookies set on that intermediate
     * response may not be visible here. Confirm against the target server.
     *
     * @param urlString address of the login endpoint
     * @param params    request body, e.g. {@code "username=a&password=b"};
     *                  {@code null} is sent as an empty body
     * @param charset   name of the charset used to encode the request body
     * @return the {@code name=value} pairs of every {@code Set-Cookie}
     *         response header joined with {@code "; "}, ready to be used as a
     *         {@code Cookie} request header; {@code null} if the request
     *         failed or the response set no cookie
     */
    private static String postGetCookie(String urlString, String params, String charset) {
        String cookies = null;
        try {
            URL url = new URL(urlString);
            URLConnection urlConn = url.openConnection();
            urlConn.setDoInput(true);
            urlConn.setDoOutput(true); // enabling output turns the request into a POST

            byte[] payload = (params == null ? "" : params).getBytes(charset);
            // Fully qualified so this block adds no new import requirement to the file.
            try (java.io.OutputStream body = urlConn.getOutputStream()) {
                body.write(payload);
            }

            // Walk the response headers by index: a response may carry several
            // Set-Cookie headers, and the header name is matched case-insensitively.
            StringBuilder header = new StringBuilder();
            for (int i = 0; ; i++) {
                String value = urlConn.getHeaderField(i);
                if (value == null) {
                    break; // past the last header (or the response could not be read)
                }
                if (!"Set-Cookie".equalsIgnoreCase(urlConn.getHeaderFieldKey(i))) {
                    continue;
                }
                String pair = extractCookiePair(value);
                if (pair.isEmpty()) {
                    continue;
                }
                if (header.length() > 0) {
                    header.append("; ");
                }
                header.append(pair);
            }
            if (header.length() > 0) {
                cookies = header.toString();
            }
        } catch (IOException e) {
            // Best effort: report the failure and signal "no cookie" with null.
            e.printStackTrace();
        }
        return cookies;
    }

    /**
     * Extracts the {@code name=value} part of a single {@code Set-Cookie}
     * header value, dropping attributes such as {@code Path} or
     * {@code HttpOnly} that must not be echoed back to the server.
     *
     * @param setCookieValue raw value of one {@code Set-Cookie} header
     * @return the trimmed text before the first {@code ';'}, or the empty
     *         string when {@code setCookieValue} is {@code null}
     */
    static String extractCookiePair(String setCookieValue) {
        if (setCookieValue == null) {
            return "";
        }
        int end = setCookieValue.indexOf(';');
        String pair = end < 0 ? setCookieValue : setCookieValue.substring(0, end);
        return pair.trim();
    }

    /** Logs in against the local demo application and prints the page behind the login. */
    public static void main(String[] args) {
        String cookie = postGetCookie("http://localhost:8080/loginDemo/login",
                "username=admin&password=123456", "utf-8");
        String html = getHtml("http://localhost:8080/loginDemo/index.jsp", "utf-8", cookie);
        System.out.println(html); // the source of the page that is only visible after login
    }
}

 

你可能感兴趣的:(Xmind)