JAVA爬取学校教务处课表

小白JAVA爬取学校教务处课表

闲来无事- -就爬取了学校的教务处系统的课表试试水
如果有不懂的可以先了解post,get请求

  • 利用 Java 自定义发送 POST 请求模拟登录

  • 利用java自定义发送get请求获取学校课表并保存


目录

本文目录:

  • 小白JAVA爬取学校教务处课表
      • 目录
      • post
      • cookie
      • 对账号处理
      • get
      • 保存为HTML

post

利用 Java 模拟 POST 登录:


package com.ccu.post;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Scanner;

import com.ccu.bao.write;



/**
 * Entry point of the crawler: reads an account and password from standard
 * input, posts them to the login URL, then asks {@code get} for the timetable
 * page and hands the result to {@code write}.
 *
 * <p>The session cookie fetched before the login request is kept in
 * {@link #cookie} because {@code get} reads that field for its own request.
 */
public class post {
    /** Value of the {@code encoded} form field, already percent-encoded. */
    static String ya;
    /** Session cookie string sent with the login request; also read by {@code get}. */
    static String cookie;

    /** Address the login form is posted to. */
    private static final String LOGIN_URL = "http://cdjwc.ccu.edu.cn/jsxsd/xk/LoginToXk";

    /**
     * Posts the login form ({@code encoded=<ya>}) to {@link #LOGIN_URL}.
     *
     * <p>Before sending, a session cookie is obtained through {@code ccc} and
     * stored in {@link #cookie}. The response body is read to the end and
     * discarded; failures are reported on {@code System.err} and not rethrown.
     */
    public static void sendPost1() {
        // Form fields of the POST request.
        Map<String, String> paramMap = new HashMap<String, String>();
        paramMap.put("encoded", ya);

        PrintWriter out = null;
        BufferedReader in = null;

        // Fetch the session cookie so the login is bound to that session.
        cookie = new ccc().getcookie();
        try {
            URL realUrl = new URL(LOGIN_URL);
            URLConnection conn = realUrl.openConnection();

            // Request headers. No Content-Length is set here: a fixed value is
            // wrong for any other credential length, and the connection fills
            // in the real length when the body is sent. No Accept-Encoding is
            // set either, because the response is read as plain text below.
            conn.setRequestProperty("Accept", "*/*");
            conn.setRequestProperty("Accept-Language","zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2");
            conn.setRequestProperty("Connection","keep-alive");
            conn.setRequestProperty("Content-Type","application/x-www-form-urlencoded");
            if (cookie != null) {
                conn.setRequestProperty("Cookie", cookie);
            }
            conn.setRequestProperty("Host","cdjwc.ccu.edu.cn");
            conn.setRequestProperty("Referer","http://cdjwc.ccu.edu.cn/jsxsd/");
            conn.setRequestProperty("Upgrade-Insecure-Requests","1");
            conn.setRequestProperty("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0");

            // Both flags are required for a request that carries a body.
            conn.setDoOutput(true);
            conn.setDoInput(true);

            // Join the form fields as key=value pairs separated by '&'.
            // Values are expected to be percent-encoded already (see main).
            StringBuilder param = new StringBuilder();
            for (Map.Entry<String, String> field : paramMap.entrySet()) {
                if (param.length() > 0) {
                    param.append('&');
                }
                param.append(field.getKey()).append('=').append(field.getValue());
            }

            out = new PrintWriter(new OutputStreamWriter(conn.getOutputStream(), "UTF-8"));
            out.print(param);
            out.flush();

            // Reading the response is what completes the exchange; the body
            // itself is not needed, so it is drained and dropped.
            in = new BufferedReader(new InputStreamReader(conn.getInputStream(), "UTF-8"));
            while (in.readLine() != null) {
                // discard
            }
        } catch (Exception e) {
            System.err.println("发送 POST 请求出现异常!" + e);
            e.printStackTrace();
        } finally {
            // Close both streams whether or not the request succeeded.
            if (out != null) {
                out.close();
            }
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ex) {
                    ex.printStackTrace();
                }
            }
        }
    }

    /**
     * Builds the value of the {@code encoded} form field: Base64 of the
     * account, the separator {@code %25%25%25}, then Base64 of the password.
     *
     * <p>Each Base64 part is percent-encoded as a whole, so every '=', '+' and
     * '/' it contains is escaped, not just a trailing "==".
     *
     * @param account  account as typed by the user
     * @param password password as typed by the user
     * @return the percent-encoded field value
     */
    private static String encodeCredentials(String account, String password) {
        try {
            String accountPart = URLEncoder.encode(BASE64.getBase64(account), "UTF-8");
            String passwordPart = URLEncoder.encode(BASE64.getBase64(password), "UTF-8");
            return accountPart + "%25%25%25" + passwordPart;
        } catch (UnsupportedEncodingException e) {
            // UTF-8 is available on every Java platform.
            throw new IllegalStateException(e);
        }
    }

    /**
     * Prompts for account and password, logs in, downloads the timetable and
     * saves it under the account name.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println("请输入账号");
        Scanner scanner = new Scanner(System.in);
        String a = scanner.nextLine();
        System.out.println("请输入密码");
        String b = scanner.nextLine();

        // Encode the credentials the way the login form expects them.
        ya = encodeCredentials(a, b);

        // Log in.
        sendPost1();
        // Fetch the timetable page.
        String cccc = get.sendGet();
        // Save the timetable as an HTML file named after the account.
        write w = new write();
        w.writee(cccc, a);
    }
}

获取教务处的cookie:

package com.ccu.post;

import java.net.CookieHandler;
import java.net.CookieManager;
import java.net.CookiePolicy;
import java.net.CookieStore;
import java.net.HttpCookie;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLDecoder;
import java.util.List;
/**
 * Obtains the {@code JSESSIONID} cookie by requesting the site's entry page.
 */
public class ccc {

    /**
     * Requests {@code http://cdjwc.ccu.edu.cn/jsxsd/} and returns the
     * {@code JSESSIONID} cookie the response stored in the cookie manager.
     *
     * <p>Side effect: a new {@link CookieManager} accepting all cookies is
     * installed as the process-wide default {@link CookieHandler}.
     *
     * @return {@code toString()} of the {@code JSESSIONID} cookie (also printed
     *         to standard output), or {@code null} when no such cookie was
     *         received or the request failed
     */
    public String getcookie() {
        HttpURLConnection conn = null;
        try {
            CookieManager manager = new CookieManager();
            manager.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
            CookieHandler.setDefault(manager);

            URL url = new URL("http://cdjwc.ccu.edu.cn/jsxsd/");
            conn = (HttpURLConnection) url.openConnection();
            // Asking for the headers performs the request, which lets the
            // cookie manager record the cookies of the response.
            conn.getHeaderFields();

            CookieStore store = manager.getCookieStore();
            List<HttpCookie> lCookies = store.getCookies();
            for (HttpCookie cookie : lCookies) {
                if ("JSESSIONID".equals(cookie.getName())) {
                    System.out.println(cookie.toString());
                    return cookie.toString();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the connection; only the cookie was needed.
            if (conn != null) {
                conn.disconnect();
            }
        }
        return null;
    }

    /**
     * Fetches the session cookie once; {@code getcookie} prints it.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ccc c = new ccc();
        c.getcookie();
    }
}

对账号处理

根据自己学校的加密方式写:

package com.ccu.post;

import java.io.UnsupportedEncodingException;

import sun.misc.*;

/**
 * Base64 helper used to prepare the login credentials.
 */
public class BASE64 {

    /**
     * Encodes the UTF-8 bytes of {@code str} as Base64.
     *
     * <p>Uses the public {@code java.util.Base64} encoder instead of the
     * internal {@code sun.misc.BASE64Encoder}. The result is a single line
     * with no line breaks.
     *
     * @param str text to encode; must not be {@code null}
     * @return the Base64 text, empty for an empty input
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String getBase64(String str) {
        byte[] utf8 = str.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        return java.util.Base64.getEncoder().encodeToString(utf8);
    }

}

JAVA爬取学校教务处课表_第1张图片

JAVA爬取学校教务处课表_第2张图片

JAVA爬取学校教务处课表_第3张图片

get

get请求获取网页:

package com.ccu.post;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.List;
import java.util.Map;
import java.util.zip.GZIPInputStream;

public class get {
    public static String sendGet() {
        //跳转到课程表的URL
        String url = "http://cdjwc.ccu.edu.cn/jsxsd/xskb/xskb_list.do?Ves632DSdyV=NEW_XSD_PYGL";
        //对网址处理
        url= url.replaceAll(" ", "%20");
        //get请求数据
        String param = "Ves632DSdyV=NEW_XSD_PYGL";
        StringBuffer sb = new StringBuffer(); 
        BufferedReader in = null;
        try {
            String urlNameString = url + "?" + param;
            URL realUrl = new URL(urlNameString);
            // 打开和URL之间的连接
            URLConnection connection = realUrl.openConnection();
            // 设置通用的请求属性
            connection.setRequestProperty("Accept", "text/html,application/xhtml+xm…plication/xml;q=0.9,*/*;q=0.8");
            connection.setRequestProperty("Accept-Encoding", "gzip, deflate");
            connection.setRequestProperty("Accept-Language","zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2");
            connection.setRequestProperty("Connection","keep-alive");
            connection.setRequestProperty("Cookie",post.cookie);
            connection.setRequestProperty("Host","cdjwc.ccu.edu.cn");
            connection.setRequestProperty("Referer","http://cdjwc.ccu.edu.cn/jsxsd/framework/main.jsp");
            connection.setRequestProperty("Upgrade-Insecure-Requests","1");
            connection.setRequestProperty("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0");
            // 建立实际的连接
            connection.connect();
            // 获取所有响应头字段
            Map> map = connection.getHeaderFields();
            // 遍历所有的响应头字段
            for (String key : map.keySet()) {
                System.out.println(key + "--->" + map.get(key));
            }
            // 定义 BufferedReader输入流来读取URL的响应
            InputStream stream = new GZIPInputStream(connection.getInputStream());
            in = new BufferedReader(new InputStreamReader(stream,"UTF-8"));
            String line;
            int k = 0,a = 0;
            /*
             * 获得自己想获取的源代码段
             */
            while ((line = in.readLine()) != null) {
                if(line.equals("  
Copyright (C) 湖南强智科技发展有限公司 2003-2013 All Rights Reserved 湘ICP 备12010071号
"
)){ k=1; } if(line.equals("")&&a==1){ k=0; } if(k==0) sb.append(line+"\n"); if(line.equals("")&&a==0){ a++; k=1; } if(line.equals(" 放大")){ k=0; } } } catch (Exception e) { System.out.println("发送GET请求出现异常!" + e); e.printStackTrace(); } // 使用finally块来关闭输入流 finally { try { if (in != null) { in.close(); } } catch (Exception e2) { e2.printStackTrace(); } } return sb.toString(); } }

JAVA爬取学校教务处课表_第4张图片

JAVA爬取学校教务处课表_第5张图片

JAVA爬取学校教务处课表_第6张图片

保存为HTML

将获取到的课表保存为 HTML 文件:

package com.ccu.bao;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;

/**
 * Saves page content as an {@code .html} file.
 */
public class write {

    /** Directory used when the caller does not name one. */
    private static final String DEFAULT_DIR = "C:/Users/Songzr/Desktop/";

    /**
     * Saves {@code aaa} as {@code <bbb>.html} in the default directory.
     *
     * @param aaa content to write
     * @param bbb file name without extension
     */
    public void writee(String aaa , String bbb) {
        writee(aaa, bbb, DEFAULT_DIR);
    }

    /**
     * Saves {@code aaa} as {@code <bbb>.html} inside {@code dir}.
     *
     * <p>The file is written as UTF-8, the charset the page is decoded with
     * when it is downloaded. An existing file is replaced, so running the
     * program again does not stack a second document onto the first. I/O
     * failures are printed and not rethrown.
     *
     * @param aaa content to write
     * @param bbb file name without extension
     * @param dir directory to create the file in
     */
    public void writee(String aaa, String bbb, String dir) {
        File target = new File(dir, bbb + ".html");
        BufferedWriter writer = null;
        try {
            writer = new BufferedWriter(new java.io.OutputStreamWriter(
                    new java.io.FileOutputStream(target), "UTF-8"));
            writer.write(aaa);
            writer.flush();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close even when the write failed, so the file handle is released.
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

}

结果图
JAVA爬取学校教务处课表_第7张图片

JAVA爬取学校教务处课表_第8张图片

你可能感兴趣的:(Java)