闲来无事,就爬取了学校教务处系统的课表试试水。
如果有不懂的地方,可以先了解一下HTTP的POST和GET请求。
利用java自定义发送post请求模拟登陆
利用java自定义发送get请求获取学校课表并保存
[TOC]
利用java模拟post登陆:
package com.ccu.post;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Scanner;
import com.ccu.bao.write;
/**
 * Command-line entry point: logs in to the academic-affairs site with a POST request,
 * then fetches the timetable with a GET request and saves it as an HTML file.
 *
 * <p>State is passed between the steps through static fields: {@link #ya} holds the
 * form value sent by {@link #sendPost1()}, and {@link #cookie} holds the session
 * cookie that the GET request reuses.
 *
 * @author Post Method
 */
public class post {
    /** Already URL-encoded value of the {@code encoded} form field; assigned in {@code main}. */
    static String ya;
    /** Session cookie obtained in {@link #sendPost1()}; also read by the GET step. */
    static String cookie;

    /**
     * Fetches a session cookie and POSTs the login form ({@code encoded=<ya>}) with it.
     *
     * <p>The response body is read to the end and discarded. Any exception is reported
     * on {@code System.err} and swallowed, so callers get no failure signal.
     */
    public static void sendPost1() {
        // Form fields of the login request.
        Map<String, String> paramMap = new HashMap<String, String>();
        paramMap.put("encoded", ya);
        // Login endpoint; any spaces in the address are escaped.
        String url = "http://cdjwc.ccu.edu.cn/jsxsd/xk/LoginToXk";
        url = url.replaceAll(" ", "%20");
        PrintWriter out = null;
        InputStream in = null;
        // Obtain the site's session cookie so the login is bound to that session.
        ccc cc = new ccc();
        cookie = cc.getcookie();
        try {
            URL realUrl = new URL(url);
            URLConnection conn = realUrl.openConnection();
            // Browser-like request headers.
            conn.setRequestProperty("Accept", "*/*");
            conn.setRequestProperty("Accept-Encoding", "gzip, deflate");
            conn.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2");
            conn.setRequestProperty("Connection", "keep-alive");
            // No Content-Length header is set here: the previously hard-coded "41" was only
            // right for one particular credential length; the connection is left to work
            // out the body length itself.
            conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
            if (cookie != null) {
                conn.setRequestProperty("Cookie", cookie);
            }
            conn.setRequestProperty("Host", "cdjwc.ccu.edu.cn");
            conn.setRequestProperty("Referer", "http://cdjwc.ccu.edu.cn/jsxsd/");
            conn.setRequestProperty("Upgrade-Insecure-Requests", "1");
            conn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0");
            // Both flags are required to send a request body and read the response.
            conn.setDoOutput(true);
            conn.setDoInput(true);
            out = new PrintWriter(new OutputStreamWriter(conn.getOutputStream(), "UTF-8"));
            // Join the fields as key=value pairs separated by '&'.
            StringBuilder param = new StringBuilder();
            for (Map.Entry<String, String> field : paramMap.entrySet()) {
                if (param.length() > 0) {
                    param.append('&');
                }
                param.append(field.getKey()).append('=').append(field.getValue());
            }
            out.print(param);
            out.flush();
            // Read the response to the end. The content is not used, and it may be
            // compressed (gzip/deflate were offered above), so it is drained as raw bytes.
            in = conn.getInputStream();
            byte[] buffer = new byte[4096];
            while (in.read(buffer) != -1) {
                // discard
            }
        } catch (Exception e) {
            System.err.println("发送 POST 请求出现异常!" + e);
            e.printStackTrace();
        } finally {
            // Close both streams independently so one failure does not skip the other.
            if (out != null) {
                out.close();
            }
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ex) {
                    ex.printStackTrace();
                }
            }
        }
    }

    /**
     * Prompts for account and password, logs in, downloads the timetable and saves it
     * under the account name.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println("请输入账号");
        Scanner scanner = new Scanner(System.in);
        String a = scanner.nextLine();
        System.out.println("请输入密码");
        String b = scanner.nextLine();
        // The form value is base64(account) + "%%%" + base64(password).
        String raw = BASE64.getBase64(a) + "%%%" + BASE64.getBase64(b);
        try {
            // Escape the whole value for the form body. Base64 output may contain '+', '/'
            // and '=' anywhere, not only a trailing "==", and the "%%%" separator must be
            // sent as "%25%25%25".
            ya = URLEncoder.encode(raw, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException("UTF-8 encoding is not available", e);
        }
        // Log in (POST), then fetch the timetable (GET).
        sendPost1();
        String cccc = get.sendGet();
        // Save the timetable as an HTML file named after the account.
        write w = new write();
        w.writee(cccc, a);
    }
}
获取教务处的cookie:
package com.ccu.post;
import java.net.CookieHandler;
import java.net.CookieManager;
import java.net.CookiePolicy;
import java.net.CookieStore;
import java.net.HttpCookie;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLDecoder;
import java.util.List;
/**
 * Obtains the session cookie issued by the academic-affairs site.
 */
public class ccc {
    /**
     * Requests the site's entry page and returns its {@code JSESSIONID} cookie.
     *
     * <p>Side effects: installs a new accept-all {@link CookieManager} as the process-wide
     * default {@link CookieHandler}, and prints the cookie to standard output when found.
     *
     * @return {@code cookie.toString()} of the {@code JSESSIONID} cookie, or {@code null}
     *         when no such cookie was stored or the request failed (the stack trace is printed)
     */
    public String getcookie() {
        HttpURLConnection conn = null;
        try {
            CookieManager manager = new CookieManager();
            manager.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
            CookieHandler.setDefault(manager);
            URL url = new URL("http://cdjwc.ccu.edu.cn/jsxsd/");
            conn = (HttpURLConnection) url.openConnection();
            // Reading the headers performs the request, which lets the manager store the cookies.
            conn.getHeaderFields();
            CookieStore store = manager.getCookieStore();
            List<HttpCookie> lCookies = store.getCookies();
            for (HttpCookie cookie : lCookies) {
                if (cookie.getName().equals("JSESSIONID")) {
                    System.out.println(cookie.toString());
                    return cookie.toString();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // The response body is never read, so release the connection explicitly.
            if (conn != null) {
                conn.disconnect();
            }
        }
        return null;
    }

    /**
     * Manual check: fetches the cookie once, which prints it when found.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ccc c = new ccc();
        c.getcookie();
    }
}
根据自己学校的加密方式编写(本校使用Base64编码):
package com.ccu.post;
import java.io.UnsupportedEncodingException;
import sun.misc.*;
/**
 * Base64 helper used to encode the login credentials.
 *
 * <p>Base64 is an encoding, not encryption: it only changes the representation.
 */
public class BASE64 {
    /**
     * Encodes the UTF-8 bytes of {@code str} as standard Base64 text.
     *
     * <p>Uses {@code java.util.Base64} rather than the internal {@code sun.misc} encoder,
     * which is not available on current JDKs and inserts line breaks into long output.
     * The types are written fully qualified so the file's import list need not change.
     *
     * @param str text to encode; must not be {@code null}
     * @return the Base64 text, without line breaks
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String getBase64(String str) {
        byte[] utf8 = str.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        return java.util.Base64.getEncoder().encodeToString(utf8);
    }
}
get请求获取网页:
package com.ccu.post;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.List;
import java.util.Map;
import java.util.zip.GZIPInputStream;
/**
 * Downloads the timetable page with a GET request, reusing the session cookie
 * stored in {@code post.cookie}.
 */
public class get {
    /**
     * Requests the timetable page and returns the subset of its lines selected by the
     * marker-based filter below.
     *
     * <p>All response headers are printed to standard output. Any exception is reported
     * on standard output and swallowed; whatever text was collected so far is returned.
     *
     * @return the filtered page text, one {@code '\n'} after each kept line; empty if
     *         nothing was read
     */
    public static String sendGet() {
        // Timetable page; the query string is appended exactly once below.
        String url = "http://cdjwc.ccu.edu.cn/jsxsd/xskb/xskb_list.do";
        url = url.replaceAll(" ", "%20");
        String param = "Ves632DSdyV=NEW_XSD_PYGL";
        StringBuffer sb = new StringBuffer();
        InputStream stream = null;
        BufferedReader in = null;
        try {
            String urlNameString = url + "?" + param;
            URL realUrl = new URL(urlNameString);
            URLConnection connection = realUrl.openConnection();
            // Browser-like request headers.
            connection.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
            // Only gzip is offered because it is the only compression decoded below.
            connection.setRequestProperty("Accept-Encoding", "gzip");
            connection.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2");
            connection.setRequestProperty("Connection", "keep-alive");
            if (post.cookie != null) {
                connection.setRequestProperty("Cookie", post.cookie);
            }
            connection.setRequestProperty("Host", "cdjwc.ccu.edu.cn");
            connection.setRequestProperty("Referer", "http://cdjwc.ccu.edu.cn/jsxsd/framework/main.jsp");
            connection.setRequestProperty("Upgrade-Insecure-Requests", "1");
            connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0");
            connection.connect();
            // Print every response header.
            Map<String, List<String>> map = connection.getHeaderFields();
            for (Map.Entry<String, List<String>> header : map.entrySet()) {
                System.out.println(header.getKey() + "--->" + header.getValue());
            }
            // Decompress only when the server says the body is gzip; wrapping an
            // uncompressed body in GZIPInputStream would fail.
            stream = connection.getInputStream();
            if ("gzip".equalsIgnoreCase(connection.getContentEncoding())) {
                stream = new GZIPInputStream(stream);
            }
            in = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
            // Line filter. "skipping" suppresses output; "firstBlankSeen" records whether
            // the first empty line has been passed. The order of the checks matters.
            boolean skipping = false;
            boolean firstBlankSeen = false;
            String line;
            while ((line = in.readLine()) != null) {
                // The copyright line starts a skipped section (the line itself is dropped).
                if (line.equals(" Copyright (C) 湖南强智科技发展有限公司 2003-2013 All Rights Reserved 湘ICP 备12010071号")) {
                    skipping = true;
                }
                // Any empty line after the first one resumes output.
                if (line.equals("") && firstBlankSeen) {
                    skipping = false;
                }
                if (!skipping) {
                    sb.append(line + "\n");
                }
                // The first empty line is kept, then output is suppressed after it.
                if (line.equals("") && !firstBlankSeen) {
                    firstBlankSeen = true;
                    skipping = true;
                }
                // This marker line resumes output starting with the next line.
                if (line.equals(" 放大")) {
                    skipping = false;
                }
            }
        } catch (Exception e) {
            System.out.println("发送GET请求出现异常!" + e);
            e.printStackTrace();
        } finally {
            // Closing the reader closes the underlying stream; fall back to the raw
            // stream when the reader was never created.
            try {
                if (in != null) {
                    in.close();
                } else if (stream != null) {
                    stream.close();
                }
            } catch (Exception e2) {
                e2.printStackTrace();
            }
        }
        return sb.toString();
    }
}
将课表保存为HTML文件:
package com.ccu.bao;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
/**
 * Saves downloaded page text as an HTML file on the current user's desktop.
 */
public class write {
    /**
     * Appends {@code aaa} to {@code <user.home>/Desktop/<bbb>.html}.
     *
     * <p>The file is opened in append mode, so repeated calls with the same {@code bbb}
     * accumulate content. An {@link IOException} is printed and swallowed.
     *
     * @param aaa text to write
     * @param bbb file name without extension (the caller passes the account name)
     */
    public void writee(String aaa, String bbb) {
        // Resolve the desktop from the running user's home directory instead of a
        // path tied to one specific Windows account.
        File desktop = new File(System.getProperty("user.home"), "Desktop");
        File target = new File(desktop, bbb + ".html");
        FileWriter writer = null;
        try {
            writer = new FileWriter(target, true);
            writer.write(aaa);
            writer.flush();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close even when the write fails, so the file handle is not leaked.
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}