此代码只适用于新版正方教务系统
本文以代码为主。具体的分析过程在很多爬虫教程中都有讲述:主要是学会使用各种抓包工具,仔细分析提交时的请求头和表单,再用各种方法提取或构造出需要提交的参数。具体步骤可以搜索其他爬虫教程;我个人表达能力不是很好,怕讲不明白或者讲错,所以这里不再详细讲述。
代码借鉴了许多版本的 Java 正方爬虫。因为在 Android 端 HttpClient 已经被 Google 官方弃用,所以代码基于 Jsoup 完成。
Jsoup库用于网络请求和html文本解析
fastjson库用来解析JSON数据
在Android Studio的build.gradle(Module)添加对应依赖:
implementation 'org.jsoup:jsoup:1.11.3'
implementation 'com.alibaba:fastjson:1.1.54.android'
/**
 * Converts between Base64 text and lowercase hexadecimal text.
 *
 * <p>Both directions work directly on the textual digits: three hex digits
 * (12 bits) correspond to two Base64 characters.
 */
public class Base64 {
    /** Base64 alphabet; the index of a character is its 6-bit value. */
    public static String b64map = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    /** Padding character that terminates Base64 input and fills Base64 output. */
    private static final char B64_PAD = '=';
    /** Lowercase hexadecimal digits indexed by value. */
    private static final String HEX_DIGITS = "0123456789abcdef";

    /**
     * Returns the lowercase hexadecimal digit for a value.
     *
     * @param a value in the range 0..15
     * @return the matching hex digit
     * @throws StringIndexOutOfBoundsException if {@code a} is outside 0..15
     */
    public static char int2char(int a) {
        return HEX_DIGITS.charAt(a);
    }

    /**
     * Converts Base64 text to hexadecimal text.
     *
     * <p>Characters outside the Base64 alphabet are skipped and decoding stops
     * at the first padding character.
     *
     * @param s Base64 text
     * @return lowercase hexadecimal text
     */
    public static String b64tohex(String s) {
        StringBuilder ret = new StringBuilder();
        int k = 0;    // position inside the current group of four Base64 characters
        int slop = 0; // bits of the previous character not yet emitted as a hex digit
        for (int i = 0; i < s.length(); ++i) {
            char ch = s.charAt(i);
            if (ch == B64_PAD) {
                break;
            }
            int v = b64map.indexOf(ch);
            if (v < 0) {
                continue;
            }
            if (k == 0) {
                ret.append(int2char(v >> 2));
                slop = v & 3;
                k = 1;
            } else if (k == 1) {
                ret.append(int2char((slop << 2) | (v >> 4)));
                slop = v & 0xf;
                k = 2;
            } else if (k == 2) {
                ret.append(int2char(slop));
                ret.append(int2char(v >> 2));
                slop = v & 3;
                k = 3;
            } else {
                ret.append(int2char((slop << 2) | (v >> 4)));
                ret.append(int2char(v & 0xf));
                k = 0;
            }
        }
        // A single dangling character still carries two bits; emit them as one digit.
        if (k == 1) {
            ret.append(int2char(slop << 2));
        }
        return ret.toString();
    }

    /**
     * Converts hexadecimal text to Base64 text, padded with '=' to a multiple
     * of four characters.
     *
     * @param h hexadecimal text
     * @return Base64 text
     * @throws NumberFormatException if {@code h} contains a non-hex character
     */
    public static String hex2b64(String h) {
        StringBuilder ret = new StringBuilder();
        int i;
        int c;
        // Three hex digits (12 bits) become two Base64 characters.
        for (i = 0; i + 3 <= h.length(); i += 3) {
            c = Integer.parseInt(h.substring(i, i + 3), 16);
            ret.append(b64map.charAt(c >> 6));
            ret.append(b64map.charAt(c & 63));
        }
        if (i + 1 == h.length()) {
            // One digit left: 4 bits, shifted into the top of a 6-bit group.
            c = Integer.parseInt(h.substring(i, i + 1), 16);
            ret.append(b64map.charAt(c << 2));
        } else if (i + 2 == h.length()) {
            // Two digits left: 8 bits, split into 6 bits plus 2 bits.
            c = Integer.parseInt(h.substring(i, i + 2), 16);
            ret.append(b64map.charAt(c >> 2));
            ret.append(b64map.charAt((c & 3) << 4));
        }
        while ((ret.length() & 3) > 0) {
            ret.append(B64_PAD);
        }
        return ret.toString();
    }
}
/**
 * RSA public-key encryption of a short text using PKCS#1 v1.5 type-2 padding
 * ({@code 00 02 <non-zero random bytes> 00 <message>}).
 *
 * <p>The class keeps no per-call state, so concurrent calls do not interfere.
 */
public class RSAEncoder {
    /** Source of the non-zero padding bytes. */
    private static final java.security.SecureRandom PAD_RANDOM = new java.security.SecureRandom();

    /**
     * Encrypts {@code pwd} with the given RSA public key.
     *
     * @param pwd  text to encrypt; it is encoded as UTF-8 before padding
     * @param nStr RSA modulus as hexadecimal text
     * @param eStr RSA public exponent as hexadecimal text
     * @return the ciphertext as hexadecimal text with an even number of digits
     * @throws IllegalArgumentException if the encoded text does not fit into
     *         the modulus together with the mandatory 11 bytes of padding
     * @throws NumberFormatException if {@code nStr} or {@code eStr} is not hexadecimal
     */
    public static String RSAEncrypt(String pwd, String nStr, String eStr) {
        BigInteger modulus = new BigInteger(nStr, 16);
        BigInteger publicExponent = new BigInteger(eStr, 16);
        int blockLength = (modulus.bitLength() + 7) >> 3;
        BigInteger cipher = RSADoPublic(pkcs1pad2(pwd, blockLength), publicExponent, modulus);
        String hex = cipher.toString(16);
        // Keep whole bytes: prepend a zero digit when the digit count is odd.
        if ((hex.length() & 1) != 0) {
            hex = "0" + hex;
        }
        return hex;
    }

    /** Performs the raw RSA public operation {@code x^e mod n}. */
    private static BigInteger RSADoPublic(BigInteger x, BigInteger e, BigInteger n) {
        return x.modPow(e, n);
    }

    /**
     * Builds the padded block for {@code s}.
     *
     * @param s text to embed at the end of the block
     * @param n total block length in bytes
     * @return the padded block as a non-negative integer
     * @throws IllegalArgumentException if the UTF-8 form of {@code s} is longer
     *         than {@code n - 11} bytes
     */
    private static BigInteger pkcs1pad2(String s, int n) {
        byte[] message = s.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        // Compare against the encoded length, not the character count, so that
        // multi-byte characters cannot overrun the block.
        if (n < message.length + 11) {
            throw new IllegalArgumentException("Message too long for RSAEncoder");
        }
        byte[] block = new byte[n];
        int separator = n - message.length - 1;
        block[0] = 0;
        block[1] = 2;
        byte[] one = new byte[1];
        for (int i = 2; i < separator; i++) {
            // Padding bytes must be non-zero; a zero byte marks the end of the padding.
            do {
                PAD_RANDOM.nextBytes(one);
            } while (one[0] == 0);
            block[i] = one[0];
        }
        block[separator] = 0;
        System.arraycopy(message, 0, block, separator + 1, message.length);
        return new BigInteger(block);
    }
}
目前只实现了模拟登录和个人信息查询功能,其他功能的思路与查询个人信息相同。代码偏长,尚未简化,使用时可以自行改动简化。
/**
 * Logs in to a Zhengfang educational administration site through Jsoup and
 * fetches the student-information page.
 *
 * <p>Configure an instance with {@link #Baseurl(String)},
 * {@link #Username(String)} and {@link #Password(String)}, call
 * {@link #getStudentInformaction()}, then read the last parsed page with
 * {@link #GetString()}. The calls perform blocking network I/O.
 */
public class JWGLUtils {
    /** The site could not be reached or a request failed. */
    public static final int STATUS_CLOSE = 0;
    /** The site rejected the account or password. */
    public static final int STATUS_USERFAULT = -1;
    /** No account was supplied. */
    public static final int STATUS_USERNULL = -2;
    /** No password was supplied. */
    public static final int STATUS_PASSNULL = -3;
    /** Login succeeded (internal result of the login step). */
    public static final int STATUS_SUCCEED = 1;
    /** The query succeeded. */
    public static final int STATUS_INDEX = 2;
    /** Reserved for "no response for a long time"; not returned by this class at present. */
    public static final int STATUS_RESPOND = 3;

    /** User-Agent sent with every request. */
    private static final String USER_AGENT =
            "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10";
    /** Accept header sent with every request. */
    private static final String ACCEPT =
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8";
    /** Timeout applied to every request, in milliseconds. */
    private static final int TIMEOUT_MS = 5000;
    /** Login page path; the request timestamp is appended to it. */
    private static final String LOGIN_PATH = "/jwglxt/xtgl/login_slogin.html?language=zh_CN&_t=";

    /** Base URL of the school's site; set through {@link #Baseurl(String)}. */
    private String Baseurl;
    /** Cookies handed out by the server and sent back on later requests. */
    private Map<String, String> cookies = new HashMap<>();
    /** Public key parts returned by the server, Base64 encoded. */
    private String modulus;
    private String exponent;
    /** Value of the {@code csrftoken} element parsed from the login page. */
    private String csrftoken;
    /** Most recently parsed response body. */
    private Document document;
    /** Login account. */
    private String stuNum;
    /** Plain-text login password as supplied by the caller; never overwritten. */
    private String password;
    /** RSA-encrypted, Base64-encoded password submitted with the login form. */
    private String encryptedPassword;
    /** Timestamp in milliseconds taken when the login page is requested. */
    private String mNowTime;

    /**
     * Sets the base URL.
     *
     * @param Baseurl base address of the school's site
     * @return this instance, for chaining
     */
    public JWGLUtils Baseurl(String Baseurl) {
        this.Baseurl = Baseurl;
        return this;
    }

    /**
     * Sets the login account.
     *
     * @param stuNum account on the site
     * @return this instance, for chaining
     */
    public JWGLUtils Username(String stuNum) {
        this.stuNum = stuNum;
        return this;
    }

    /**
     * Sets the login password.
     *
     * @param password plain-text password
     * @return this instance, for chaining
     */
    public JWGLUtils Password(String password) {
        this.password = password;
        return this;
    }

    /**
     * @return the most recently parsed response document, or {@code null} if
     *         no response has been parsed yet
     */
    public Document GetString() {
        return this.document;
    }

    /** Creates a connection to {@code url} carrying the headers and timeout shared by all requests. */
    private Connection newConnection(String url) {
        return Jsoup.connect(url)
                .header("User-Agent", USER_AGENT)
                .header("Cache-Control", "no-cache")
                .header("Accept", ACCEPT)
                .header("Accept-Language", "zh-CN,zh;q=0.9")
                .header("Upgrade-Insecure-Requests", "1")
                .header("Connection", "keep-alive")
                .timeout(TIMEOUT_MS);
    }

    /**
     * Requests the login page, stores its cookies and parses the
     * {@code csrftoken} value.
     *
     * @return {@code true} on success, {@code false} if the request or the parsing failed
     */
    private boolean getCsrftoken() {
        try {
            mNowTime = String.valueOf(System.currentTimeMillis());
            Connection.Response response = newConnection(Baseurl + LOGIN_PATH + mNowTime).execute();
            // Copy into our own map so later responses can be merged into it.
            cookies = new HashMap<>(response.cookies());
            document = Jsoup.parse(response.body());
            csrftoken = document.getElementById("csrftoken").val();
        } catch (Exception ex) {
            ex.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * Fetches the public key and encrypts the password with it.
     *
     * <p>The ciphertext is stored separately from the plain-text password so
     * that a repeated login on the same instance encrypts the original
     * password again instead of the previous ciphertext.
     *
     * @return {@code true} on success, {@code false} if the request failed or
     *         the response could not be used as a public key
     */
    private boolean getRSApublickey() {
        try {
            Connection.Response response =
                    newConnection(Baseurl + "/jwglxt/xtgl/login_getPublicKey.html?" + "time=" + mNowTime)
                            .cookies(cookies)
                            .ignoreContentType(true)
                            .execute();
            JSONObject jsonObject = JSON.parseObject(response.body());
            modulus = jsonObject.getString("modulus");
            exponent = jsonObject.getString("exponent");
            String cipherHex = RSAEncoder.RSAEncrypt(
                    password, Base64.b64tohex(modulus), Base64.b64tohex(exponent));
            encryptedPassword = Base64.hex2b64(cipherHex);
        } catch (IOException | RuntimeException e) {
            // Unchecked failures (unparsable JSON, missing key fields, bad key
            // text) are reported like a failed request instead of escaping.
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * Submits the login form and merges the cookies of the response into the
     * session cookies.
     *
     * @return {@link #STATUS_SUCCEED} when the response contains no element
     *         with id {@code tips}, {@link #STATUS_USERFAULT} when it does,
     *         {@link #STATUS_CLOSE} if the request failed
     */
    private int beginLogin() {
        String loginUrl = Baseurl + LOGIN_PATH + mNowTime;
        try {
            Connection.Response response = newConnection(loginUrl)
                    .header("Content-Type", "application/x-www-form-urlencoded;charset=utf-8")
                    .header("Referer", loginUrl)
                    .header("Proxy-Connection", "keep-alive")
                    .header("Pragma", "no-cache")
                    .data("csrftoken", csrftoken)
                    .data("yhm", stuNum)
                    // The "mm" field is intentionally added twice, as in the original code.
                    // NOTE(review): presumably mirrors the site's login form - confirm before removing.
                    .data("mm", encryptedPassword)
                    .data("mm", encryptedPassword)
                    .cookies(cookies)
                    .ignoreContentType(true)
                    .method(Connection.Method.POST)
                    .execute();
            // Keep cookies the server did not resend; new values replace old ones.
            cookies.putAll(response.cookies());
            document = Jsoup.parse(response.body());
        } catch (IOException e) {
            e.printStackTrace();
            return STATUS_CLOSE;
        }
        return document.getElementById("tips") == null ? STATUS_SUCCEED : STATUS_USERFAULT;
    }

    /**
     * Logs in and requests the student-information page; on success the
     * parsed page is available through {@link #GetString()}.
     *
     * @return {@link #STATUS_INDEX} on success, {@link #STATUS_USERNULL} or
     *         {@link #STATUS_PASSNULL} if the account or password is missing
     *         or empty, {@link #STATUS_USERFAULT} if the login was rejected,
     *         {@link #STATUS_CLOSE} if any request failed
     */
    public int getStudentInformaction() {
        if (stuNum == null || stuNum.isEmpty()) {
            return STATUS_USERNULL;
        }
        if (password == null || password.isEmpty()) {
            return STATUS_PASSNULL;
        }
        if (!getCsrftoken() || !getRSApublickey()) {
            return STATUS_CLOSE;
        }
        int loginStatus = beginLogin();
        if (loginStatus == STATUS_CLOSE || loginStatus == STATUS_USERFAULT) {
            return loginStatus;
        }
        System.out.println("登陆成功");

        Connection.Response response;
        try {
            response = newConnection(
                    Baseurl + "/jwglxt/xsxxxggl/xsxxwh_cxCkDgxsxx.html?gnmkdm=N100801&su=" + stuNum)
                    .cookies(cookies)
                    .header("Referer", Baseurl + LOGIN_PATH + mNowTime)
                    .header("Proxy-Connection", "keep-alive")
                    .header("Pragma", "no-cache")
                    .ignoreContentType(true)
                    .method(Connection.Method.GET)
                    .execute();
        } catch (IOException e) {
            e.printStackTrace();
            return STATUS_CLOSE;
        }
        document = Jsoup.parse(response.body());
        return STATUS_INDEX;
    }
}
/**
 * Usage example: runs a student-information query on a background thread and
 * prints a message for the returned status code.
 */
public class Test {
    /** Starts the query; network requests on Android must run off the main thread. */
    public void Hello() {
        Runnable queryTask = new Runnable() {
            @Override
            public void run() {
                JWGLUtils client = new JWGLUtils();
                client.Baseurl("教务处网站");
                client.Username("登陆账号");
                client.Password("密码");

                int status = client.getStudentInformaction();
                if (status == 0) {
                    System.out.println("教务处网站关闭");
                } else if (status == 2) {
                    // Query succeeded: print the parsed page.
                    System.out.println(client.GetString());
                } else if (status == -1) {
                    System.out.println("账号密码错误请重新输入");
                } else if (status == -2) {
                    System.out.println("账号不得为空");
                } else if (status == -3) {
                    System.out.println("密码不得为空");
                }
            }
        };
        new Thread(queryTask).start();
    }
}
本人代码水平有限,大部分都是借鉴大神们的代码。为了方便像我一样写 Android 爬虫的初学者,在此分享自己的一点经验;如有疏漏,希望各位多提意见。