英语四六级成绩查询地址,也是本次咱们要爬取的地址
<!-- Apache HttpClient: provides HttpGet / CloseableHttpClient used to call the neea.edu.cn endpoints -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.12</version>
</dependency>
<!-- Apache Commons Lang: provides StringUtils, used for blank checks on the request parameters -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.9</version>
</dependency>
<!-- fastjson: provides JSONObject, used to parse the score payload into a Map -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.68</version>
</dependency>
1.第一个请求,获取验证码
①分析请求传参
http://cache.neea.edu.cn/Imgs.do
这个请求一共带了三个参数
c、ik、t
后面通过分析发现,参数c是写死的,值为CET;ik就是准考证号;t是一个小于1的随机数,属于非必带参数
②分析返回值
请求成功后返回的是如下这样的一个字符串,而不是直接返回的验证码图片
result.imgs("a37b67cc9ebb4405916cb9490ddba623");
查看上图的请求发现,页面是把上个请求返回的字符串截取后拼接成一个新请求,从而获取到验证码图片并显示在页面上的
分析完成后先用Postman接口测试下
测试的时候发现参数都带了,但是还是报403,经过一番排查后发现,请求头中要带Referer=http://cet.neea.edu.cn/cet才行
③.代码实现
public static void main(String[] args) throws Exception {
String c = "CET";
String ik = "420090201100101";
// 获取验证码地址的请求,后面写成动态的
String url = "http://cache.neea.edu.cn/Imgs.do?c=" + c + "&ik=" + ik;
// 获得一个httpclient对象
HttpGet httpget = new HttpGet(url);
CloseableHttpClient httpclient = HttpClientBuilder.create().build();
// 设置请求头
httpget.addHeader("Referer", "http://cet.neea.edu.cn/cet");
CloseableHttpResponse response = null;
String resultString = "";
String img_url = "http://cet.neea.edu.cn/imgs/";
String cookie = "";
try {
response = httpclient.execute(httpget);
if (response.getStatusLine().getStatusCode() == 200) {
resultString = EntityUtils.toString(response.getEntity(), "UTF-8");
// 取到括号里面的图片名称
String substr = resultString.substring(13, resultString.length() - 3);
System.out.println(substr);
img_url += substr + ".png";
// 获取图片的Base64码
String imageToBase64 = NetImageToBase64(img_url);
System.out.println(imageToBase64);
} catch (Exception e) {
e.printStackTrace();
throw new InternalError(e.getMessage());
}
}
/**
 * Downloads an image over HTTP and returns its bytes Base64-encoded.
 *
 * @param netImagePath absolute HTTP URL of the image
 * @return the Base64 text of the image bytes, on a single line without line separators,
 *         so it can be appended directly to a {@code data:image/png;base64,} prefix
 * @throws java.io.UncheckedIOException if the URL is malformed or the download fails
 */
private static String NetImageToBase64(String netImagePath) {
    ByteArrayOutputStream data = new ByteArrayOutputStream();
    try {
        HttpURLConnection conn = (HttpURLConnection) new URL(netImagePath).openConnection();
        conn.setRequestMethod("GET");
        conn.setConnectTimeout(5000);
        // Without a read timeout a stalled server would block this call forever
        conn.setReadTimeout(5000);
        // try-with-resources closes the stream even when a read fails midway
        try (InputStream is = conn.getInputStream()) {
            byte[] by = new byte[1024];
            int len;
            while ((len = is.read(by)) != -1) {
                data.write(by, 0, len);
            }
        }
    } catch (IOException e) {
        // Fail loudly instead of encoding an empty or truncated image
        throw new java.io.UncheckedIOException("Failed to download image " + netImagePath, e);
    }
    // java.util.Base64 replaces the JDK-internal sun.misc.BASE64Encoder
    return java.util.Base64.getEncoder().encodeToString(data.toByteArray());
}
就这样我们得到验证码的Base64编码,把控制台打印出来的编码前面加上(data:image/png;base64,)复制到浏览器打开,这样就获得验证码信息了
....
// Add the following right after System.out.println(imageToBase64); in the code above.
// Read the Set-Cookie header of the captcha response.
String cookies = getCookie(response.getAllHeaders());
if (cookies == null) {
    // getCookie returns null when the response carries no Set-Cookie header
    throw new IllegalStateException("Captcha response did not contain a Set-Cookie header");
}
// Keep only the part before the first ';'
cookie = cookies.split(";")[0];
System.out.println(cookie);
/**
 * Returns the value of the first {@code Set-Cookie} header in the given array.
 *
 * @param headers response headers to search; may be {@code null}
 * @return the raw header value, or {@code null} when there is no such header
 */
public static String getCookie(Header[] headers) {
    if (headers == null) {
        return null;
    }
    for (Header header : headers) {
        // HTTP header field names are case-insensitive, so "set-cookie" must match too
        if ("Set-Cookie".equalsIgnoreCase(header.getName())) {
            return header.getValue();
        }
    }
    return null;
}
下面的代码是测试获取考试成绩的
public static void main(String[] args) throws Exception {
String ik = "420090201100101";
String a = "verify=enc|f4c4fe9e2580fe1871ed5882a9aef232694e334c676d08a5a6e55fd0fe24015665759ce14873dbc35ae269589bb742bb";
try {
CloseableHttpResponse httpResponse = resultInquiry(ik, "周西", "nn5a",a);
if (httpResponse.getStatusLine().getStatusCode() == 200) {
resultString = EntityUtils.toString(httpResponse.getEntity(), "UTF-8");
String skh ="(?<=\\()[^\\)]+";//用于匹配()里面的文字
Pattern pattern = Pattern.compile(skh);
Matcher matcher = pattern.matcher(resultString);
boolean is = matcher.find();
if(is){
System.out.print(matcher.group());
Map<String,Object> jsonToMap = JSONObject.parseObject(matcher.group());
System.out.println(jsonToMap);
}
} catch (Exception e) {
e.printStackTrace();
throw new InternalError(e.getMessage());
}
}
/**
 * Sends the second request, which fetches the exam score.
 *
 * <p>On success the caller owns the returned response and must close it. The HTTP client
 * created here stays open in that case, because closing it would also shut down the
 * connection the response body is read from.
 *
 * @param ticketNumber     admission ticket number
 * @param name             candidate name
 * @param verificationCode captcha text
 * @param cookie           value sent as the Cookie request header
 * @return the open response when the status is 200, otherwise {@code null}
 * @throws UnsupportedEncodingException if UTF-8 is not available for URL encoding
 * @throws InternalError if executing the request fails; the original exception is the cause
 */
public static CloseableHttpResponse resultInquiry(String ticketNumber, String name, String verificationCode, String cookie) throws UnsupportedEncodingException {
    // Encode every caller-supplied value so spaces, '&', '#' etc. cannot break or extend the query
    String data = idCode(ticketNumber) + ","
            + java.net.URLEncoder.encode(ticketNumber, "UTF-8") + ","
            + java.net.URLEncoder.encode(name, "UTF-8");
    String url = "http://cache.neea.edu.cn/cet/query"
            + "?data=" + data + "&v=" + java.net.URLEncoder.encode(verificationCode, "UTF-8");
    HttpGet httpGet = new HttpGet(url);
    httpGet.addHeader("Referer", "http://cet.neea.edu.cn/cet");
    httpGet.addHeader("Cookie", cookie);

    CloseableHttpClient httpclient = HttpClients.createDefault();
    CloseableHttpResponse response = null;
    boolean handedToCaller = false;
    try {
        response = httpclient.execute(httpGet);
        if (response.getStatusLine().getStatusCode() == 200) {
            handedToCaller = true;
            return response;
        }
        return null;
    } catch (Exception e) {
        // Error type kept for backward compatibility; the cause is now preserved
        throw new InternalError(e.getMessage(), e);
    } finally {
        // Nothing is returned on the non-200 and failure paths, so release everything here
        if (!handedToCaller) {
            closeQuietly(response);
            closeQuietly(httpclient);
        }
    }
}

/** Closes the resource if it is non-null, ignoring any failure to close. */
private static void closeQuietly(java.io.Closeable resource) {
    if (resource == null) {
        return;
    }
    try {
        resource.close();
    } catch (java.io.IOException ignored) {
        // Best-effort cleanup: there is nothing useful to do if close itself fails
    }
}
/**
 * Derives the prefix of the query's data value from the admission ticket number.
 *
 * @param ticketNumber admission ticket number; the character at index 9 selects the prefix
 * @return "CET4_192_DANGCI" when that character is '1', otherwise "CET6_192_DANGCI"
 */
public static String idCode(String ticketNumber) {
    boolean levelFour = ticketNumber.charAt(9) == '1';
    return levelFour ? "CET4_192_DANGCI" : "CET6_192_DANGCI";
}
由于是自己做测试用的,上面的代码写得有些乱,所以我把它封装成了一个工具类,代码如下
package top.xpcweb.demotest.utils;
import com.alibaba.fastjson.JSONObject;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.Header;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import sun.misc.BASE64Encoder;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Utility class for querying exam scores from the neea.edu.cn CET endpoints.
 *
 * <p>The lookup takes two calls. {@link #getVerificationCode} fetches the captcha image and
 * passes the upstream cookie to the client in the {@code Eng-Cookie} response header.
 * {@link #resultInquiry} reads that cookie back from the {@code Eng-Cookie} request header
 * and sends it together with the captcha text to fetch the score.
 *
 * @author xpc
 * @version 2020-06-17
 */
public class ResultInquiryUtils {
    /** Fixed value of the {@code c} query parameter. */
    public static final String C = "CET";
    /** Base URL under which captcha images are served. */
    public static final String IMG_URL = "http://cet.neea.edu.cn/imgs/";
    /** Score query endpoint. */
    public static final String QUERY_URL = "http://cache.neea.edu.cn/cet/query";
    /** Referer header value sent with every upstream request. */
    public static final String REFERER = "http://cet.neea.edu.cn/cet";

    /** Endpoint that returns the captcha image name. */
    private static final String IMG_NAME_URL = "http://cache.neea.edu.cn/Imgs.do";
    /** Header used to hand the upstream cookie to our own client and to read it back. */
    private static final String ENG_COOKIE_HEADER = "Eng-Cookie";
    /** Connect and read timeout for the image download, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 5000;
    /** Captures the quoted image name inside {@code imgs("...")}. */
    private static final Pattern IMG_NAME_PATTERN =
            Pattern.compile("imgs\\(\\s*[\"']([^\"']+)[\"']\\s*\\)");
    /** Matches the text between the first '(' and the next ')'. */
    private static final Pattern PAREN_CONTENT_PATTERN = Pattern.compile("(?<=\\()[^\\)]+");

    /**
     * Fetches the captcha for an admission ticket number.
     *
     * <p>Also sets the {@code Eng-Cookie} header on {@code response}. When the upstream status
     * is not 200, that header and the returned string are both empty.
     *
     * @param ik       admission ticket number
     * @param response servlet response that receives the {@code Eng-Cookie} header
     * @return the captcha image as Base64 text, or an empty string on a non-200 status
     * @throws NullPointerException if {@code ik} is blank or {@code response} is null
     * @throws RuntimeException     if the request, the image download or the parsing fails
     */
    public static String getVerificationCode(String ik, HttpServletResponse response) {
        if (StringUtils.isBlank(ik)) {
            throw new NullPointerException("准考证号不能为null或为空。");
        }
        if (null == response) {
            throw new NullPointerException("响应头不能为null。");
        }
        String imageToBase64 = "";
        String cookie = "";
        HttpGet httpget = new HttpGet(IMG_NAME_URL + "?c=" + C + "&ik=" + urlEncode(ik));
        httpget.addHeader("Referer", REFERER);
        // try-with-resources closes the response and then the client on every path
        try (CloseableHttpClient httpclient = HttpClientBuilder.create().build();
             CloseableHttpResponse getResponse = httpclient.execute(httpget)) {
            if (getResponse.getStatusLine().getStatusCode() == 200) {
                String resultString = EntityUtils.toString(getResponse.getEntity(), "UTF-8");
                String imgUrl = IMG_URL + extractImageName(resultString) + ".png";
                imageToBase64 = NetImageToBase64(imgUrl);
                cookie = extractCookie(getResponse);
            }
            response.setHeader(ENG_COOKIE_HEADER, cookie);
        } catch (Exception e) {
            // Keep the original exception as the cause so the stack trace is not lost
            throw new RuntimeException(e.getMessage(), e);
        }
        return imageToBase64;
    }

    /**
     * Pulls the image name out of a body shaped like {@code result.imgs("name");}.
     *
     * @throws IllegalStateException if the body does not have that shape
     */
    private static String extractImageName(String body) {
        Matcher matcher = IMG_NAME_PATTERN.matcher(body);
        if (!matcher.find()) {
            throw new IllegalStateException("Unexpected captcha response: " + body);
        }
        return matcher.group(1);
    }

    /**
     * Returns the part of the first {@code Set-Cookie} header that precedes the first ';'.
     *
     * @throws IllegalStateException if the response has no {@code Set-Cookie} header
     */
    private static String extractCookie(CloseableHttpResponse upstream) {
        Header[] headers = upstream.getHeaders("Set-Cookie");
        if (headers == null || headers.length == 0) {
            throw new IllegalStateException("Captcha response did not contain a Set-Cookie header");
        }
        return headers[0].getValue().split(";")[0];
    }

    /**
     * Downloads an image over HTTP and returns its bytes Base64-encoded.
     *
     * @param netImagePath absolute HTTP URL of the image
     * @return the Base64 text of the image bytes, on a single line without line separators
     * @throws UncheckedIOException if the URL is malformed or the download fails
     */
    private static String NetImageToBase64(String netImagePath) {
        ByteArrayOutputStream data = new ByteArrayOutputStream();
        try {
            HttpURLConnection conn = (HttpURLConnection) new URL(netImagePath).openConnection();
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(TIMEOUT_MILLIS);
            // Without a read timeout a stalled server would block this call forever
            conn.setReadTimeout(TIMEOUT_MILLIS);
            try (InputStream is = conn.getInputStream()) {
                byte[] by = new byte[1024];
                int len;
                while ((len = is.read(by)) != -1) {
                    data.write(by, 0, len);
                }
            }
        } catch (IOException e) {
            // Fail loudly instead of encoding an empty or truncated image
            throw new UncheckedIOException("Failed to download image " + netImagePath, e);
        }
        // java.util.Base64 replaces the JDK-internal sun.misc.BASE64Encoder
        return Base64.getEncoder().encodeToString(data.toByteArray());
    }

    /**
     * Fetches the exam score.
     *
     * @param ticketNumber     admission ticket number
     * @param name             candidate name
     * @param verificationCode captcha text
     * @param request          servlet request carrying the {@code Eng-Cookie} header
     * @return the parsed score payload; an empty map when the upstream status is not 200
     *         or the body contains no parenthesised payload
     * @throws NullPointerException if an argument is blank or null, or the
     *                              {@code Eng-Cookie} header is missing
     * @throws InternalError        if the request or the parsing fails; the original
     *                              exception is the cause
     */
    public static Map<String, Object> resultInquiry(String ticketNumber, String name, String verificationCode,
                                                    HttpServletRequest request) {
        if (StringUtils.isBlank(ticketNumber) || StringUtils.isBlank(name)
                || StringUtils.isBlank(verificationCode) || null == request) {
            throw new NullPointerException("参数错误。");
        }
        String cookie = request.getHeader(ENG_COOKIE_HEADER);
        if (StringUtils.isBlank(cookie)) {
            throw new NullPointerException("Eng-Cookie未传。");
        }
        // Encode every caller-supplied value so it cannot break or extend the query string
        String data = idCode(ticketNumber) + "," + urlEncode(ticketNumber) + "," + urlEncode(name);
        HttpGet httpGet = new HttpGet(QUERY_URL + "?data=" + data + "&v=" + urlEncode(verificationCode));
        httpGet.addHeader("Referer", REFERER);
        httpGet.addHeader("Cookie", cookie);

        Map<String, Object> map = new HashMap<>();
        try (CloseableHttpClient httpclient = HttpClients.createDefault();
             CloseableHttpResponse response = httpclient.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                String resultString = EntityUtils.toString(response.getEntity(), "UTF-8");
                Matcher matcher = PAREN_CONTENT_PATTERN.matcher(resultString);
                if (matcher.find()) {
                    Map<String, Object> parsed = JSONObject.parseObject(matcher.group());
                    if (parsed != null) {
                        map = parsed;
                    }
                }
            }
        } catch (Exception e) {
            // Error type kept for backward compatibility; the cause is now preserved
            throw new InternalError(e.getMessage(), e);
        }
        return map;
    }

    /** URL-encodes a query parameter value as UTF-8. */
    private static String urlEncode(String value) {
        try {
            return URLEncoder.encode(value, StandardCharsets.UTF_8.name());
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException("UTF-8 is not supported", e);
        }
    }

    /**
     * Derives the prefix of the query's data value from the admission ticket number.
     *
     * @param ticketNumber admission ticket number; the character at index 9 selects the prefix
     * @return "CET4_192_DANGCI" when that character is '1', otherwise "CET6_192_DANGCI"
     */
    private static String idCode(String ticketNumber) {
        String substring = ticketNumber.substring(9, 10);
        // TODO: this prefix may change from year to year
        if ("1".equals(substring)) {
            return "CET4_192_DANGCI";
        } else {
            return "CET6_192_DANGCI";
        }
    }
}
顺便把Controller的代码也贴出来吧
package top.xpcweb.demotest.web;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseBody;
import top.xpcweb.demotest.utils.ResultInquiryUtils;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.util.Map;
/**
 * Controller exposing the score-query endpoints under {@code resultInquiry}.
 *
 * @author xpc
 * @version 2020-06-24
 */
@Controller
@RequestMapping("resultInquiry")
public class ResultInquiryController {

    /**
     * Captcha endpoint. Delegates to {@code ResultInquiryUtils.getVerificationCode}.
     *
     * @param ik       admission ticket number
     * @param request  current servlet request (not used)
     * @param response current servlet response, passed on to the utility
     * @return the string produced by the utility
     */
    @ResponseBody
    @RequestMapping("eng/verificationCode")
    public String getVerificationCode(String ik, HttpServletRequest request,
                                      HttpServletResponse response) {
        return ResultInquiryUtils.getVerificationCode(ik, response);
    }

    /**
     * Score endpoint. Delegates to {@code ResultInquiryUtils.resultInquiry}.
     *
     * @param ticketNumber     admission ticket number
     * @param name             candidate name
     * @param verificationCode captcha text
     * @param request          current servlet request, passed on to the utility
     * @param response         current servlet response (not used)
     * @return the map produced by the utility
     */
    @ResponseBody
    @RequestMapping("eng/resultInquiry")
    public Map<String, Object> resultInquiry(String ticketNumber, String name, String verificationCode,
                                             HttpServletRequest request, HttpServletResponse response) {
        return ResultInquiryUtils.resultInquiry(ticketNumber, name, verificationCode, request);
    }
}
也欢迎大家提出我代码中的问题,或更加简便的方法
也欢迎来鱼樂圈,有兴趣的私聊我咱们一起开发此网站