HttpClient和HtmlParser配合实现自动CAS单点登录系统抽取页面信息

HttpClient 下载地址:http://mirror.bit.edu.cn/apache/httpcomponents/httpclient/binary/httpcomponents-client-4.5.1-bin.zip


在项目中引入 HttpClient 压缩包内的所有 jar 包以及 HtmlParser 的 jar 包,然后看下面的代码:

package org.apache.http.examples.client;


import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.List;


import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.TableColumn;
import org.htmlparser.tags.TableRow;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.htmlparser.util.SimpleNodeIterator;


/**
 * A example that demonstrates how HttpClient APIs can be used to perform
 * form-based logon. 一个例子,演示了如何HttpClient API可用于执行基于表单的登录。
 */
public class ClientFormLogin {
private static final String CAS_URI = "http://192.168.1.121:8088/cas/login";
private static final String USERNAME = "admin";
private static final String PASSWORD = "123456";
private static final String REQ_URI = "http://192.168.1.121:8089/szxy/oa/oasfwj/list";


public static void main(String[] args) throws Exception {
BasicCookieStore cookieStore = new BasicCookieStore();
CloseableHttpClient httpclient = HttpClients.custom()
.setDefaultCookieStore(cookieStore).build();
try {
HttpGet httpget = new HttpGet(CAS_URI);
CloseableHttpResponse response1 = httpclient.execute(httpget);
try {
HttpEntity entity = response1.getEntity();


System.out.println("Login form get: "
+ response1.getStatusLine());
EntityUtils.consume(entity);


System.out.println("Initial set of cookies:");
List cookies = cookieStore.getCookies();
if (cookies.isEmpty()) {
System.out.println("None");
} else {
for (int i = 0; i < cookies.size(); i++) {
System.out.println("- " + cookies.get(i).toString());
}
}
} finally {
response1.close();
}


HttpUriRequest login = RequestBuilder.post()
.setUri(new URI(CAS_URI))
.addParameter("username", USERNAME)
.addParameter("password", PASSWORD)
.addParameter("lt", doCasLoginRequest(httpclient, CAS_URI))
.addParameter("_eventId", "submit")
.addParameter("submit", "登录")
.addParameter("execution", "e2s1").build();
CloseableHttpResponse response2 = httpclient.execute(login);


HttpGet httppost = new HttpGet(REQ_URI);
CloseableHttpResponse response3 = httpclient.execute(httppost);
System.out.println("请求访问地址状态码: " + response3.getStatusLine());
// System.out.println(EntityUtils.toString(response3.getEntity()));
String body=EntityUtils.toString(response3.getEntity());
parseHtml(body);
try {
HttpEntity entity = response2.getEntity();


System.out.println("Login form get: "
+ response2.getStatusLine());
EntityUtils.consume(entity);


System.out.println("Post logon cookies:");
List cookies = cookieStore.getCookies();


if (cookies.isEmpty()) {
System.out.println("None");
} else {
for (int i = 0; i < cookies.size(); i++) {
System.out.println("- " + cookies.get(i).toString());
}
}
} finally {
response2.close();
}
} finally {
httpclient.close();
}
}
/**
* @Method parseHtml
* @Function 功能描述:解析html
* @param body
* @return
* @throws ParserException
* @Date 2015年11月27日
*/
private static String parseHtml(String body) throws ParserException{
Parser parser = Parser.createParser(body, "UTF-8");
String filterStr = "table";
NodeFilter filter = new TagNameFilter(filterStr);
NodeList tables = parser.extractAllNodesThatMatch(filter);
//找到单位列表所在的表格
TableTag tabletag = (TableTag) tables.elementAt(1);
TableRow row = tabletag.getRow(1);
TableColumn[] cols = row.getColumns();
//System.out.println("单位名称:" + cols[2].toHtml());
System.out.println("单位名称:" + cols[1].childAt(0).getText());
return filterStr;
}
private static void processNodeList(NodeList list, String keyword) {
//迭代开始
SimpleNodeIterator iterator = list.elements();
while (iterator.hasMoreNodes()) {
Node node = iterator.nextNode();
//得到该节点的子节点列表
NodeList childList = node.getChildren();
//孩子节点为空,说明是值节点
if (null == childList)
{
//得到值节点的值
String result = node.toPlainTextString();
//若包含关键字,则简单打印出来文本
if (result.indexOf(keyword) != -1)
System.out.println(result);
} //end if
//孩子节点不为空,继续迭代该孩子节点
else 
{
processNodeList(childList, keyword);
}//end else
}//end wile
}
private static String doCasLoginRequest(HttpClient httpclient, String url)
throws IOException {
String result = "";
HttpGet httpget = new HttpGet(url);
HttpResponse response = httpclient.execute(httpget);
HttpEntity entity = response.getEntity();
BufferedReader rd = new BufferedReader(new InputStreamReader(
entity.getContent(), "UTF-8"));
String tempLine = rd.readLine();
String s = " while (tempLine != null) {
int index = tempLine.indexOf(s);
if (index != -1) {
String s1 = tempLine.substring(index + s.length());
int index1 = s1.indexOf("\"");
if (index1 != -1)
result = s1.substring(0, index1);
}
tempLine = rd.readLine();
}
if (entity != null) {
entity.consumeContent();
}
return result;
}
}

你可能感兴趣的:(HttpClient和HtmlParser配合实现自动CAS单点登录系统抽取页面信息)