package com.hoperun.webos.servlet;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.hoperun.webos.pojo.Entity;
import com.thoughtworks.xstream.XStream;
public class WidgetServlet extends HttpServlet {
private static final long serialVersionUID = 1L;
private List<String> categoryArray = new ArrayList<String>();
public WidgetServlet() {
super();
// 待办事宜
categoryArray.add("dbsy");
// 待阅事宜
categoryArray.add("dysy");
// 邮件
categoryArray.add("yj");
}
@Override
protected void service(HttpServletRequest request,
HttpServletResponse response) throws ServletException, IOException {
request.setCharacterEncoding("utf-8");
// 请求参数在数组中的下标
int index = 0;
// 获取要提取的信息分类
String param = request.getParameter("category");
index = categoryArray.indexOf(param);
// 参数值大于等于零
if (index >= 0) {
Document doc = getDocument();
// 解析class=txt_12_black的节点,
// 总共为八个结果,我们只需要前六个,根据index的值来选择要解析的element,计算公式为2*i+1
Elements elements = doc.select(".txt_12_black");
Element element = elements.get(2 * index + 1);
// 详细解析
List<Entity> result = detailParse(element);
// 将结果写到response中
response.setCharacterEncoding("utf-8");
response.setContentType("text/html;charset=utf-8");
String finalResult = toXML(result);
// 将result转化为xml格式
response.getWriter().write(finalResult);
}
}
/** 获取url所返回的页面数据 */
private Document getDocument() {
Document doc = null;
HttpClient httpClient = new DefaultHttpClient();
HttpGet httpGet = new HttpGet(
"http://10.20.107.102:8080/res/FrmRight_01.htm");
try {
HttpResponse httpResponse = httpClient.execute(httpGet);
// 返回成功
if (HttpStatus.SC_OK == httpResponse.getStatusLine()
.getStatusCode()) {
// 得到输入流
HttpEntity httpEntity = httpResponse.getEntity();
InputStream is = httpEntity.getContent();
// 解析为html文档
doc = Jsoup.parse(is, "gb2312", "http://10.20.107.102:8080");
}
} catch (ClientProtocolException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return doc;
}
/** 对element进行详细解析 */
private List<Entity> detailParse(Element element) {
// 返回的结果实体集
List<Entity> array = new ArrayList<Entity>();
Elements elem = element.children();
// 返回记录的分类
String category = null;
for (Element temp : elem) {
// elem直接子元素的标签为tbody
if ("tbody".equals(temp.tagName())) {
Elements tbChild = temp.children();
Element categoryElement = null;
// 当他的直接子元素有两个时,第二个为该记录集合的分类信息,否则记录的分类信息位于第四个直接记录中
if (tbChild.size() == 2) {
categoryElement = tbChild.get(1);
category = categoryElement.text();
}
} else if ("a".equals(temp.tagName())) { // elem直接子元素的标签为a
Entity entity = new Entity();
// 记录项前面是否有选择框
int flag = 0;
Elements as = temp.child(0).child(0).child(0).children();
// 当第一个直接子元素的文本值为空时,则第一个直接子元素为选择框
if (null == as.get(0).text() || "".equals(as.get(0).text())) {
flag = 1;
}
entity.setData1(as.get(0 + flag).text());
entity.setDate2(as.get(1 + flag).text());
entity.setData3(as.get(2 + flag).text());
// 处理待阅事宜前面的选择框
if (as.size() == 4 && flag == 0) {
// 该记录的分类信息位于第四个直接子元素
entity.setData4(as.get(3).text());
} else {
// 该记录的分类信息是分类集合中的一条,所以分类信息位于tbody中
entity.setData4(category);
}
// 格式化地址
String href = temp.attr("href");
entity.setData5("http://10.20.107.102:8080" + formatURL(href));
array.add(entity);
} else if ("tr".equals(temp.tagName())) { // elem直接子元素的标签为tr
// 标签里面包含该记录的分类信息
category = temp.text();
}
}
return array;
}
/** 将result转化为xml格式 */
private String toXML(List<Entity> array) {
XStream xstream = new XStream();
xstream.alias("item", Entity.class);
if (null == array) {
array = new ArrayList<Entity>();
}
return xstream.toXML(array);
}
/** 格式化地址 */
private String formatURL(String href) {
int startIndex = href.indexOf("\'");
int endIndex = href.lastIndexOf("\'");
return href.substring(startIndex + 1, endIndex);
}
}