htmlunit分页爬取数据实现

阅读更多
package com.htmlunit.sky;

import java.io.IOException;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.CollectingAlertHandler;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.DomNodeList;
import com.gargoylesoftware.htmlunit.html.HtmlDivision;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlForm;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlPasswordInput;
import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput;
import com.gargoylesoftware.htmlunit.html.HtmlTable;
import com.gargoylesoftware.htmlunit.html.HtmlTableCell;
import com.gargoylesoftware.htmlunit.html.HtmlTableRow;
import com.gargoylesoftware.htmlunit.html.HtmlTextInput;
/**
 * Scrapes a web portal with HtmlUnit: submits recharges, queries the paginated recharge
 * history and reads the agent balance.
 *
 * <p>Each public method creates its own {@link WebClient}, logs in, performs one task and
 * closes the client again. Failures while talking to the portal are printed to standard error
 * and reported through the method's "nothing happened" return value ({@code false}, an empty
 * list or an empty string).
 *
 * @author liming
 */
public class SkyHtmlUnit {
    // NOTE(review): the three credentials below are hard-coded placeholders. Real values should
    // be loaded from configuration instead of being committed with the source.

    /** Password entered into the recharge form. */
    private static final String TRANS_PASSWORD = "XXXX";
    /** Login name entered into the login form. */
    private static final String USER_NAME = "xxx";
    /** Login password entered into the login form. */
    private static final String USER_PASSWORD = "XXXX";

    private static final String LOGIN_URL = "http://XXX/vams/";
    private static final String RECHARGE_URL = "http://XXX/vams/WebPay/WebTrans.faces";
    private static final String HISTORY_URL =
            "http://XXX/vams/WebPay/WebPaySearch.faces?showType=2";
    private static final String BALANCE_URL = "http://XXX/vams/WebPay/WebPayRemainSearch.faces";

    /** Alert text that {@link #recharge} treats as a successful recharge. */
    private static final String RECHARGE_SUCCESS_ALERT = "充值成功!";

    /** Number of leading cells of a history row that are copied into a {@code RechargeBean}. */
    private static final int HISTORY_COLUMNS = 7;
    /** Zero-based index of the result-table cell whose link opens the balance detail page. */
    private static final int BALANCE_LINK_COLUMN = 7;

    /**
     * Submits a recharge through the portal's recharge form.
     *
     * @param transNumber number of the account being recharged
     * @param transAmount recharge amount as a decimal string; any fractional part is truncated
     *            before the form is filled in
     * @return {@code true} only if the first JavaScript alert collected during the session
     *         equals {@code "充值成功!"}; {@code false} otherwise, including when the amount
     *         cannot be parsed or any step fails
     */
    public static boolean recharge(String transNumber, String transAmount) {
        boolean success = false;
        WebClient client = null;
        try {
            BigDecimal amount = new BigDecimal(transAmount).setScale(0, RoundingMode.DOWN);

            client = new WebClient(BrowserVersion.INTERNET_EXPLORER_8);
            // NOTE(review): the handler is attached before login, so an alert raised by the
            // login page would become "the first alert" and make this method return false.
            List<String> alerts = new ArrayList<String>();
            client.setAlertHandler(new CollectingAlertHandler(alerts));
            login(client);

            HtmlPage rechargePage = (HtmlPage) client.getPage(RECHARGE_URL);
            HtmlForm rechargeForm = rechargePage.getForms().get(0);
            HtmlTextInput numberInput = (HtmlTextInput) rechargeForm
                    .getInputByName("webPayment:transNumber");
            HtmlTextInput amountInput = (HtmlTextInput) rechargeForm
                    .getInputByName("webPayment:transAmount");
            HtmlPasswordInput passwordInput = (HtmlPasswordInput) rechargeForm
                    .getInputByName("webPayment:transPassword");
            passwordInput.setValueAttribute(TRANS_PASSWORD);
            amountInput.setValueAttribute(amount.toString());
            numberInput.setValueAttribute(transNumber);
            rechargeForm.getInputByName("webPayment:j_id_jsp_427518967_10").click();

            // No alert at all is treated as "not confirmed" rather than as an error.
            success = !alerts.isEmpty() && RECHARGE_SUCCESS_ALERT.equals(alerts.get(0));
        } catch (Exception e) {
            // Best effort: any failure is reported to the caller as "not recharged".
            e.printStackTrace();
        } finally {
            close(client);
        }
        System.out.println("**********充值结束*******");
        return success;
    }

    /**
     * Queries the recharge history and returns one bean per table row, in table order.
     *
     * <p>Rows are only read when the result page contains at least two {@code div} elements
     * with id {@code zhu1}; the second one is used as the pager. If any step fails, the stack
     * trace is printed and an empty list is returned (no partial results).
     *
     * @param payNoValue recharged number to filter by
     * @param beforeTime start of the recharge-time range in {@code yyyyMMdd} format;
     *            {@code null} or empty leaves the field blank
     * @param afterTime end of the recharge-time range in {@code yyyyMMdd} format;
     *            {@code null} or empty leaves the field blank
     * @param rechargeAmount recharge amount to filter by
     * @return the matching records; never {@code null}
     * @throws ParseException if a non-empty date argument is not in {@code yyyyMMdd} format
     */
    public static List<RechargeBean> getRechargeList(String payNoValue, String beforeTime,
            String afterTime, String rechargeAmount) throws ParseException {
        String fromDate = toPortalDate(beforeTime);
        String toDate = toPortalDate(afterTime);

        List<RechargeBean> result = new ArrayList<RechargeBean>();
        WebClient client = null;
        try {
            client = new WebClient(BrowserVersion.INTERNET_EXPLORER_8);
            login(client);

            HtmlPage searchPage = (HtmlPage) client.getPage(HISTORY_URL);
            HtmlForm searchForm = searchPage.getForms().get(0);
            setText(searchForm, "form1:payNo", payNoValue);
            setText(searchForm, "form1:j_id_jsp_1155368641_9", fromDate);
            setText(searchForm, "form1:j_id_jsp_1155368641_11", toDate);
            setText(searchForm, "form1:j_id_jsp_1155368641_13", rechargeAmount);
            HtmlSubmitInput searchButton = (HtmlSubmitInput) searchForm
                    .getInputByName("form1:j_id_jsp_1155368641_32");
            HtmlPage firstPage = (HtmlPage) searchButton.click();

            List<?> pagers = firstPage.getForms().get(0).getByXPath("//div[@id='zhu1']");
            if (pagers.size() > 1) {
                HtmlDivision pager = (HtmlDivision) pagers.get(1);
                // The page count is the text between the third "共"-delimited segment's start
                // and the following "页".
                int pageCount = Integer.parseInt(
                        pager.asText().split("共")[2].split("页")[0].trim());

                // Collected separately so that a failure half-way yields an empty result.
                List<RechargeBean> collected = new ArrayList<RechargeBean>();
                for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
                    HtmlPage currentPage;
                    if (pageIndex == 0) {
                        currentPage = firstPage;
                    } else {
                        // NOTE(review): this always clicks the fourth link of the FIRST page's
                        // pager; presumably that is "next page" and the server tracks the
                        // current page - confirm against the live site.
                        currentPage = pager.getHtmlElementsByTagName("a").get(3).click();
                    }
                    collectRows(currentPage, collected);
                }
                result.addAll(collected);
            }
            System.out.println("********充值结果查询结束***********");
        } catch (Exception e) {
            // Best effort: any failure is reported to the caller as "no records".
            e.printStackTrace();
        } finally {
            close(client);
        }
        return result;
    }

    /**
     * Reads the agent balance from the balance detail page.
     *
     * <p>For every row of the last table on the search result page that has an eighth cell, the
     * first link in that cell is followed and the text of the second {@code td} inside element
     * {@code zhu1} of the resulting page becomes the balance. With several such rows the last
     * one wins.
     *
     * @return the balance text, or an empty string if it could not be read
     */
    public static String getBalance() {
        String balance = "";
        WebClient client = null;
        try {
            client = new WebClient(BrowserVersion.INTERNET_EXPLORER_8);
            login(client);

            HtmlPage searchPage = (HtmlPage) client.getPage(BALANCE_URL);
            HtmlForm searchForm = searchPage.getForms().get(0);
            HtmlSubmitInput searchButton = (HtmlSubmitInput) searchForm
                    .getInputByName("j_id_jsp_1894475275_1:j_id_jsp_1894475275_32");
            HtmlPage resultPage = (HtmlPage) searchButton.click();

            DomNodeList<?> tables = resultPage.getElementsByTagName("table");
            HtmlTable table = (HtmlTable) tables.get(tables.size() - 1);
            for (HtmlTableRow row : table.getBodies().get(0).getRows()) {
                List<HtmlTableCell> cells = row.getCells();
                if (cells.size() <= BALANCE_LINK_COLUMN) {
                    continue;
                }
                HtmlPage detailPage = cells.get(BALANCE_LINK_COLUMN)
                        .getHtmlElementsByTagName("a").get(0).click();
                DomNodeList<?> detailCells = (DomNodeList<?>) detailPage
                        .getElementById("zhu1").getElementsByTagName("td");
                if (detailCells.size() > 1) {
                    balance = detailCells.get(1).asText();
                }
            }
            System.out.println("*************代理商余额查询结束***********");
        } catch (Exception e) {
            // Best effort: any failure is reported to the caller as an empty balance.
            e.printStackTrace();
        } finally {
            close(client);
        }
        return balance;
    }

    /**
     * Demo entry point: runs an unfiltered history query and prints each record's date.
     *
     * <p>Other usages: {@code System.out.println(getBalance());} to print the balance, or
     * {@code System.out.println(recharge("18911019001", amount));} to submit a recharge.
     *
     * @param args ignored
     * @throws ParseException never for the empty date arguments used here
     */
    public static void main(String[] args) throws ParseException {
        List<RechargeBean> history = getRechargeList("", "", "", "");
        for (RechargeBean bean : history) {
            System.out.println(bean.getDate());
        }
    }

    /** Fills in and submits the login form, then switches JavaScript on for the client. */
    private static void login(WebClient client) throws IOException {
        HtmlPage loginPage = (HtmlPage) client.getPage(LOGIN_URL);
        HtmlForm loginForm = loginPage.getForms().get(0);
        HtmlTextInput nameInput = (HtmlTextInput) loginForm.getInputByName("login:userName");
        HtmlPasswordInput passwordInput = (HtmlPasswordInput) loginForm
                .getInputByName("login:password");
        nameInput.setValueAttribute(USER_NAME);
        passwordInput.setValueAttribute(USER_PASSWORD);
        loginForm.getInputByName("login:j_id_jsp_28762255_4").click();
        client.setJavaScriptEnabled(true);
    }

    /** Closes every window of the client, if a client was created. */
    private static void close(WebClient client) {
        if (client != null) {
            client.closeAllWindows();
        }
    }

    /** Sets the value of the named text input of the form. */
    private static void setText(HtmlForm form, String inputName, String value) {
        HtmlTextInput input = (HtmlTextInput) form.getInputByName(inputName);
        input.setValueAttribute(value);
    }

    /** Converts a {@code yyyyMMdd} date to {@code yyyy/MM/dd}; null or empty becomes "". */
    private static String toPortalDate(String compactDate) throws ParseException {
        if (compactDate == null || compactDate.isEmpty()) {
            return "";
        }
        // SimpleDateFormat is not thread-safe, so the formatters stay local to the call.
        SimpleDateFormat compactFormat = new SimpleDateFormat("yyyyMMdd");
        SimpleDateFormat portalFormat = new SimpleDateFormat("yyyy/MM/dd");
        return portalFormat.format(compactFormat.parse(compactDate));
    }

    /** Appends one bean per row of the first body of the page's last table, in row order. */
    private static void collectRows(HtmlPage page, List<RechargeBean> target) {
        DomNodeList<?> tables = page.getElementsByTagName("table");
        HtmlTable table = (HtmlTable) tables.get(tables.size() - 1);
        for (HtmlTableRow row : table.getBodies().get(0).getRows()) {
            List<HtmlTableCell> cells = row.getCells();
            // Cells beyond the expected columns are ignored; missing ones stay null.
            String[] values = new String[HISTORY_COLUMNS];
            int usable = Math.min(cells.size(), HISTORY_COLUMNS);
            for (int column = 0; column < usable; column++) {
                values[column] = cells.get(column).asText();
            }
            target.add(toBean(values));
        }
    }

    /** Maps the seven column texts of a history row onto a new bean. */
    private static RechargeBean toBean(String[] values) {
        RechargeBean bean = new RechargeBean();
        bean.setId(values[0]);
        bean.setDate(values[1]);
        bean.setNumber(values[2]);
        bean.setAmount(values[3]);
        bean.setaType(values[4]);
        bean.setsType(values[5]);
        bean.setCgType(values[6]);
        return bean;
    }
}

你可能感兴趣的:(htmlunit,getByXPath)