抽取总方法:
/**
 * Runs every enabled extraction step and collects one result message per step.
 *
 * <p>Steps run in this order: order, inventory, invoice, payment, delivery.
 *
 * @param path      base path handed to each extraction step
 * @param startTime start of the extraction window, passed through to each step
 * @param endTime   end of the extraction window, passed through to each step
 * @return the result messages in execution order; never {@code null}
 */
@Override
public List<CrawlerMessage> crawlerData(String path, String startTime, String endTime) {
    List<CrawlerMessage> list = new ArrayList<>();
    list.add(order(path, startTime, endTime));
    list.add(inventory(path, startTime, endTime));
    list.add(invoice(path, startTime, endTime));
    list.add(payment(path, startTime, endTime));
    list.add(delivery(path, startTime, endTime));
    // Return-order and sales extraction are currently disabled:
    // list.add(returnOrder(path, startTime, endTime));
    // list.add(sale(path, startTime, endTime));

    // Fix: the original returned null here, discarding every collected result.
    return list;
}
抽取订单方法:
/**
 * Extracts order data and reports the outcome as a {@link CrawlerMessage}.
 *
 * <p>Only {@code path} and {@code endTime} are forwarded to {@code showOrder};
 * {@code startTime} is not used by this step.
 *
 * @param path      base path under which the extracted files are saved
 * @param startTime start of the extraction window (unused here)
 * @param endTime   end of the extraction window, used as the date of the saved files
 * @return a message created with code {@code E20001}; on an {@link IOException} its
 *         code is changed to {@code E20003} and its text to the failure message
 */
@Override
protected CrawlerMessage order(String path, String startTime, String endTime) {
    logger.info("开始抽取订单数据!");
    CrawlerMessage message = new CrawlerMessage(E20001, Order.name(), "订单", "抽取订单成功!");
    try {
        showOrder(path, endTime);
    } catch (IOException e) {
        // Keep the cause in the log; the message alone carries no diagnostic detail.
        logger.error("抽取订单异常!", e);
        message.setCode(E20003);
        message.setMessage("抽取订单异常!");
    } finally {
        // showOrder disables JavaScript on the shared client; restore it even when an
        // unchecked exception escapes, so later steps are not left with it turned off.
        webClient.getOptions().setJavaScriptEnabled(true);
    }
    logger.info("订单抽取结束!");
    return message;
}
具体抽取方法:
//----------------------------------- extraction methods ----------------------------------------------
/**
 * Crawls the order list page and writes one header record plus one detail file per order.
 *
 * <p>The order list page is loaded and the first table of the form named {@code order}
 * is located. Each nested table is treated as one store block: the text of its first
 * {@code li} element (up to the first {@code '-'}) is used as the store name, and every
 * anchor inside it is followed to an order detail page. For each detail page the order
 * header fields are written to a shared order file and the line-item fields to a
 * per-order detail file.
 *
 * <p>JavaScript is switched off on the shared {@code webClient} before each detail page
 * is fetched and is not switched back on here.
 *
 * @param path base path handed to {@code FileUtil.getSaveFile}
 * @param date date handed to {@code FileUtil.getSaveFile}
 * @throws IOException if a page cannot be fetched or a file cannot be written
 */
public void showOrder(String path, String date) throws IOException {
    String orderUrl = "https://supplier.rt-mart.com.cn/php/scm_orders_form_1.php?status=1";
    String orderDetailUrl = "https://supplier.rt-mart.com.cn/php/";

    // Mapping key format is "table-row-column" (indices into the detail page's tables).
    // A fourth component, where present, only keeps keys unique when two output fields
    // come from the same cell.
    // NOTE(review): the original comment said the data is in "the 25th and 26th tables
    // of the page", while the keys below use indices 26 and 24 - confirm.
    Map<String, String> param = new LinkedHashMap<>();
    param.put("26-0-1-1", "storeName");           // store code: part of the order number before the dot, e.g. "5.161300066"
    param.put("26-0-1-2", "orderCode");           // order code: part of the order number after the dot
    param.put("26-1-1", "orderStatus");           // order status
    param.put("26-2-1", "orderDate");             // order date
    param.put("26-3-1", "expectArriveDate");      // expected delivery date
    param.put("26-4-1", "receiveDate");           // receipt date
    // row 5 (pickup date) is not extracted
    param.put("26-6-1", "saleAuction");           // promotion period
    param.put("26-7-1", "orderAmount");           // order amount
    // row 8 (actual amount) is not extracted
    param.put("26-9-1", "discount");              // invoice discount
    param.put("26-10-1", "discountAmount_NoTax"); // discount amount
    param.put("26-11-1", "invoiceAmount");        // invoiced amount
    param.put("26-12-1", "recNumber");            // total received quantity
    param.put("26-13-1", "note");                 // remarks

    // Line-item mapping; identical for every order, so it is built once.
    Map<String, String> detailParam = new LinkedHashMap<>();
    detailParam.put("24-2-0", "barcode");       // barcode
    detailParam.put("24-2-1", "kaproductcode"); // item number
    detailParam.put("24-2-2", "deliveryCode");  // receiving number
    detailParam.put("24-2-3", "price_tax");     // unit invoice price
    detailParam.put("24-2-4", "uom");           // unit of measure
    detailParam.put("24-2-5", "orderNum");      // ordered quantity
    detailParam.put("24-2-6", "receiveqty");    // received quantity

    HtmlPage orderMain = webClient.getPage(orderUrl);
    HtmlForm orderForm = orderMain.getFormByName("order");
    HtmlElement orderTab = orderForm.getElementsByTagName("table").get(0);
    List<HtmlElement> storeTables = orderTab.getHtmlElementsByTagName("table");

    String saveFile = FileUtil.getSaveFile(path, date, RTMART, userName, Order.name());
    // True only for the first order written: that write starts the order file afresh
    // and emits the header row; later orders are appended to it.
    boolean firstOrder = true;
    for (HtmlElement storeTable : storeTables) {
        // Store name is the text of the first <li>, up to the first '-'.
        String storeName = transCoding(storeTable.getElementsByTagName("li").get(0).asText()).split("\\-")[0];

        // Each anchor in the store block links to one order detail page.
        for (HtmlElement link : storeTable.getElementsByTagName("a")) {
            HtmlAnchor anchor = (HtmlAnchor) link;
            String detailUrl = orderDetailUrl + anchor.getHrefAttribute();
            logger.info("明细地址为:" + detailUrl);

            webClient.getOptions().setJavaScriptEnabled(false);
            HtmlPage page = webClient.getPage(detailUrl);

            writeXml2File(page, param, saveFile, firstOrder, storeName);
            firstOrder = false;

            logger.info("开始抽取订单明细");
            // One detail file per order, named after the link text minus its first character.
            String detailFile = FileUtil.getSaveFile(path, date, RTMART, userName,
                    OrderDetail.name() + "$" + link.asText().substring(1));
            writeXml2File(page, detailParam, detailFile, true, null);
        }
    }
}
写数据方法:
//----------------------------------写数据方法---------------------------------------------
public void writeXml2File(HtmlPage page, Map param, String saveFile, boolean append, String storeName) throws IOException {
BufferedWriter bufferedWriter = null;
bufferedWriter = new BufferedWriter(new OutputStreamWriter(
new FileOutputStream(saveFile,!append), "UTF-8"));
if(append){
//头部写入文件中
String headerInfo = "";
for(String s :param.keySet()){
headerInfo += param.get(s)+"|$|";
}
headerInfo = headerInfo.substring(0,headerInfo.length()-3)+"\r\n";
bufferedWriter.write(headerInfo);//写入头部信息
}
DomNodeList list = page.getElementsByTagName("table");
//取对应表格对应数据方法:
//((HtmlTable) list.get(26)).getRow(3).getCell(1).asText();
String[] arr;
String strLine = "";
String temp = "";
for(String str:param.keySet()){
arr = str.split("-");
temp = transCoding(((HtmlTable) list.get(Integer.parseInt(arr[0]))).getRow(Integer.parseInt(arr[1])).getCell(Integer.parseInt(arr[2])).asText())+"|$|";**
if(str.contains("26-0-1-")){
if(param.get(str).equals("storeName")){
temp = "["+temp.split("\\.")[0]+"]"+storeName +"|$|";
}else {
temp = temp.split("\\.")[1];
}
}
strLine += temp;
}
strLine = strLine.substring(0,strLine.length()-3)+"\r\n";
bufferedWriter.write(strLine);
bufferedWriter.flush();
bufferedWriter.close();
}